{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00029850746268656717, "grad_norm": 14.117414815113976, "learning_rate": 4.950495049504951e-08, "loss": 1.4506, "step": 1 }, { "epoch": 0.0005970149253731343, "grad_norm": 13.931060049875214, "learning_rate": 9.900990099009901e-08, "loss": 1.4497, "step": 2 }, { "epoch": 0.0008955223880597015, "grad_norm": 12.951035081832662, "learning_rate": 1.4851485148514852e-07, "loss": 1.4249, "step": 3 }, { "epoch": 0.0011940298507462687, "grad_norm": 12.506012032234118, "learning_rate": 1.9801980198019803e-07, "loss": 1.3703, "step": 4 }, { "epoch": 0.0014925373134328358, "grad_norm": 12.759791737538139, "learning_rate": 2.4752475247524754e-07, "loss": 1.3928, "step": 5 }, { "epoch": 0.001791044776119403, "grad_norm": 11.981120238868472, "learning_rate": 2.9702970297029703e-07, "loss": 1.3379, "step": 6 }, { "epoch": 0.00208955223880597, "grad_norm": 14.232070759545241, "learning_rate": 3.4653465346534657e-07, "loss": 1.3504, "step": 7 }, { "epoch": 0.0023880597014925373, "grad_norm": 14.919999725885855, "learning_rate": 3.9603960396039606e-07, "loss": 1.4683, "step": 8 }, { "epoch": 0.0026865671641791043, "grad_norm": 15.081665565944352, "learning_rate": 4.4554455445544555e-07, "loss": 1.4529, "step": 9 }, { "epoch": 0.0029850746268656717, "grad_norm": 14.1043643853956, "learning_rate": 4.950495049504951e-07, "loss": 1.4489, "step": 10 }, { "epoch": 0.0032835820895522386, "grad_norm": 13.03984594597713, "learning_rate": 5.445544554455446e-07, "loss": 1.3641, "step": 11 }, { "epoch": 0.003582089552238806, "grad_norm": 12.55643909326301, "learning_rate": 5.940594059405941e-07, "loss": 1.366, "step": 12 }, { "epoch": 0.003880597014925373, "grad_norm": 13.066793846628885, "learning_rate": 6.435643564356436e-07, "loss": 1.3347, "step": 13 }, { "epoch": 0.00417910447761194, "grad_norm": 14.232011881175255, "learning_rate": 6.930693069306931e-07, "loss": 1.3869, "step": 14 }, { "epoch": 0.004477611940298508, "grad_norm": 13.382384812080517, "learning_rate": 7.425742574257426e-07, "loss": 1.2797, "step": 15 }, { "epoch": 0.004776119402985075, "grad_norm": 14.063303937583319, "learning_rate": 7.920792079207921e-07, "loss": 1.2945, "step": 16 }, { "epoch": 0.005074626865671642, "grad_norm": 12.599956283878015, "learning_rate": 8.415841584158417e-07, "loss": 1.2541, "step": 17 }, { "epoch": 0.005373134328358209, "grad_norm": 15.35594033376082, "learning_rate": 8.910891089108911e-07, "loss": 1.4035, "step": 18 }, { "epoch": 0.005671641791044776, "grad_norm": 13.703270603112776, "learning_rate": 9.405940594059406e-07, "loss": 1.3001, "step": 19 }, { "epoch": 0.005970149253731343, "grad_norm": 11.466111679630176, "learning_rate": 9.900990099009902e-07, "loss": 1.1074, "step": 20 }, { "epoch": 0.00626865671641791, "grad_norm": 12.969087798482644, "learning_rate": 1.0396039603960397e-06, "loss": 1.1151, "step": 21 }, { "epoch": 0.006567164179104477, "grad_norm": 11.305356480464539, "learning_rate": 1.0891089108910893e-06, "loss": 1.0693, "step": 22 }, { "epoch": 0.006865671641791045, "grad_norm": 10.571941049724712, "learning_rate": 1.1386138613861388e-06, "loss": 1.0079, "step": 23 }, { "epoch": 0.007164179104477612, "grad_norm": 13.611826826791107, "learning_rate": 1.1881188118811881e-06, "loss": 1.1241, "step": 24 }, { "epoch": 0.007462686567164179, "grad_norm": 10.580271079481518, "learning_rate": 1.2376237623762377e-06, "loss": 1.0046, "step": 25 }, { "epoch": 0.007761194029850746, "grad_norm": 14.626205948755526, "learning_rate": 1.2871287128712872e-06, "loss": 1.0083, "step": 26 }, { "epoch": 0.008059701492537314, "grad_norm": 9.659001814009544, "learning_rate": 1.3366336633663367e-06, "loss": 0.8455, "step": 27 }, { "epoch": 0.00835820895522388, "grad_norm": 9.492714737777815, "learning_rate": 1.3861386138613863e-06, "loss": 0.744, "step": 28 }, { "epoch": 0.008656716417910448, "grad_norm": 9.552713565555441, "learning_rate": 1.4356435643564356e-06, "loss": 0.7097, "step": 29 }, { "epoch": 0.008955223880597015, "grad_norm": 9.399332350213616, "learning_rate": 1.4851485148514852e-06, "loss": 0.8027, "step": 30 }, { "epoch": 0.009253731343283582, "grad_norm": 10.362788296964608, "learning_rate": 1.5346534653465347e-06, "loss": 0.8065, "step": 31 }, { "epoch": 0.00955223880597015, "grad_norm": 8.883431717385413, "learning_rate": 1.5841584158415842e-06, "loss": 0.6679, "step": 32 }, { "epoch": 0.009850746268656717, "grad_norm": 6.947939015528359, "learning_rate": 1.6336633663366338e-06, "loss": 0.6528, "step": 33 }, { "epoch": 0.010149253731343283, "grad_norm": 5.181069033173571, "learning_rate": 1.6831683168316833e-06, "loss": 0.6128, "step": 34 }, { "epoch": 0.010447761194029851, "grad_norm": 2.1652397986100635, "learning_rate": 1.7326732673267326e-06, "loss": 0.6552, "step": 35 }, { "epoch": 0.010746268656716417, "grad_norm": 4.632087693327325, "learning_rate": 1.7821782178217822e-06, "loss": 0.6027, "step": 36 }, { "epoch": 0.011044776119402985, "grad_norm": 7.422754544331454, "learning_rate": 1.8316831683168317e-06, "loss": 0.59, "step": 37 }, { "epoch": 0.011343283582089553, "grad_norm": 7.399958472269416, "learning_rate": 1.8811881188118813e-06, "loss": 0.586, "step": 38 }, { "epoch": 0.011641791044776119, "grad_norm": 3.656574253442115, "learning_rate": 1.930693069306931e-06, "loss": 0.5334, "step": 39 }, { "epoch": 0.011940298507462687, "grad_norm": 2.513197596004464, "learning_rate": 1.9801980198019803e-06, "loss": 0.5474, "step": 40 }, { "epoch": 0.012238805970149255, "grad_norm": 1.918336955189715, "learning_rate": 2.02970297029703e-06, "loss": 0.6019, "step": 41 }, { "epoch": 0.01253731343283582, "grad_norm": 1.4284644405554494, "learning_rate": 2.0792079207920794e-06, "loss": 0.5576, "step": 42 }, { "epoch": 0.012835820895522388, "grad_norm": 1.578482950062078, "learning_rate": 2.1287128712871288e-06, "loss": 0.5727, "step": 43 }, { "epoch": 0.013134328358208954, "grad_norm": 1.354481047717988, "learning_rate": 2.1782178217821785e-06, "loss": 0.5318, "step": 44 }, { "epoch": 0.013432835820895522, "grad_norm": 1.25497134625581, "learning_rate": 2.227722772277228e-06, "loss": 0.5266, "step": 45 }, { "epoch": 0.01373134328358209, "grad_norm": 1.3741989730447257, "learning_rate": 2.2772277227722776e-06, "loss": 0.5577, "step": 46 }, { "epoch": 0.014029850746268656, "grad_norm": 1.1498984206900507, "learning_rate": 2.326732673267327e-06, "loss": 0.4985, "step": 47 }, { "epoch": 0.014328358208955224, "grad_norm": 1.2303306117520938, "learning_rate": 2.3762376237623762e-06, "loss": 0.5211, "step": 48 }, { "epoch": 0.014626865671641792, "grad_norm": 1.11221660675085, "learning_rate": 2.425742574257426e-06, "loss": 0.4762, "step": 49 }, { "epoch": 0.014925373134328358, "grad_norm": 1.1204377252400624, "learning_rate": 2.4752475247524753e-06, "loss": 0.5147, "step": 50 }, { "epoch": 0.015223880597014926, "grad_norm": 1.3294182563507342, "learning_rate": 2.524752475247525e-06, "loss": 0.582, "step": 51 }, { "epoch": 0.015522388059701492, "grad_norm": 1.1126190487740035, "learning_rate": 2.5742574257425744e-06, "loss": 0.5393, "step": 52 }, { "epoch": 0.01582089552238806, "grad_norm": 1.0981865353995417, "learning_rate": 2.623762376237624e-06, "loss": 0.4749, "step": 53 }, { "epoch": 0.016119402985074627, "grad_norm": 1.0843096961998069, "learning_rate": 2.6732673267326735e-06, "loss": 0.521, "step": 54 }, { "epoch": 0.016417910447761194, "grad_norm": 1.0763196586153305, "learning_rate": 2.7227722772277232e-06, "loss": 0.4723, "step": 55 }, { "epoch": 0.01671641791044776, "grad_norm": 1.0864036348823496, "learning_rate": 2.7722772277227726e-06, "loss": 0.5065, "step": 56 }, { "epoch": 0.01701492537313433, "grad_norm": 1.2234419660497935, "learning_rate": 2.821782178217822e-06, "loss": 0.4942, "step": 57 }, { "epoch": 0.017313432835820895, "grad_norm": 1.0591972627781436, "learning_rate": 2.8712871287128712e-06, "loss": 0.4778, "step": 58 }, { "epoch": 0.01761194029850746, "grad_norm": 1.0721476207536529, "learning_rate": 2.920792079207921e-06, "loss": 0.5391, "step": 59 }, { "epoch": 0.01791044776119403, "grad_norm": 1.0985626878686374, "learning_rate": 2.9702970297029703e-06, "loss": 0.4476, "step": 60 }, { "epoch": 0.018208955223880597, "grad_norm": 1.1392243737050494, "learning_rate": 3.01980198019802e-06, "loss": 0.5497, "step": 61 }, { "epoch": 0.018507462686567163, "grad_norm": 1.0813836369765903, "learning_rate": 3.0693069306930694e-06, "loss": 0.5208, "step": 62 }, { "epoch": 0.018805970149253733, "grad_norm": 1.0889059705799737, "learning_rate": 3.118811881188119e-06, "loss": 0.4994, "step": 63 }, { "epoch": 0.0191044776119403, "grad_norm": 1.0362585523500178, "learning_rate": 3.1683168316831685e-06, "loss": 0.4829, "step": 64 }, { "epoch": 0.019402985074626865, "grad_norm": 1.0359519925978355, "learning_rate": 3.2178217821782182e-06, "loss": 0.476, "step": 65 }, { "epoch": 0.019701492537313434, "grad_norm": 0.9185525908299487, "learning_rate": 3.2673267326732676e-06, "loss": 0.4103, "step": 66 }, { "epoch": 0.02, "grad_norm": 1.0418468842058957, "learning_rate": 3.3168316831683173e-06, "loss": 0.4146, "step": 67 }, { "epoch": 0.020298507462686566, "grad_norm": 1.2661337234827934, "learning_rate": 3.3663366336633666e-06, "loss": 0.4533, "step": 68 }, { "epoch": 0.020597014925373136, "grad_norm": 0.9917842549873999, "learning_rate": 3.4158415841584164e-06, "loss": 0.4651, "step": 69 }, { "epoch": 0.020895522388059702, "grad_norm": 1.0318070695121893, "learning_rate": 3.4653465346534653e-06, "loss": 0.5135, "step": 70 }, { "epoch": 0.021194029850746268, "grad_norm": 0.9942332357926064, "learning_rate": 3.514851485148515e-06, "loss": 0.4812, "step": 71 }, { "epoch": 0.021492537313432834, "grad_norm": 0.9647372803003441, "learning_rate": 3.5643564356435644e-06, "loss": 0.5059, "step": 72 }, { "epoch": 0.021791044776119404, "grad_norm": 0.9756894687445946, "learning_rate": 3.613861386138614e-06, "loss": 0.5094, "step": 73 }, { "epoch": 0.02208955223880597, "grad_norm": 1.0710107538374112, "learning_rate": 3.6633663366336635e-06, "loss": 0.5479, "step": 74 }, { "epoch": 0.022388059701492536, "grad_norm": 1.0245672263567556, "learning_rate": 3.712871287128713e-06, "loss": 0.4638, "step": 75 }, { "epoch": 0.022686567164179106, "grad_norm": 0.972951009425826, "learning_rate": 3.7623762376237625e-06, "loss": 0.5037, "step": 76 }, { "epoch": 0.02298507462686567, "grad_norm": 0.8913835901316376, "learning_rate": 3.8118811881188123e-06, "loss": 0.4415, "step": 77 }, { "epoch": 0.023283582089552238, "grad_norm": 0.918140464986545, "learning_rate": 3.861386138613862e-06, "loss": 0.5016, "step": 78 }, { "epoch": 0.023582089552238807, "grad_norm": 0.9542502229677259, "learning_rate": 3.910891089108911e-06, "loss": 0.4639, "step": 79 }, { "epoch": 0.023880597014925373, "grad_norm": 0.9376495588439216, "learning_rate": 3.960396039603961e-06, "loss": 0.4696, "step": 80 }, { "epoch": 0.02417910447761194, "grad_norm": 1.0118829978197192, "learning_rate": 4.0099009900990104e-06, "loss": 0.4393, "step": 81 }, { "epoch": 0.02447761194029851, "grad_norm": 0.9312691594030761, "learning_rate": 4.05940594059406e-06, "loss": 0.4773, "step": 82 }, { "epoch": 0.024776119402985075, "grad_norm": 0.9718484289074919, "learning_rate": 4.108910891089109e-06, "loss": 0.4915, "step": 83 }, { "epoch": 0.02507462686567164, "grad_norm": 0.9125444747399516, "learning_rate": 4.158415841584159e-06, "loss": 0.4721, "step": 84 }, { "epoch": 0.025373134328358207, "grad_norm": 0.8900812924955422, "learning_rate": 4.207920792079208e-06, "loss": 0.4223, "step": 85 }, { "epoch": 0.025671641791044777, "grad_norm": 1.121907218319262, "learning_rate": 4.2574257425742575e-06, "loss": 0.427, "step": 86 }, { "epoch": 0.025970149253731343, "grad_norm": 1.0549443213006138, "learning_rate": 4.306930693069307e-06, "loss": 0.4944, "step": 87 }, { "epoch": 0.02626865671641791, "grad_norm": 0.9113639087013188, "learning_rate": 4.356435643564357e-06, "loss": 0.445, "step": 88 }, { "epoch": 0.02656716417910448, "grad_norm": 0.9366021839687358, "learning_rate": 4.405940594059406e-06, "loss": 0.419, "step": 89 }, { "epoch": 0.026865671641791045, "grad_norm": 1.021855680179713, "learning_rate": 4.455445544554456e-06, "loss": 0.5366, "step": 90 }, { "epoch": 0.02716417910447761, "grad_norm": 1.306674789879524, "learning_rate": 4.5049504950495054e-06, "loss": 0.4847, "step": 91 }, { "epoch": 0.02746268656716418, "grad_norm": 0.9466689723258144, "learning_rate": 4.554455445544555e-06, "loss": 0.4331, "step": 92 }, { "epoch": 0.027761194029850746, "grad_norm": 1.0130636044595012, "learning_rate": 4.603960396039605e-06, "loss": 0.5162, "step": 93 }, { "epoch": 0.028059701492537312, "grad_norm": 0.9085144452361592, "learning_rate": 4.653465346534654e-06, "loss": 0.4798, "step": 94 }, { "epoch": 0.028358208955223882, "grad_norm": 0.8665298826536127, "learning_rate": 4.702970297029703e-06, "loss": 0.4102, "step": 95 }, { "epoch": 0.028656716417910448, "grad_norm": 0.8922124742463072, "learning_rate": 4.7524752475247525e-06, "loss": 0.4531, "step": 96 }, { "epoch": 0.028955223880597014, "grad_norm": 1.0766078947850648, "learning_rate": 4.801980198019802e-06, "loss": 0.4298, "step": 97 }, { "epoch": 0.029253731343283584, "grad_norm": 0.887211185618867, "learning_rate": 4.851485148514852e-06, "loss": 0.4285, "step": 98 }, { "epoch": 0.02955223880597015, "grad_norm": 0.8297755867290657, "learning_rate": 4.900990099009901e-06, "loss": 0.3819, "step": 99 }, { "epoch": 0.029850746268656716, "grad_norm": 0.9717588300149205, "learning_rate": 4.950495049504951e-06, "loss": 0.5215, "step": 100 }, { "epoch": 0.030149253731343282, "grad_norm": 0.8764330876038892, "learning_rate": 5e-06, "loss": 0.397, "step": 101 }, { "epoch": 0.03044776119402985, "grad_norm": 0.9639801062167249, "learning_rate": 4.99999883128047e-06, "loss": 0.4042, "step": 102 }, { "epoch": 0.030746268656716418, "grad_norm": 0.9009910276004819, "learning_rate": 4.999995325122968e-06, "loss": 0.4648, "step": 103 }, { "epoch": 0.031044776119402984, "grad_norm": 0.9428548948928329, "learning_rate": 4.999989481530776e-06, "loss": 0.4345, "step": 104 }, { "epoch": 0.03134328358208955, "grad_norm": 0.8987738306915273, "learning_rate": 4.9999813005093556e-06, "loss": 0.4682, "step": 105 }, { "epoch": 0.03164179104477612, "grad_norm": 0.8997778964437559, "learning_rate": 4.999970782066357e-06, "loss": 0.4047, "step": 106 }, { "epoch": 0.03194029850746269, "grad_norm": 0.9569315241820582, "learning_rate": 4.999957926211613e-06, "loss": 0.4584, "step": 107 }, { "epoch": 0.032238805970149255, "grad_norm": 0.9078784862909539, "learning_rate": 4.9999427329571445e-06, "loss": 0.4117, "step": 108 }, { "epoch": 0.03253731343283582, "grad_norm": 0.9616336053244321, "learning_rate": 4.999925202317158e-06, "loss": 0.4677, "step": 109 }, { "epoch": 0.03283582089552239, "grad_norm": 0.8828990333654685, "learning_rate": 4.9999053343080424e-06, "loss": 0.454, "step": 110 }, { "epoch": 0.03313432835820895, "grad_norm": 0.8971703986475054, "learning_rate": 4.9998831289483745e-06, "loss": 0.4401, "step": 111 }, { "epoch": 0.03343283582089552, "grad_norm": 1.0410628753647846, "learning_rate": 4.9998585862589165e-06, "loss": 0.4655, "step": 112 }, { "epoch": 0.03373134328358209, "grad_norm": 0.9301629000296456, "learning_rate": 4.999831706262614e-06, "loss": 0.4763, "step": 113 }, { "epoch": 0.03402985074626866, "grad_norm": 0.8268955592230676, "learning_rate": 4.999802488984598e-06, "loss": 0.3696, "step": 114 }, { "epoch": 0.034328358208955224, "grad_norm": 0.8664996618072721, "learning_rate": 4.99977093445219e-06, "loss": 0.3828, "step": 115 }, { "epoch": 0.03462686567164179, "grad_norm": 1.0005293182043562, "learning_rate": 4.999737042694889e-06, "loss": 0.4978, "step": 116 }, { "epoch": 0.03492537313432836, "grad_norm": 0.9140644611648783, "learning_rate": 4.9997008137443845e-06, "loss": 0.4464, "step": 117 }, { "epoch": 0.03522388059701492, "grad_norm": 0.982768264944626, "learning_rate": 4.99966224763455e-06, "loss": 0.4059, "step": 118 }, { "epoch": 0.035522388059701496, "grad_norm": 0.7935157638732689, "learning_rate": 4.999621344401443e-06, "loss": 0.3914, "step": 119 }, { "epoch": 0.03582089552238806, "grad_norm": 0.8379591403713217, "learning_rate": 4.999578104083307e-06, "loss": 0.4461, "step": 120 }, { "epoch": 0.03611940298507463, "grad_norm": 0.9772888203631277, "learning_rate": 4.9995325267205715e-06, "loss": 0.4251, "step": 121 }, { "epoch": 0.036417910447761194, "grad_norm": 0.9050435971292533, "learning_rate": 4.999484612355849e-06, "loss": 0.4231, "step": 122 }, { "epoch": 0.03671641791044776, "grad_norm": 0.8579300964981285, "learning_rate": 4.99943436103394e-06, "loss": 0.4711, "step": 123 }, { "epoch": 0.037014925373134326, "grad_norm": 1.0200535489985578, "learning_rate": 4.999381772801827e-06, "loss": 0.4277, "step": 124 }, { "epoch": 0.03731343283582089, "grad_norm": 0.9442204717239281, "learning_rate": 4.999326847708678e-06, "loss": 0.4448, "step": 125 }, { "epoch": 0.037611940298507465, "grad_norm": 0.895546100855722, "learning_rate": 4.999269585805848e-06, "loss": 0.451, "step": 126 }, { "epoch": 0.03791044776119403, "grad_norm": 0.9401669590071482, "learning_rate": 4.999209987146876e-06, "loss": 0.4118, "step": 127 }, { "epoch": 0.0382089552238806, "grad_norm": 0.9947350109107067, "learning_rate": 4.999148051787483e-06, "loss": 0.4732, "step": 128 }, { "epoch": 0.038507462686567163, "grad_norm": 0.9239692387463458, "learning_rate": 4.999083779785579e-06, "loss": 0.382, "step": 129 }, { "epoch": 0.03880597014925373, "grad_norm": 0.8510728584830578, "learning_rate": 4.999017171201256e-06, "loss": 0.3563, "step": 130 }, { "epoch": 0.039104477611940296, "grad_norm": 0.9327467858570172, "learning_rate": 4.998948226096792e-06, "loss": 0.4404, "step": 131 }, { "epoch": 0.03940298507462687, "grad_norm": 0.9122306769001939, "learning_rate": 4.998876944536648e-06, "loss": 0.4465, "step": 132 }, { "epoch": 0.039701492537313435, "grad_norm": 0.8628534940861644, "learning_rate": 4.9988033265874714e-06, "loss": 0.3978, "step": 133 }, { "epoch": 0.04, "grad_norm": 0.9890246363319865, "learning_rate": 4.998727372318093e-06, "loss": 0.4522, "step": 134 }, { "epoch": 0.04029850746268657, "grad_norm": 0.8770139868884229, "learning_rate": 4.998649081799528e-06, "loss": 0.3612, "step": 135 }, { "epoch": 0.04059701492537313, "grad_norm": 0.8875725344879101, "learning_rate": 4.998568455104976e-06, "loss": 0.431, "step": 136 }, { "epoch": 0.0408955223880597, "grad_norm": 0.8689054951608837, "learning_rate": 4.998485492309821e-06, "loss": 0.4131, "step": 137 }, { "epoch": 0.04119402985074627, "grad_norm": 0.8315965070349196, "learning_rate": 4.998400193491632e-06, "loss": 0.4025, "step": 138 }, { "epoch": 0.04149253731343284, "grad_norm": 0.9156816933709063, "learning_rate": 4.9983125587301594e-06, "loss": 0.4201, "step": 139 }, { "epoch": 0.041791044776119404, "grad_norm": 0.919113537897832, "learning_rate": 4.998222588107342e-06, "loss": 0.3936, "step": 140 }, { "epoch": 0.04208955223880597, "grad_norm": 0.9372729637074827, "learning_rate": 4.9981302817072984e-06, "loss": 0.3789, "step": 141 }, { "epoch": 0.042388059701492536, "grad_norm": 0.8930146991739137, "learning_rate": 4.998035639616334e-06, "loss": 0.4525, "step": 142 }, { "epoch": 0.0426865671641791, "grad_norm": 0.9163631113724126, "learning_rate": 4.997938661922936e-06, "loss": 0.4033, "step": 143 }, { "epoch": 0.04298507462686567, "grad_norm": 1.0610564882169262, "learning_rate": 4.997839348717776e-06, "loss": 0.4699, "step": 144 }, { "epoch": 0.04328358208955224, "grad_norm": 0.9142847782134293, "learning_rate": 4.99773770009371e-06, "loss": 0.4134, "step": 145 }, { "epoch": 0.04358208955223881, "grad_norm": 0.8539068181792682, "learning_rate": 4.997633716145778e-06, "loss": 0.3879, "step": 146 }, { "epoch": 0.043880597014925374, "grad_norm": 0.908534305321226, "learning_rate": 4.9975273969712e-06, "loss": 0.4034, "step": 147 }, { "epoch": 0.04417910447761194, "grad_norm": 0.9065531552905202, "learning_rate": 4.997418742669383e-06, "loss": 0.4624, "step": 148 }, { "epoch": 0.044477611940298506, "grad_norm": 0.8900058271373764, "learning_rate": 4.997307753341918e-06, "loss": 0.4096, "step": 149 }, { "epoch": 0.04477611940298507, "grad_norm": 0.8850620387207646, "learning_rate": 4.997194429092573e-06, "loss": 0.3899, "step": 150 }, { "epoch": 0.045074626865671645, "grad_norm": 0.8701299536472148, "learning_rate": 4.997078770027308e-06, "loss": 0.4333, "step": 151 }, { "epoch": 0.04537313432835821, "grad_norm": 1.020005231391521, "learning_rate": 4.99696077625426e-06, "loss": 0.4582, "step": 152 }, { "epoch": 0.04567164179104478, "grad_norm": 1.042601527987692, "learning_rate": 4.996840447883748e-06, "loss": 0.4136, "step": 153 }, { "epoch": 0.04597014925373134, "grad_norm": 1.0231933296827058, "learning_rate": 4.996717785028278e-06, "loss": 0.4359, "step": 154 }, { "epoch": 0.04626865671641791, "grad_norm": 0.9352772150333463, "learning_rate": 4.9965927878025365e-06, "loss": 0.4572, "step": 155 }, { "epoch": 0.046567164179104475, "grad_norm": 0.8932031869034676, "learning_rate": 4.996465456323394e-06, "loss": 0.4292, "step": 156 }, { "epoch": 0.04686567164179104, "grad_norm": 0.9781906096105041, "learning_rate": 4.9963357907099e-06, "loss": 0.4399, "step": 157 }, { "epoch": 0.047164179104477615, "grad_norm": 0.8702251247937537, "learning_rate": 4.996203791083291e-06, "loss": 0.3471, "step": 158 }, { "epoch": 0.04746268656716418, "grad_norm": 0.891540485971903, "learning_rate": 4.996069457566982e-06, "loss": 0.3809, "step": 159 }, { "epoch": 0.04776119402985075, "grad_norm": 0.8390421395659049, "learning_rate": 4.995932790286572e-06, "loss": 0.3728, "step": 160 }, { "epoch": 0.04805970149253731, "grad_norm": 0.9759929778750225, "learning_rate": 4.995793789369842e-06, "loss": 0.3929, "step": 161 }, { "epoch": 0.04835820895522388, "grad_norm": 0.9001180874449742, "learning_rate": 4.9956524549467524e-06, "loss": 0.4161, "step": 162 }, { "epoch": 0.048656716417910445, "grad_norm": 1.0153327655108404, "learning_rate": 4.995508787149451e-06, "loss": 0.4279, "step": 163 }, { "epoch": 0.04895522388059702, "grad_norm": 0.9940714318433066, "learning_rate": 4.995362786112261e-06, "loss": 0.4293, "step": 164 }, { "epoch": 0.049253731343283584, "grad_norm": 0.8547509297200269, "learning_rate": 4.99521445197169e-06, "loss": 0.3878, "step": 165 }, { "epoch": 0.04955223880597015, "grad_norm": 0.8085492708878349, "learning_rate": 4.995063784866427e-06, "loss": 0.3565, "step": 166 }, { "epoch": 0.049850746268656716, "grad_norm": 0.9845859673203974, "learning_rate": 4.994910784937343e-06, "loss": 0.4446, "step": 167 }, { "epoch": 0.05014925373134328, "grad_norm": 1.0461284807321518, "learning_rate": 4.99475545232749e-06, "loss": 0.4254, "step": 168 }, { "epoch": 0.05044776119402985, "grad_norm": 0.9140198174691191, "learning_rate": 4.994597787182097e-06, "loss": 0.436, "step": 169 }, { "epoch": 0.050746268656716415, "grad_norm": 0.9629152651211416, "learning_rate": 4.99443778964858e-06, "loss": 0.4742, "step": 170 }, { "epoch": 0.05104477611940299, "grad_norm": 0.9693205236551692, "learning_rate": 4.994275459876531e-06, "loss": 0.4206, "step": 171 }, { "epoch": 0.051343283582089554, "grad_norm": 0.885322451195088, "learning_rate": 4.994110798017725e-06, "loss": 0.3567, "step": 172 }, { "epoch": 0.05164179104477612, "grad_norm": 0.9180041796566594, "learning_rate": 4.993943804226117e-06, "loss": 0.4015, "step": 173 }, { "epoch": 0.051940298507462686, "grad_norm": 0.8915204742324517, "learning_rate": 4.9937744786578425e-06, "loss": 0.4378, "step": 174 }, { "epoch": 0.05223880597014925, "grad_norm": 0.9057459288082191, "learning_rate": 4.993602821471216e-06, "loss": 0.3972, "step": 175 }, { "epoch": 0.05253731343283582, "grad_norm": 0.9672531963127454, "learning_rate": 4.993428832826734e-06, "loss": 0.4127, "step": 176 }, { "epoch": 0.05283582089552239, "grad_norm": 0.9087219850655597, "learning_rate": 4.993252512887069e-06, "loss": 0.4277, "step": 177 }, { "epoch": 0.05313432835820896, "grad_norm": 0.8440962947146048, "learning_rate": 4.993073861817078e-06, "loss": 0.4241, "step": 178 }, { "epoch": 0.05343283582089552, "grad_norm": 0.9406161881759132, "learning_rate": 4.992892879783795e-06, "loss": 0.4186, "step": 179 }, { "epoch": 0.05373134328358209, "grad_norm": 0.9103960721738174, "learning_rate": 4.992709566956435e-06, "loss": 0.4243, "step": 180 }, { "epoch": 0.054029850746268655, "grad_norm": 0.9301312469178198, "learning_rate": 4.992523923506388e-06, "loss": 0.4279, "step": 181 }, { "epoch": 0.05432835820895522, "grad_norm": 0.9894235486712242, "learning_rate": 4.992335949607229e-06, "loss": 0.4456, "step": 182 }, { "epoch": 0.054626865671641794, "grad_norm": 0.9146798098761367, "learning_rate": 4.992145645434708e-06, "loss": 0.3528, "step": 183 }, { "epoch": 0.05492537313432836, "grad_norm": 0.9418084586681869, "learning_rate": 4.991953011166753e-06, "loss": 0.4134, "step": 184 }, { "epoch": 0.05522388059701493, "grad_norm": 0.8443152449175418, "learning_rate": 4.991758046983476e-06, "loss": 0.3546, "step": 185 }, { "epoch": 0.05552238805970149, "grad_norm": 0.8070965145975226, "learning_rate": 4.991560753067161e-06, "loss": 0.3852, "step": 186 }, { "epoch": 0.05582089552238806, "grad_norm": 0.8907506048228214, "learning_rate": 4.991361129602274e-06, "loss": 0.4445, "step": 187 }, { "epoch": 0.056119402985074625, "grad_norm": 0.834671843672075, "learning_rate": 4.991159176775458e-06, "loss": 0.3976, "step": 188 }, { "epoch": 0.05641791044776119, "grad_norm": 0.9954785016124977, "learning_rate": 4.9909548947755334e-06, "loss": 0.4353, "step": 189 }, { "epoch": 0.056716417910447764, "grad_norm": 0.7445588575209131, "learning_rate": 4.990748283793499e-06, "loss": 0.3131, "step": 190 }, { "epoch": 0.05701492537313433, "grad_norm": 0.8570773114706813, "learning_rate": 4.990539344022531e-06, "loss": 0.3788, "step": 191 }, { "epoch": 0.057313432835820896, "grad_norm": 1.0005429174734601, "learning_rate": 4.990328075657985e-06, "loss": 0.487, "step": 192 }, { "epoch": 0.05761194029850746, "grad_norm": 0.8625742070670317, "learning_rate": 4.990114478897389e-06, "loss": 0.3654, "step": 193 }, { "epoch": 0.05791044776119403, "grad_norm": 0.9721768330250884, "learning_rate": 4.989898553940452e-06, "loss": 0.3933, "step": 194 }, { "epoch": 0.058208955223880594, "grad_norm": 0.9914169807245352, "learning_rate": 4.989680300989058e-06, "loss": 0.4732, "step": 195 }, { "epoch": 0.05850746268656717, "grad_norm": 0.8411421119088517, "learning_rate": 4.989459720247269e-06, "loss": 0.4478, "step": 196 }, { "epoch": 0.05880597014925373, "grad_norm": 0.924615837847109, "learning_rate": 4.989236811921322e-06, "loss": 0.4654, "step": 197 }, { "epoch": 0.0591044776119403, "grad_norm": 0.9328373422116539, "learning_rate": 4.989011576219632e-06, "loss": 0.448, "step": 198 }, { "epoch": 0.059402985074626866, "grad_norm": 0.9014890809533687, "learning_rate": 4.9887840133527874e-06, "loss": 0.4247, "step": 199 }, { "epoch": 0.05970149253731343, "grad_norm": 0.8132765320225406, "learning_rate": 4.988554123533554e-06, "loss": 0.3943, "step": 200 }, { "epoch": 0.06, "grad_norm": 0.9241012173718349, "learning_rate": 4.9883219069768744e-06, "loss": 0.3633, "step": 201 }, { "epoch": 0.060298507462686564, "grad_norm": 0.84126125400794, "learning_rate": 4.988087363899864e-06, "loss": 0.3297, "step": 202 }, { "epoch": 0.06059701492537314, "grad_norm": 0.8347638005889463, "learning_rate": 4.987850494521817e-06, "loss": 0.4134, "step": 203 }, { "epoch": 0.0608955223880597, "grad_norm": 0.8847082107220858, "learning_rate": 4.987611299064197e-06, "loss": 0.3754, "step": 204 }, { "epoch": 0.06119402985074627, "grad_norm": 0.8981615926938369, "learning_rate": 4.987369777750649e-06, "loss": 0.407, "step": 205 }, { "epoch": 0.061492537313432835, "grad_norm": 0.875779341191485, "learning_rate": 4.9871259308069885e-06, "loss": 0.4204, "step": 206 }, { "epoch": 0.0617910447761194, "grad_norm": 0.9212258634041721, "learning_rate": 4.986879758461207e-06, "loss": 0.4021, "step": 207 }, { "epoch": 0.06208955223880597, "grad_norm": 0.8857433024281737, "learning_rate": 4.986631260943469e-06, "loss": 0.3837, "step": 208 }, { "epoch": 0.06238805970149254, "grad_norm": 0.967859200633979, "learning_rate": 4.986380438486113e-06, "loss": 0.3977, "step": 209 }, { "epoch": 0.0626865671641791, "grad_norm": 0.925560346678998, "learning_rate": 4.986127291323653e-06, "loss": 0.4273, "step": 210 }, { "epoch": 0.06298507462686567, "grad_norm": 0.863600208601708, "learning_rate": 4.985871819692775e-06, "loss": 0.4031, "step": 211 }, { "epoch": 0.06328358208955225, "grad_norm": 0.9327927944187931, "learning_rate": 4.985614023832339e-06, "loss": 0.4167, "step": 212 }, { "epoch": 0.0635820895522388, "grad_norm": 0.891025459096459, "learning_rate": 4.985353903983377e-06, "loss": 0.4188, "step": 213 }, { "epoch": 0.06388059701492538, "grad_norm": 0.8627498456244966, "learning_rate": 4.985091460389096e-06, "loss": 0.3822, "step": 214 }, { "epoch": 0.06417910447761194, "grad_norm": 0.9843279624632778, "learning_rate": 4.9848266932948745e-06, "loss": 0.3868, "step": 215 }, { "epoch": 0.06447761194029851, "grad_norm": 0.9197216488420602, "learning_rate": 4.984559602948261e-06, "loss": 0.3866, "step": 216 }, { "epoch": 0.06477611940298507, "grad_norm": 1.0380980582080264, "learning_rate": 4.984290189598981e-06, "loss": 0.4032, "step": 217 }, { "epoch": 0.06507462686567164, "grad_norm": 0.8366437135304863, "learning_rate": 4.984018453498928e-06, "loss": 0.3525, "step": 218 }, { "epoch": 0.06537313432835822, "grad_norm": 0.9649147229825452, "learning_rate": 4.983744394902169e-06, "loss": 0.4686, "step": 219 }, { "epoch": 0.06567164179104477, "grad_norm": 0.8490377817349865, "learning_rate": 4.983468014064942e-06, "loss": 0.4265, "step": 220 }, { "epoch": 0.06597014925373135, "grad_norm": 0.7792537616515028, "learning_rate": 4.983189311245656e-06, "loss": 0.3949, "step": 221 }, { "epoch": 0.0662686567164179, "grad_norm": 0.8750639478343961, "learning_rate": 4.982908286704893e-06, "loss": 0.3828, "step": 222 }, { "epoch": 0.06656716417910448, "grad_norm": 0.9455155367225592, "learning_rate": 4.982624940705402e-06, "loss": 0.4166, "step": 223 }, { "epoch": 0.06686567164179104, "grad_norm": 0.9295734685485272, "learning_rate": 4.982339273512106e-06, "loss": 0.4153, "step": 224 }, { "epoch": 0.06716417910447761, "grad_norm": 0.8631686207686246, "learning_rate": 4.982051285392097e-06, "loss": 0.3615, "step": 225 }, { "epoch": 0.06746268656716418, "grad_norm": 0.9880414466922032, "learning_rate": 4.981760976614634e-06, "loss": 0.46, "step": 226 }, { "epoch": 0.06776119402985074, "grad_norm": 0.8872311404032606, "learning_rate": 4.981468347451154e-06, "loss": 0.3871, "step": 227 }, { "epoch": 0.06805970149253732, "grad_norm": 0.8943003479815301, "learning_rate": 4.981173398175252e-06, "loss": 0.371, "step": 228 }, { "epoch": 0.06835820895522388, "grad_norm": 0.9525334440362165, "learning_rate": 4.9808761290627035e-06, "loss": 0.4159, "step": 229 }, { "epoch": 0.06865671641791045, "grad_norm": 0.871749210147148, "learning_rate": 4.9805765403914455e-06, "loss": 0.3764, "step": 230 }, { "epoch": 0.06895522388059701, "grad_norm": 0.925600078237541, "learning_rate": 4.980274632441585e-06, "loss": 0.3768, "step": 231 }, { "epoch": 0.06925373134328358, "grad_norm": 0.9672291973292384, "learning_rate": 4.9799704054954015e-06, "loss": 0.477, "step": 232 }, { "epoch": 0.06955223880597015, "grad_norm": 0.768340028516794, "learning_rate": 4.979663859837337e-06, "loss": 0.3902, "step": 233 }, { "epoch": 0.06985074626865671, "grad_norm": 0.8740643362093679, "learning_rate": 4.979354995754006e-06, "loss": 0.3816, "step": 234 }, { "epoch": 0.07014925373134329, "grad_norm": 1.0163576539062806, "learning_rate": 4.979043813534189e-06, "loss": 0.4156, "step": 235 }, { "epoch": 0.07044776119402985, "grad_norm": 0.9103806310986855, "learning_rate": 4.978730313468832e-06, "loss": 0.4067, "step": 236 }, { "epoch": 0.07074626865671642, "grad_norm": 0.922939829872911, "learning_rate": 4.9784144958510515e-06, "loss": 0.369, "step": 237 }, { "epoch": 0.07104477611940299, "grad_norm": 1.0128842126660702, "learning_rate": 4.978096360976129e-06, "loss": 0.425, "step": 238 }, { "epoch": 0.07134328358208955, "grad_norm": 0.9083583140182622, "learning_rate": 4.977775909141513e-06, "loss": 0.4008, "step": 239 }, { "epoch": 0.07164179104477612, "grad_norm": 0.9204385171658356, "learning_rate": 4.9774531406468164e-06, "loss": 0.4098, "step": 240 }, { "epoch": 0.07194029850746268, "grad_norm": 0.8800378945968806, "learning_rate": 4.977128055793823e-06, "loss": 0.4207, "step": 241 }, { "epoch": 0.07223880597014926, "grad_norm": 0.9488253130303587, "learning_rate": 4.976800654886476e-06, "loss": 0.4467, "step": 242 }, { "epoch": 0.07253731343283581, "grad_norm": 0.924076052886822, "learning_rate": 4.976470938230889e-06, "loss": 0.4257, "step": 243 }, { "epoch": 0.07283582089552239, "grad_norm": 0.9270092762682594, "learning_rate": 4.976138906135341e-06, "loss": 0.4214, "step": 244 }, { "epoch": 0.07313432835820896, "grad_norm": 0.897415607649877, "learning_rate": 4.9758045589102696e-06, "loss": 0.3953, "step": 245 }, { "epoch": 0.07343283582089552, "grad_norm": 0.9972346268529109, "learning_rate": 4.975467896868284e-06, "loss": 0.4519, "step": 246 }, { "epoch": 0.0737313432835821, "grad_norm": 0.9907973485734135, "learning_rate": 4.9751289203241535e-06, "loss": 0.4678, "step": 247 }, { "epoch": 0.07402985074626865, "grad_norm": 0.8545804808558607, "learning_rate": 4.974787629594815e-06, "loss": 0.4088, "step": 248 }, { "epoch": 0.07432835820895523, "grad_norm": 0.9288807835854102, "learning_rate": 4.974444024999366e-06, "loss": 0.4414, "step": 249 }, { "epoch": 0.07462686567164178, "grad_norm": 0.7975187554309996, "learning_rate": 4.974098106859068e-06, "loss": 0.4158, "step": 250 }, { "epoch": 0.07492537313432836, "grad_norm": 0.9898504646944991, "learning_rate": 4.973749875497346e-06, "loss": 0.4242, "step": 251 }, { "epoch": 0.07522388059701493, "grad_norm": 0.8602699019118681, "learning_rate": 4.973399331239789e-06, "loss": 0.3789, "step": 252 }, { "epoch": 0.07552238805970149, "grad_norm": 0.8750135247666223, "learning_rate": 4.973046474414145e-06, "loss": 0.3936, "step": 253 }, { "epoch": 0.07582089552238806, "grad_norm": 0.9200590185086436, "learning_rate": 4.9726913053503285e-06, "loss": 0.4523, "step": 254 }, { "epoch": 0.07611940298507462, "grad_norm": 0.9256789347650409, "learning_rate": 4.972333824380414e-06, "loss": 0.4328, "step": 255 }, { "epoch": 0.0764179104477612, "grad_norm": 0.8363482584889479, "learning_rate": 4.9719740318386375e-06, "loss": 0.3833, "step": 256 }, { "epoch": 0.07671641791044777, "grad_norm": 0.8386638424623721, "learning_rate": 4.971611928061395e-06, "loss": 0.4052, "step": 257 }, { "epoch": 0.07701492537313433, "grad_norm": 0.7996227547339989, "learning_rate": 4.9712475133872455e-06, "loss": 0.3646, "step": 258 }, { "epoch": 0.0773134328358209, "grad_norm": 0.8768980961009939, "learning_rate": 4.970880788156906e-06, "loss": 0.3561, "step": 259 }, { "epoch": 0.07761194029850746, "grad_norm": 0.8551248770367569, "learning_rate": 4.97051175271326e-06, "loss": 0.3774, "step": 260 }, { "epoch": 0.07791044776119403, "grad_norm": 0.8253804217485359, "learning_rate": 4.970140407401343e-06, "loss": 0.3577, "step": 261 }, { "epoch": 0.07820895522388059, "grad_norm": 0.8907747036111339, "learning_rate": 4.969766752568355e-06, "loss": 0.3611, "step": 262 }, { "epoch": 0.07850746268656716, "grad_norm": 0.9257974072192369, "learning_rate": 4.969390788563653e-06, "loss": 0.4132, "step": 263 }, { "epoch": 0.07880597014925374, "grad_norm": 0.8701098416781633, "learning_rate": 4.969012515738757e-06, "loss": 0.3757, "step": 264 }, { "epoch": 0.0791044776119403, "grad_norm": 0.8589961683118461, "learning_rate": 4.9686319344473395e-06, "loss": 0.3971, "step": 265 }, { "epoch": 0.07940298507462687, "grad_norm": 0.9341286772453976, "learning_rate": 4.968249045045237e-06, "loss": 0.429, "step": 266 }, { "epoch": 0.07970149253731343, "grad_norm": 0.9938003598001335, "learning_rate": 4.967863847890441e-06, "loss": 0.396, "step": 267 }, { "epoch": 0.08, "grad_norm": 0.9011497351262049, "learning_rate": 4.9674763433431006e-06, "loss": 0.4308, "step": 268 }, { "epoch": 0.08029850746268656, "grad_norm": 0.977043100659228, "learning_rate": 4.9670865317655245e-06, "loss": 0.4258, "step": 269 }, { "epoch": 0.08059701492537313, "grad_norm": 0.9059609196950806, "learning_rate": 4.966694413522177e-06, "loss": 0.4267, "step": 270 }, { "epoch": 0.0808955223880597, "grad_norm": 1.0078844426437794, "learning_rate": 4.966299988979678e-06, "loss": 0.3847, "step": 271 }, { "epoch": 0.08119402985074627, "grad_norm": 0.9703165705523532, "learning_rate": 4.965903258506806e-06, "loss": 0.4097, "step": 272 }, { "epoch": 0.08149253731343284, "grad_norm": 0.8870735512921978, "learning_rate": 4.965504222474494e-06, "loss": 0.3928, "step": 273 }, { "epoch": 0.0817910447761194, "grad_norm": 0.8739388689016976, "learning_rate": 4.96510288125583e-06, "loss": 0.3731, "step": 274 }, { "epoch": 0.08208955223880597, "grad_norm": 1.0212824714114637, "learning_rate": 4.9646992352260595e-06, "loss": 0.3795, "step": 275 }, { "epoch": 0.08238805970149254, "grad_norm": 0.8754845959865762, "learning_rate": 4.964293284762581e-06, "loss": 0.3911, "step": 276 }, { "epoch": 0.0826865671641791, "grad_norm": 0.8698613925405617, "learning_rate": 4.9638850302449485e-06, "loss": 0.3532, "step": 277 }, { "epoch": 0.08298507462686568, "grad_norm": 0.9357074185617845, "learning_rate": 4.96347447205487e-06, "loss": 0.4101, "step": 278 }, { "epoch": 0.08328358208955224, "grad_norm": 0.8472354754739433, "learning_rate": 4.963061610576207e-06, "loss": 0.4406, "step": 279 }, { "epoch": 0.08358208955223881, "grad_norm": 0.8564250146540285, "learning_rate": 4.962646446194977e-06, "loss": 0.3793, "step": 280 }, { "epoch": 0.08388059701492537, "grad_norm": 0.8857581051447966, "learning_rate": 4.962228979299345e-06, "loss": 0.3822, "step": 281 }, { "epoch": 0.08417910447761194, "grad_norm": 0.890998048990312, "learning_rate": 4.961809210279634e-06, "loss": 0.3944, "step": 282 }, { "epoch": 0.08447761194029851, "grad_norm": 0.8601180120292999, "learning_rate": 4.9613871395283195e-06, "loss": 0.4009, "step": 283 }, { "epoch": 0.08477611940298507, "grad_norm": 0.8277195700046859, "learning_rate": 4.960962767440026e-06, "loss": 0.3564, "step": 284 }, { "epoch": 0.08507462686567165, "grad_norm": 0.8423871373655928, "learning_rate": 4.96053609441153e-06, "loss": 0.3747, "step": 285 }, { "epoch": 0.0853731343283582, "grad_norm": 0.9407464456549731, "learning_rate": 4.960107120841762e-06, "loss": 0.3858, "step": 286 }, { "epoch": 0.08567164179104478, "grad_norm": 0.9140065601660698, "learning_rate": 4.9596758471318e-06, "loss": 0.3961, "step": 287 }, { "epoch": 0.08597014925373134, "grad_norm": 0.858838528185795, "learning_rate": 4.959242273684878e-06, "loss": 0.3915, "step": 288 }, { "epoch": 0.08626865671641791, "grad_norm": 0.9199463736666107, "learning_rate": 4.958806400906372e-06, "loss": 0.4275, "step": 289 }, { "epoch": 0.08656716417910448, "grad_norm": 0.9243014960259777, "learning_rate": 4.958368229203816e-06, "loss": 0.3851, "step": 290 }, { "epoch": 0.08686567164179104, "grad_norm": 0.9091623645910903, "learning_rate": 4.957927758986888e-06, "loss": 0.3957, "step": 291 }, { "epoch": 0.08716417910447762, "grad_norm": 0.8915385371700681, "learning_rate": 4.9574849906674174e-06, "loss": 0.4437, "step": 292 }, { "epoch": 0.08746268656716417, "grad_norm": 0.8934354732288062, "learning_rate": 4.957039924659382e-06, "loss": 0.358, "step": 293 }, { "epoch": 0.08776119402985075, "grad_norm": 0.9061503598549887, "learning_rate": 4.956592561378907e-06, "loss": 0.3692, "step": 294 }, { "epoch": 0.0880597014925373, "grad_norm": 0.8830535399018076, "learning_rate": 4.956142901244268e-06, "loss": 0.4223, "step": 295 }, { "epoch": 0.08835820895522388, "grad_norm": 0.9413406518046492, "learning_rate": 4.955690944675882e-06, "loss": 0.3859, "step": 296 }, { "epoch": 0.08865671641791045, "grad_norm": 0.8947764680217752, "learning_rate": 4.955236692096324e-06, "loss": 0.3682, "step": 297 }, { "epoch": 0.08895522388059701, "grad_norm": 0.8405209768536294, "learning_rate": 4.954780143930303e-06, "loss": 0.3826, "step": 298 }, { "epoch": 0.08925373134328358, "grad_norm": 0.9396234783746453, "learning_rate": 4.954321300604683e-06, "loss": 0.4112, "step": 299 }, { "epoch": 0.08955223880597014, "grad_norm": 0.9114411631804026, "learning_rate": 4.953860162548472e-06, "loss": 0.3612, "step": 300 }, { "epoch": 0.08985074626865672, "grad_norm": 0.8817713479006289, "learning_rate": 4.953396730192821e-06, "loss": 0.4209, "step": 301 }, { "epoch": 0.09014925373134329, "grad_norm": 0.8726071936119882, "learning_rate": 4.952931003971029e-06, "loss": 0.4155, "step": 302 }, { "epoch": 0.09044776119402985, "grad_norm": 0.9048765061805111, "learning_rate": 4.952462984318539e-06, "loss": 0.3896, "step": 303 }, { "epoch": 0.09074626865671642, "grad_norm": 0.9804012864737665, "learning_rate": 4.9519926716729376e-06, "loss": 0.4211, "step": 304 }, { "epoch": 0.09104477611940298, "grad_norm": 0.8873017745256736, "learning_rate": 4.951520066473955e-06, "loss": 0.4544, "step": 305 }, { "epoch": 0.09134328358208955, "grad_norm": 0.8062695091689972, "learning_rate": 4.951045169163467e-06, "loss": 0.3992, "step": 306 }, { "epoch": 0.09164179104477611, "grad_norm": 0.8777340484612656, "learning_rate": 4.950567980185489e-06, "loss": 0.4128, "step": 307 }, { "epoch": 0.09194029850746269, "grad_norm": 0.9366219952901376, "learning_rate": 4.950088499986183e-06, "loss": 0.3942, "step": 308 }, { "epoch": 0.09223880597014926, "grad_norm": 0.8144148378243571, "learning_rate": 4.949606729013851e-06, "loss": 0.3958, "step": 309 }, { "epoch": 0.09253731343283582, "grad_norm": 0.8968060502801483, "learning_rate": 4.949122667718935e-06, "loss": 0.406, "step": 310 }, { "epoch": 0.09283582089552239, "grad_norm": 0.931747156022598, "learning_rate": 4.948636316554023e-06, "loss": 0.3839, "step": 311 }, { "epoch": 0.09313432835820895, "grad_norm": 0.7978130697742478, "learning_rate": 4.948147675973841e-06, "loss": 0.3653, "step": 312 }, { "epoch": 0.09343283582089552, "grad_norm": 0.7934615187323222, "learning_rate": 4.947656746435255e-06, "loss": 0.3813, "step": 313 }, { "epoch": 0.09373134328358208, "grad_norm": 0.9589131627871469, "learning_rate": 4.947163528397273e-06, "loss": 0.469, "step": 314 }, { "epoch": 0.09402985074626866, "grad_norm": 0.8464181634763774, "learning_rate": 4.946668022321042e-06, "loss": 0.3979, "step": 315 }, { "epoch": 0.09432835820895523, "grad_norm": 0.8899369392024041, "learning_rate": 4.946170228669847e-06, "loss": 0.3292, "step": 316 }, { "epoch": 0.09462686567164179, "grad_norm": 0.8553955124203195, "learning_rate": 4.9456701479091155e-06, "loss": 0.3925, "step": 317 }, { "epoch": 0.09492537313432836, "grad_norm": 0.8606099277635237, "learning_rate": 4.945167780506407e-06, "loss": 0.433, "step": 318 }, { "epoch": 0.09522388059701492, "grad_norm": 0.907825007701508, "learning_rate": 4.944663126931426e-06, "loss": 0.3645, "step": 319 }, { "epoch": 0.0955223880597015, "grad_norm": 0.8612034752290371, "learning_rate": 4.94415618765601e-06, "loss": 0.3607, "step": 320 }, { "epoch": 0.09582089552238807, "grad_norm": 0.9069258389724871, "learning_rate": 4.943646963154134e-06, "loss": 0.402, "step": 321 }, { "epoch": 0.09611940298507463, "grad_norm": 0.9862506026387452, "learning_rate": 4.943135453901911e-06, "loss": 0.4424, "step": 322 }, { "epoch": 0.0964179104477612, "grad_norm": 0.9122656478208079, "learning_rate": 4.942621660377592e-06, "loss": 0.4595, "step": 323 }, { "epoch": 0.09671641791044776, "grad_norm": 0.9125176637122425, "learning_rate": 4.942105583061558e-06, "loss": 0.3824, "step": 324 }, { "epoch": 0.09701492537313433, "grad_norm": 0.8593735642495912, "learning_rate": 4.941587222436331e-06, "loss": 0.3785, "step": 325 }, { "epoch": 0.09731343283582089, "grad_norm": 0.82674079067205, "learning_rate": 4.941066578986565e-06, "loss": 0.3416, "step": 326 }, { "epoch": 0.09761194029850746, "grad_norm": 0.9689144745422055, "learning_rate": 4.940543653199049e-06, "loss": 0.3368, "step": 327 }, { "epoch": 0.09791044776119404, "grad_norm": 0.9277054433752907, "learning_rate": 4.940018445562704e-06, "loss": 0.3761, "step": 328 }, { "epoch": 0.0982089552238806, "grad_norm": 0.7578752447120486, "learning_rate": 4.939490956568589e-06, "loss": 0.3509, "step": 329 }, { "epoch": 0.09850746268656717, "grad_norm": 0.8298981698879738, "learning_rate": 4.938961186709893e-06, "loss": 0.3969, "step": 330 }, { "epoch": 0.09880597014925373, "grad_norm": 0.9238719788718148, "learning_rate": 4.938429136481936e-06, "loss": 0.4294, "step": 331 }, { "epoch": 0.0991044776119403, "grad_norm": 0.8589172708753758, "learning_rate": 4.937894806382173e-06, "loss": 0.4139, "step": 332 }, { "epoch": 0.09940298507462686, "grad_norm": 0.8585178801953061, "learning_rate": 4.937358196910191e-06, "loss": 0.395, "step": 333 }, { "epoch": 0.09970149253731343, "grad_norm": 0.8099539599389111, "learning_rate": 4.936819308567705e-06, "loss": 0.3669, "step": 334 }, { "epoch": 0.1, "grad_norm": 0.839470127485901, "learning_rate": 4.9362781418585635e-06, "loss": 0.3418, "step": 335 }, { "epoch": 0.10029850746268656, "grad_norm": 0.9244240560709288, "learning_rate": 4.9357346972887425e-06, "loss": 0.3656, "step": 336 }, { "epoch": 0.10059701492537314, "grad_norm": 0.8543828203767136, "learning_rate": 4.935188975366352e-06, "loss": 0.3853, "step": 337 }, { "epoch": 0.1008955223880597, "grad_norm": 0.9403997055915649, "learning_rate": 4.934640976601627e-06, "loss": 0.4266, "step": 338 }, { "epoch": 0.10119402985074627, "grad_norm": 0.8656569961847842, "learning_rate": 4.934090701506933e-06, "loss": 0.3519, "step": 339 }, { "epoch": 0.10149253731343283, "grad_norm": 1.0259990423516157, "learning_rate": 4.9335381505967635e-06, "loss": 0.3739, "step": 340 }, { "epoch": 0.1017910447761194, "grad_norm": 0.9184253012427269, "learning_rate": 4.932983324387742e-06, "loss": 0.3891, "step": 341 }, { "epoch": 0.10208955223880598, "grad_norm": 0.8855470935863624, "learning_rate": 4.932426223398615e-06, "loss": 0.4003, "step": 342 }, { "epoch": 0.10238805970149253, "grad_norm": 0.8338863353994163, "learning_rate": 4.9318668481502604e-06, "loss": 0.3808, "step": 343 }, { "epoch": 0.10268656716417911, "grad_norm": 0.8659855930783819, "learning_rate": 4.93130519916568e-06, "loss": 0.4153, "step": 344 }, { "epoch": 0.10298507462686567, "grad_norm": 0.9328940913851155, "learning_rate": 4.930741276970001e-06, "loss": 0.4228, "step": 345 }, { "epoch": 0.10328358208955224, "grad_norm": 0.9559469401085737, "learning_rate": 4.930175082090477e-06, "loss": 0.4171, "step": 346 }, { "epoch": 0.10358208955223881, "grad_norm": 0.8361138110258766, "learning_rate": 4.929606615056488e-06, "loss": 0.3968, "step": 347 }, { "epoch": 0.10388059701492537, "grad_norm": 0.8329215393697887, "learning_rate": 4.929035876399535e-06, "loss": 0.3942, "step": 348 }, { "epoch": 0.10417910447761194, "grad_norm": 0.8675645842119557, "learning_rate": 4.9284628666532455e-06, "loss": 0.3621, "step": 349 }, { "epoch": 0.1044776119402985, "grad_norm": 0.8507052675194776, "learning_rate": 4.927887586353369e-06, "loss": 0.4286, "step": 350 }, { "epoch": 0.10477611940298508, "grad_norm": 0.837156171946447, "learning_rate": 4.92731003603778e-06, "loss": 0.3712, "step": 351 }, { "epoch": 0.10507462686567164, "grad_norm": 0.9356486848372598, "learning_rate": 4.926730216246472e-06, "loss": 0.3913, "step": 352 }, { "epoch": 0.10537313432835821, "grad_norm": 0.8223936751125491, "learning_rate": 4.926148127521565e-06, "loss": 0.3975, "step": 353 }, { "epoch": 0.10567164179104478, "grad_norm": 0.7881227208588917, "learning_rate": 4.925563770407295e-06, "loss": 0.3961, "step": 354 }, { "epoch": 0.10597014925373134, "grad_norm": 0.8453911169822759, "learning_rate": 4.924977145450023e-06, "loss": 0.3694, "step": 355 }, { "epoch": 0.10626865671641791, "grad_norm": 0.8734420055725053, "learning_rate": 4.924388253198229e-06, "loss": 0.3865, "step": 356 }, { "epoch": 0.10656716417910447, "grad_norm": 0.9201290119245594, "learning_rate": 4.923797094202514e-06, "loss": 0.4278, "step": 357 }, { "epoch": 0.10686567164179105, "grad_norm": 0.9389093005855279, "learning_rate": 4.923203669015594e-06, "loss": 0.3665, "step": 358 }, { "epoch": 0.1071641791044776, "grad_norm": 0.9042202134139647, "learning_rate": 4.92260797819231e-06, "loss": 0.3924, "step": 359 }, { "epoch": 0.10746268656716418, "grad_norm": 0.8657123819229612, "learning_rate": 4.922010022289618e-06, "loss": 0.3972, "step": 360 }, { "epoch": 0.10776119402985075, "grad_norm": 0.829479673932315, "learning_rate": 4.921409801866591e-06, "loss": 0.4135, "step": 361 }, { "epoch": 0.10805970149253731, "grad_norm": 0.8821755114348971, "learning_rate": 4.920807317484422e-06, "loss": 0.3944, "step": 362 }, { "epoch": 0.10835820895522388, "grad_norm": 0.8948274549142421, "learning_rate": 4.920202569706418e-06, "loss": 0.3826, "step": 363 }, { "epoch": 0.10865671641791044, "grad_norm": 0.8652728701574908, "learning_rate": 4.919595559098003e-06, "loss": 0.3766, "step": 364 }, { "epoch": 0.10895522388059702, "grad_norm": 0.9092751007745377, "learning_rate": 4.9189862862267205e-06, "loss": 0.4034, "step": 365 }, { "epoch": 0.10925373134328359, "grad_norm": 0.9254208863560162, "learning_rate": 4.918374751662221e-06, "loss": 0.4359, "step": 366 }, { "epoch": 0.10955223880597015, "grad_norm": 0.8168960720647488, "learning_rate": 4.917760955976277e-06, "loss": 0.4363, "step": 367 }, { "epoch": 0.10985074626865672, "grad_norm": 0.830653386436508, "learning_rate": 4.917144899742773e-06, "loss": 0.3931, "step": 368 }, { "epoch": 0.11014925373134328, "grad_norm": 0.840070982853219, "learning_rate": 4.916526583537705e-06, "loss": 0.3493, "step": 369 }, { "epoch": 0.11044776119402985, "grad_norm": 0.979140168040529, "learning_rate": 4.915906007939184e-06, "loss": 0.3741, "step": 370 }, { "epoch": 0.11074626865671641, "grad_norm": 0.9450117540952754, "learning_rate": 4.915283173527434e-06, "loss": 0.4575, "step": 371 }, { "epoch": 0.11104477611940299, "grad_norm": 0.877577439309941, "learning_rate": 4.9146580808847896e-06, "loss": 0.4069, "step": 372 }, { "epoch": 0.11134328358208956, "grad_norm": 0.8452549449469758, "learning_rate": 4.9140307305956964e-06, "loss": 0.3521, "step": 373 }, { "epoch": 0.11164179104477612, "grad_norm": 0.8406721896093431, "learning_rate": 4.913401123246713e-06, "loss": 0.3441, "step": 374 }, { "epoch": 0.11194029850746269, "grad_norm": 0.7975661106565912, "learning_rate": 4.912769259426505e-06, "loss": 0.3256, "step": 375 }, { "epoch": 0.11223880597014925, "grad_norm": 0.8427317452222252, "learning_rate": 4.912135139725851e-06, "loss": 0.3885, "step": 376 }, { "epoch": 0.11253731343283582, "grad_norm": 0.9053683742245764, "learning_rate": 4.9114987647376374e-06, "loss": 0.4163, "step": 377 }, { "epoch": 0.11283582089552238, "grad_norm": 0.8630516315249676, "learning_rate": 4.910860135056859e-06, "loss": 0.3739, "step": 378 }, { "epoch": 0.11313432835820895, "grad_norm": 0.9602461144012868, "learning_rate": 4.91021925128062e-06, "loss": 0.3943, "step": 379 }, { "epoch": 0.11343283582089553, "grad_norm": 0.9141865424324758, "learning_rate": 4.909576114008129e-06, "loss": 0.4037, "step": 380 }, { "epoch": 0.11373134328358209, "grad_norm": 0.8548880538979818, "learning_rate": 4.908930723840706e-06, "loss": 0.3818, "step": 381 }, { "epoch": 0.11402985074626866, "grad_norm": 1.0328188865088241, "learning_rate": 4.908283081381773e-06, "loss": 0.4682, "step": 382 }, { "epoch": 0.11432835820895522, "grad_norm": 0.9238076147148401, "learning_rate": 4.907633187236861e-06, "loss": 0.38, "step": 383 }, { "epoch": 0.11462686567164179, "grad_norm": 0.8455481748001109, "learning_rate": 4.906981042013605e-06, "loss": 0.3761, "step": 384 }, { "epoch": 0.11492537313432835, "grad_norm": 0.9073460304019739, "learning_rate": 4.9063266463217466e-06, "loss": 0.4005, "step": 385 }, { "epoch": 0.11522388059701492, "grad_norm": 0.8931846505792465, "learning_rate": 4.905670000773126e-06, "loss": 0.4288, "step": 386 }, { "epoch": 0.1155223880597015, "grad_norm": 0.851731424120729, "learning_rate": 4.905011105981694e-06, "loss": 0.4044, "step": 387 }, { "epoch": 0.11582089552238806, "grad_norm": 0.9161247111493643, "learning_rate": 4.9043499625635e-06, "loss": 0.3995, "step": 388 }, { "epoch": 0.11611940298507463, "grad_norm": 0.8797022877561539, "learning_rate": 4.903686571136697e-06, "loss": 0.3964, "step": 389 }, { "epoch": 0.11641791044776119, "grad_norm": 0.8500731359455521, "learning_rate": 4.903020932321541e-06, "loss": 0.3609, "step": 390 }, { "epoch": 0.11671641791044776, "grad_norm": 0.9945238156321736, "learning_rate": 4.9023530467403856e-06, "loss": 0.3801, "step": 391 }, { "epoch": 0.11701492537313433, "grad_norm": 0.9136420198050205, "learning_rate": 4.901682915017689e-06, "loss": 0.3566, "step": 392 }, { "epoch": 0.1173134328358209, "grad_norm": 0.8189636216415471, "learning_rate": 4.901010537780009e-06, "loss": 0.3132, "step": 393 }, { "epoch": 0.11761194029850747, "grad_norm": 0.9844874224310621, "learning_rate": 4.900335915656e-06, "loss": 0.4271, "step": 394 }, { "epoch": 0.11791044776119403, "grad_norm": 1.0487555124306993, "learning_rate": 4.899659049276418e-06, "loss": 0.4064, "step": 395 }, { "epoch": 0.1182089552238806, "grad_norm": 0.7812485151038709, "learning_rate": 4.898979939274118e-06, "loss": 0.2882, "step": 396 }, { "epoch": 0.11850746268656716, "grad_norm": 0.8336948833618268, "learning_rate": 4.898298586284049e-06, "loss": 0.3984, "step": 397 }, { "epoch": 0.11880597014925373, "grad_norm": 0.8979301268641166, "learning_rate": 4.897614990943261e-06, "loss": 0.3919, "step": 398 }, { "epoch": 0.1191044776119403, "grad_norm": 0.8427489040474724, "learning_rate": 4.896929153890898e-06, "loss": 0.3866, "step": 399 }, { "epoch": 0.11940298507462686, "grad_norm": 0.8211103743103699, "learning_rate": 4.896241075768201e-06, "loss": 0.3963, "step": 400 }, { "epoch": 0.11970149253731344, "grad_norm": 0.8445033296486258, "learning_rate": 4.895550757218507e-06, "loss": 0.3928, "step": 401 }, { "epoch": 0.12, "grad_norm": 0.8961945171020185, "learning_rate": 4.894858198887246e-06, "loss": 0.3794, "step": 402 }, { "epoch": 0.12029850746268657, "grad_norm": 0.9818424108109866, "learning_rate": 4.8941634014219454e-06, "loss": 0.39, "step": 403 }, { "epoch": 0.12059701492537313, "grad_norm": 0.8104054934733214, "learning_rate": 4.8934663654722205e-06, "loss": 0.2999, "step": 404 }, { "epoch": 0.1208955223880597, "grad_norm": 0.8316791108804557, "learning_rate": 4.892767091689786e-06, "loss": 0.3685, "step": 405 }, { "epoch": 0.12119402985074627, "grad_norm": 0.8310506029399883, "learning_rate": 4.892065580728444e-06, "loss": 0.3703, "step": 406 }, { "epoch": 0.12149253731343283, "grad_norm": 0.7567215169549178, "learning_rate": 4.8913618332440906e-06, "loss": 0.3268, "step": 407 }, { "epoch": 0.1217910447761194, "grad_norm": 0.8628920449752951, "learning_rate": 4.890655849894713e-06, "loss": 0.3912, "step": 408 }, { "epoch": 0.12208955223880597, "grad_norm": 0.910090152736456, "learning_rate": 4.889947631340388e-06, "loss": 0.4308, "step": 409 }, { "epoch": 0.12238805970149254, "grad_norm": 0.8591509812041254, "learning_rate": 4.889237178243283e-06, "loss": 0.3958, "step": 410 }, { "epoch": 0.12268656716417911, "grad_norm": 0.8618520589363041, "learning_rate": 4.888524491267653e-06, "loss": 0.4158, "step": 411 }, { "epoch": 0.12298507462686567, "grad_norm": 0.8884599985876102, "learning_rate": 4.887809571079845e-06, "loss": 0.3612, "step": 412 }, { "epoch": 0.12328358208955224, "grad_norm": 0.8985780635048177, "learning_rate": 4.88709241834829e-06, "loss": 0.3824, "step": 413 }, { "epoch": 0.1235820895522388, "grad_norm": 0.8673597426156886, "learning_rate": 4.88637303374351e-06, "loss": 0.3911, "step": 414 }, { "epoch": 0.12388059701492538, "grad_norm": 0.8761841886398808, "learning_rate": 4.885651417938112e-06, "loss": 0.3596, "step": 415 }, { "epoch": 0.12417910447761193, "grad_norm": 0.8576660751720805, "learning_rate": 4.884927571606788e-06, "loss": 0.4177, "step": 416 }, { "epoch": 0.12447761194029851, "grad_norm": 0.804474354480795, "learning_rate": 4.884201495426317e-06, "loss": 0.3581, "step": 417 }, { "epoch": 0.12477611940298508, "grad_norm": 0.8519755633647428, "learning_rate": 4.883473190075562e-06, "loss": 0.4146, "step": 418 }, { "epoch": 0.12507462686567164, "grad_norm": 0.864412561136686, "learning_rate": 4.882742656235474e-06, "loss": 0.3788, "step": 419 }, { "epoch": 0.1253731343283582, "grad_norm": 0.88627834971777, "learning_rate": 4.88200989458908e-06, "loss": 0.4063, "step": 420 }, { "epoch": 0.12567164179104479, "grad_norm": 0.8104996818816044, "learning_rate": 4.881274905821496e-06, "loss": 0.4179, "step": 421 }, { "epoch": 0.12597014925373134, "grad_norm": 0.7961290288194648, "learning_rate": 4.88053769061992e-06, "loss": 0.3743, "step": 422 }, { "epoch": 0.1262686567164179, "grad_norm": 0.8727255096641454, "learning_rate": 4.879798249673628e-06, "loss": 0.4076, "step": 423 }, { "epoch": 0.1265671641791045, "grad_norm": 0.805037045243907, "learning_rate": 4.87905658367398e-06, "loss": 0.3782, "step": 424 }, { "epoch": 0.12686567164179105, "grad_norm": 0.8467074530308887, "learning_rate": 4.878312693314417e-06, "loss": 0.3981, "step": 425 }, { "epoch": 0.1271641791044776, "grad_norm": 0.8591176817546119, "learning_rate": 4.877566579290456e-06, "loss": 0.406, "step": 426 }, { "epoch": 0.12746268656716417, "grad_norm": 0.7675194550534261, "learning_rate": 4.876818242299697e-06, "loss": 0.3041, "step": 427 }, { "epoch": 0.12776119402985076, "grad_norm": 0.974908306841169, "learning_rate": 4.876067683041817e-06, "loss": 0.3846, "step": 428 }, { "epoch": 0.12805970149253731, "grad_norm": 0.9283777465319998, "learning_rate": 4.875314902218569e-06, "loss": 0.3893, "step": 429 }, { "epoch": 0.12835820895522387, "grad_norm": 0.8466250701660518, "learning_rate": 4.874559900533786e-06, "loss": 0.3715, "step": 430 }, { "epoch": 0.12865671641791046, "grad_norm": 0.9436206286256429, "learning_rate": 4.8738026786933765e-06, "loss": 0.3827, "step": 431 }, { "epoch": 0.12895522388059702, "grad_norm": 0.8642639983887797, "learning_rate": 4.8730432374053245e-06, "loss": 0.4037, "step": 432 }, { "epoch": 0.12925373134328358, "grad_norm": 0.8620504916338158, "learning_rate": 4.872281577379688e-06, "loss": 0.4308, "step": 433 }, { "epoch": 0.12955223880597014, "grad_norm": 0.9813776269945593, "learning_rate": 4.8715176993286e-06, "loss": 0.4293, "step": 434 }, { "epoch": 0.12985074626865672, "grad_norm": 0.8634148297447023, "learning_rate": 4.8707516039662705e-06, "loss": 0.3476, "step": 435 }, { "epoch": 0.13014925373134328, "grad_norm": 0.8942167636606891, "learning_rate": 4.8699832920089785e-06, "loss": 0.3772, "step": 436 }, { "epoch": 0.13044776119402984, "grad_norm": 0.8696237331815483, "learning_rate": 4.869212764175076e-06, "loss": 0.384, "step": 437 }, { "epoch": 0.13074626865671643, "grad_norm": 0.8296034400995033, "learning_rate": 4.8684400211849895e-06, "loss": 0.3862, "step": 438 }, { "epoch": 0.131044776119403, "grad_norm": 0.9402286904796409, "learning_rate": 4.867665063761212e-06, "loss": 0.3838, "step": 439 }, { "epoch": 0.13134328358208955, "grad_norm": 0.8227141749687872, "learning_rate": 4.866887892628314e-06, "loss": 0.381, "step": 440 }, { "epoch": 0.1316417910447761, "grad_norm": 0.8661103190250566, "learning_rate": 4.866108508512929e-06, "loss": 0.395, "step": 441 }, { "epoch": 0.1319402985074627, "grad_norm": 0.8814800070280554, "learning_rate": 4.865326912143762e-06, "loss": 0.4317, "step": 442 }, { "epoch": 0.13223880597014925, "grad_norm": 0.803476919813946, "learning_rate": 4.864543104251587e-06, "loss": 0.3822, "step": 443 }, { "epoch": 0.1325373134328358, "grad_norm": 0.8519115345361534, "learning_rate": 4.863757085569246e-06, "loss": 0.4007, "step": 444 }, { "epoch": 0.1328358208955224, "grad_norm": 0.8983953481126902, "learning_rate": 4.862968856831646e-06, "loss": 0.388, "step": 445 }, { "epoch": 0.13313432835820896, "grad_norm": 0.7938477876103528, "learning_rate": 4.862178418775763e-06, "loss": 0.3488, "step": 446 }, { "epoch": 0.13343283582089552, "grad_norm": 0.8324718218816302, "learning_rate": 4.861385772140636e-06, "loss": 0.3509, "step": 447 }, { "epoch": 0.13373134328358208, "grad_norm": 0.8673532215135669, "learning_rate": 4.86059091766737e-06, "loss": 0.3997, "step": 448 }, { "epoch": 0.13402985074626866, "grad_norm": 0.8720599760670272, "learning_rate": 4.859793856099138e-06, "loss": 0.4078, "step": 449 }, { "epoch": 0.13432835820895522, "grad_norm": 0.8972789828786194, "learning_rate": 4.858994588181168e-06, "loss": 0.3935, "step": 450 }, { "epoch": 0.13462686567164178, "grad_norm": 0.8581455676420515, "learning_rate": 4.85819311466076e-06, "loss": 0.4133, "step": 451 }, { "epoch": 0.13492537313432837, "grad_norm": 0.8099642650298081, "learning_rate": 4.857389436287271e-06, "loss": 0.3684, "step": 452 }, { "epoch": 0.13522388059701493, "grad_norm": 0.80580267367791, "learning_rate": 4.85658355381212e-06, "loss": 0.3736, "step": 453 }, { "epoch": 0.1355223880597015, "grad_norm": 0.9175924370164683, "learning_rate": 4.855775467988788e-06, "loss": 0.3663, "step": 454 }, { "epoch": 0.13582089552238805, "grad_norm": 0.804865363972486, "learning_rate": 4.854965179572816e-06, "loss": 0.3669, "step": 455 }, { "epoch": 0.13611940298507463, "grad_norm": 0.8793877846060576, "learning_rate": 4.854152689321803e-06, "loss": 0.3981, "step": 456 }, { "epoch": 0.1364179104477612, "grad_norm": 0.83704531818067, "learning_rate": 4.853337997995408e-06, "loss": 0.3796, "step": 457 }, { "epoch": 0.13671641791044775, "grad_norm": 0.8197161694891909, "learning_rate": 4.852521106355348e-06, "loss": 0.395, "step": 458 }, { "epoch": 0.13701492537313434, "grad_norm": 0.9341284013808222, "learning_rate": 4.851702015165396e-06, "loss": 0.374, "step": 459 }, { "epoch": 0.1373134328358209, "grad_norm": 0.9043457115872434, "learning_rate": 4.850880725191383e-06, "loss": 0.3687, "step": 460 }, { "epoch": 0.13761194029850746, "grad_norm": 0.8185493546695799, "learning_rate": 4.850057237201194e-06, "loss": 0.3623, "step": 461 }, { "epoch": 0.13791044776119402, "grad_norm": 0.8454515033830348, "learning_rate": 4.849231551964771e-06, "loss": 0.345, "step": 462 }, { "epoch": 0.1382089552238806, "grad_norm": 0.9749776836104918, "learning_rate": 4.848403670254111e-06, "loss": 0.4182, "step": 463 }, { "epoch": 0.13850746268656716, "grad_norm": 0.8514730507575938, "learning_rate": 4.84757359284326e-06, "loss": 0.3707, "step": 464 }, { "epoch": 0.13880597014925372, "grad_norm": 0.8959722306249783, "learning_rate": 4.846741320508323e-06, "loss": 0.4077, "step": 465 }, { "epoch": 0.1391044776119403, "grad_norm": 0.7560664033693268, "learning_rate": 4.8459068540274525e-06, "loss": 0.3622, "step": 466 }, { "epoch": 0.13940298507462687, "grad_norm": 0.8636823785223218, "learning_rate": 4.845070194180856e-06, "loss": 0.3569, "step": 467 }, { "epoch": 0.13970149253731343, "grad_norm": 0.8161586910729753, "learning_rate": 4.844231341750787e-06, "loss": 0.3693, "step": 468 }, { "epoch": 0.14, "grad_norm": 0.8264271190333304, "learning_rate": 4.843390297521556e-06, "loss": 0.3826, "step": 469 }, { "epoch": 0.14029850746268657, "grad_norm": 0.8329895252389455, "learning_rate": 4.842547062279517e-06, "loss": 0.3835, "step": 470 }, { "epoch": 0.14059701492537313, "grad_norm": 0.8750985694869183, "learning_rate": 4.841701636813074e-06, "loss": 0.4365, "step": 471 }, { "epoch": 0.1408955223880597, "grad_norm": 0.923999448212016, "learning_rate": 4.84085402191268e-06, "loss": 0.3355, "step": 472 }, { "epoch": 0.14119402985074628, "grad_norm": 0.8593855485500195, "learning_rate": 4.840004218370833e-06, "loss": 0.3684, "step": 473 }, { "epoch": 0.14149253731343284, "grad_norm": 0.9253486202985267, "learning_rate": 4.83915222698208e-06, "loss": 0.4351, "step": 474 }, { "epoch": 0.1417910447761194, "grad_norm": 0.7704797438692463, "learning_rate": 4.838298048543012e-06, "loss": 0.3311, "step": 475 }, { "epoch": 0.14208955223880598, "grad_norm": 0.84958326911992, "learning_rate": 4.837441683852264e-06, "loss": 0.3443, "step": 476 }, { "epoch": 0.14238805970149254, "grad_norm": 0.9214398433351614, "learning_rate": 4.8365831337105185e-06, "loss": 0.3837, "step": 477 }, { "epoch": 0.1426865671641791, "grad_norm": 0.9020552668429409, "learning_rate": 4.835722398920496e-06, "loss": 0.3726, "step": 478 }, { "epoch": 0.14298507462686566, "grad_norm": 0.834953050440995, "learning_rate": 4.834859480286963e-06, "loss": 0.3459, "step": 479 }, { "epoch": 0.14328358208955225, "grad_norm": 0.8287857134100892, "learning_rate": 4.83399437861673e-06, "loss": 0.3889, "step": 480 }, { "epoch": 0.1435820895522388, "grad_norm": 0.8245206662676321, "learning_rate": 4.833127094718643e-06, "loss": 0.3667, "step": 481 }, { "epoch": 0.14388059701492537, "grad_norm": 0.832866477275499, "learning_rate": 4.832257629403592e-06, "loss": 0.4004, "step": 482 }, { "epoch": 0.14417910447761195, "grad_norm": 0.915872105196081, "learning_rate": 4.8313859834845085e-06, "loss": 0.4013, "step": 483 }, { "epoch": 0.1444776119402985, "grad_norm": 0.9789956349355645, "learning_rate": 4.830512157776357e-06, "loss": 0.4411, "step": 484 }, { "epoch": 0.14477611940298507, "grad_norm": 0.7690696697320152, "learning_rate": 4.829636153096143e-06, "loss": 0.3071, "step": 485 }, { "epoch": 0.14507462686567163, "grad_norm": 0.9027976378763707, "learning_rate": 4.828757970262913e-06, "loss": 0.3413, "step": 486 }, { "epoch": 0.14537313432835822, "grad_norm": 0.8300835301176763, "learning_rate": 4.827877610097743e-06, "loss": 0.3535, "step": 487 }, { "epoch": 0.14567164179104478, "grad_norm": 0.8562294310178254, "learning_rate": 4.826995073423749e-06, "loss": 0.355, "step": 488 }, { "epoch": 0.14597014925373133, "grad_norm": 0.8059680443715135, "learning_rate": 4.826110361066084e-06, "loss": 0.3198, "step": 489 }, { "epoch": 0.14626865671641792, "grad_norm": 0.9642630011245373, "learning_rate": 4.825223473851929e-06, "loss": 0.4476, "step": 490 }, { "epoch": 0.14656716417910448, "grad_norm": 0.925182517782558, "learning_rate": 4.824334412610504e-06, "loss": 0.3641, "step": 491 }, { "epoch": 0.14686567164179104, "grad_norm": 0.8931425394201947, "learning_rate": 4.823443178173058e-06, "loss": 0.3963, "step": 492 }, { "epoch": 0.1471641791044776, "grad_norm": 0.7616008631295983, "learning_rate": 4.822549771372875e-06, "loss": 0.3295, "step": 493 }, { "epoch": 0.1474626865671642, "grad_norm": 0.9164279418898156, "learning_rate": 4.821654193045268e-06, "loss": 0.3954, "step": 494 }, { "epoch": 0.14776119402985075, "grad_norm": 0.8242276478542254, "learning_rate": 4.8207564440275816e-06, "loss": 0.358, "step": 495 }, { "epoch": 0.1480597014925373, "grad_norm": 0.8015482008189594, "learning_rate": 4.819856525159187e-06, "loss": 0.3724, "step": 496 }, { "epoch": 0.1483582089552239, "grad_norm": 0.8798301252694813, "learning_rate": 4.818954437281489e-06, "loss": 0.3851, "step": 497 }, { "epoch": 0.14865671641791045, "grad_norm": 0.8789684495124492, "learning_rate": 4.818050181237916e-06, "loss": 0.3659, "step": 498 }, { "epoch": 0.148955223880597, "grad_norm": 0.7945015241329157, "learning_rate": 4.817143757873927e-06, "loss": 0.3572, "step": 499 }, { "epoch": 0.14925373134328357, "grad_norm": 0.8407928444830756, "learning_rate": 4.8162351680370046e-06, "loss": 0.408, "step": 500 }, { "epoch": 0.14955223880597016, "grad_norm": 0.8377445302981639, "learning_rate": 4.815324412576659e-06, "loss": 0.3594, "step": 501 }, { "epoch": 0.14985074626865671, "grad_norm": 0.9360976258979777, "learning_rate": 4.814411492344423e-06, "loss": 0.384, "step": 502 }, { "epoch": 0.15014925373134327, "grad_norm": 0.9337795927031363, "learning_rate": 4.813496408193855e-06, "loss": 0.407, "step": 503 }, { "epoch": 0.15044776119402986, "grad_norm": 0.9407994815333237, "learning_rate": 4.812579160980538e-06, "loss": 0.3348, "step": 504 }, { "epoch": 0.15074626865671642, "grad_norm": 0.9213462894334784, "learning_rate": 4.8116597515620735e-06, "loss": 0.3909, "step": 505 }, { "epoch": 0.15104477611940298, "grad_norm": 0.8106559781114875, "learning_rate": 4.810738180798089e-06, "loss": 0.3667, "step": 506 }, { "epoch": 0.15134328358208957, "grad_norm": 0.8651383542529051, "learning_rate": 4.8098144495502295e-06, "loss": 0.3689, "step": 507 }, { "epoch": 0.15164179104477613, "grad_norm": 1.0153425024352598, "learning_rate": 4.808888558682161e-06, "loss": 0.3986, "step": 508 }, { "epoch": 0.15194029850746268, "grad_norm": 0.8027998509692323, "learning_rate": 4.80796050905957e-06, "loss": 0.3671, "step": 509 }, { "epoch": 0.15223880597014924, "grad_norm": 0.9698401353337921, "learning_rate": 4.807030301550159e-06, "loss": 0.3358, "step": 510 }, { "epoch": 0.15253731343283583, "grad_norm": 0.8470582921938352, "learning_rate": 4.806097937023652e-06, "loss": 0.3692, "step": 511 }, { "epoch": 0.1528358208955224, "grad_norm": 0.9235947242592816, "learning_rate": 4.8051634163517825e-06, "loss": 0.3458, "step": 512 }, { "epoch": 0.15313432835820895, "grad_norm": 0.894327518789336, "learning_rate": 4.8042267404083085e-06, "loss": 0.4142, "step": 513 }, { "epoch": 0.15343283582089554, "grad_norm": 0.9285145110926873, "learning_rate": 4.803287910068997e-06, "loss": 0.3749, "step": 514 }, { "epoch": 0.1537313432835821, "grad_norm": 0.8506702933212649, "learning_rate": 4.802346926211634e-06, "loss": 0.3469, "step": 515 }, { "epoch": 0.15402985074626865, "grad_norm": 0.8264106915519583, "learning_rate": 4.8014037897160134e-06, "loss": 0.4164, "step": 516 }, { "epoch": 0.1543283582089552, "grad_norm": 0.855191956211793, "learning_rate": 4.800458501463946e-06, "loss": 0.3684, "step": 517 }, { "epoch": 0.1546268656716418, "grad_norm": 0.8553683728972094, "learning_rate": 4.7995110623392545e-06, "loss": 0.3469, "step": 518 }, { "epoch": 0.15492537313432836, "grad_norm": 0.8190621146939986, "learning_rate": 4.798561473227769e-06, "loss": 0.3669, "step": 519 }, { "epoch": 0.15522388059701492, "grad_norm": 0.8060862576423745, "learning_rate": 4.797609735017335e-06, "loss": 0.3829, "step": 520 }, { "epoch": 0.1555223880597015, "grad_norm": 0.8263221507606819, "learning_rate": 4.796655848597803e-06, "loss": 0.3787, "step": 521 }, { "epoch": 0.15582089552238806, "grad_norm": 0.9818542527439001, "learning_rate": 4.795699814861033e-06, "loss": 0.3943, "step": 522 }, { "epoch": 0.15611940298507462, "grad_norm": 0.9363390488612918, "learning_rate": 4.7947416347008936e-06, "loss": 0.3488, "step": 523 }, { "epoch": 0.15641791044776118, "grad_norm": 0.8719501569051828, "learning_rate": 4.793781309013261e-06, "loss": 0.3037, "step": 524 }, { "epoch": 0.15671641791044777, "grad_norm": 0.8188931174909418, "learning_rate": 4.7928188386960155e-06, "loss": 0.3433, "step": 525 }, { "epoch": 0.15701492537313433, "grad_norm": 0.8576926049561543, "learning_rate": 4.791854224649042e-06, "loss": 0.4148, "step": 526 }, { "epoch": 0.1573134328358209, "grad_norm": 0.9514097030300651, "learning_rate": 4.7908874677742335e-06, "loss": 0.4065, "step": 527 }, { "epoch": 0.15761194029850747, "grad_norm": 0.9034660588508406, "learning_rate": 4.789918568975483e-06, "loss": 0.3681, "step": 528 }, { "epoch": 0.15791044776119403, "grad_norm": 0.8634816650220751, "learning_rate": 4.788947529158687e-06, "loss": 0.3697, "step": 529 }, { "epoch": 0.1582089552238806, "grad_norm": 0.9824567340256468, "learning_rate": 4.787974349231745e-06, "loss": 0.3842, "step": 530 }, { "epoch": 0.15850746268656715, "grad_norm": 0.7947403079672669, "learning_rate": 4.786999030104555e-06, "loss": 0.3299, "step": 531 }, { "epoch": 0.15880597014925374, "grad_norm": 0.8529781467405901, "learning_rate": 4.786021572689019e-06, "loss": 0.3872, "step": 532 }, { "epoch": 0.1591044776119403, "grad_norm": 0.9209188488882705, "learning_rate": 4.785041977899033e-06, "loss": 0.3936, "step": 533 }, { "epoch": 0.15940298507462686, "grad_norm": 0.885355457355636, "learning_rate": 4.784060246650496e-06, "loss": 0.3924, "step": 534 }, { "epoch": 0.15970149253731344, "grad_norm": 0.9523084549565237, "learning_rate": 4.783076379861304e-06, "loss": 0.4318, "step": 535 }, { "epoch": 0.16, "grad_norm": 0.8863570574039847, "learning_rate": 4.782090378451345e-06, "loss": 0.4157, "step": 536 }, { "epoch": 0.16029850746268656, "grad_norm": 0.8117266215970601, "learning_rate": 4.781102243342508e-06, "loss": 0.3463, "step": 537 }, { "epoch": 0.16059701492537312, "grad_norm": 0.8515951237304888, "learning_rate": 4.780111975458677e-06, "loss": 0.4112, "step": 538 }, { "epoch": 0.1608955223880597, "grad_norm": 0.9510326848546506, "learning_rate": 4.779119575725726e-06, "loss": 0.4046, "step": 539 }, { "epoch": 0.16119402985074627, "grad_norm": 0.8449453300506576, "learning_rate": 4.7781250450715245e-06, "loss": 0.356, "step": 540 }, { "epoch": 0.16149253731343283, "grad_norm": 0.931947295073069, "learning_rate": 4.7771283844259365e-06, "loss": 0.3974, "step": 541 }, { "epoch": 0.1617910447761194, "grad_norm": 0.874181026365032, "learning_rate": 4.776129594720813e-06, "loss": 0.3629, "step": 542 }, { "epoch": 0.16208955223880597, "grad_norm": 0.855050176983507, "learning_rate": 4.77512867689e-06, "loss": 0.3853, "step": 543 }, { "epoch": 0.16238805970149253, "grad_norm": 0.8923651345310537, "learning_rate": 4.77412563186933e-06, "loss": 0.4007, "step": 544 }, { "epoch": 0.1626865671641791, "grad_norm": 0.9078676220956058, "learning_rate": 4.7731204605966265e-06, "loss": 0.3881, "step": 545 }, { "epoch": 0.16298507462686568, "grad_norm": 0.7538192348742706, "learning_rate": 4.7721131640116996e-06, "loss": 0.3036, "step": 546 }, { "epoch": 0.16328358208955224, "grad_norm": 0.8608203405755754, "learning_rate": 4.771103743056348e-06, "loss": 0.3457, "step": 547 }, { "epoch": 0.1635820895522388, "grad_norm": 0.8212962040784945, "learning_rate": 4.770092198674353e-06, "loss": 0.3273, "step": 548 }, { "epoch": 0.16388059701492538, "grad_norm": 0.9028550682650915, "learning_rate": 4.769078531811487e-06, "loss": 0.4282, "step": 549 }, { "epoch": 0.16417910447761194, "grad_norm": 0.8884438521513933, "learning_rate": 4.768062743415502e-06, "loss": 0.3837, "step": 550 }, { "epoch": 0.1644776119402985, "grad_norm": 0.876505852629387, "learning_rate": 4.767044834436137e-06, "loss": 0.3951, "step": 551 }, { "epoch": 0.1647761194029851, "grad_norm": 0.7663670395688694, "learning_rate": 4.76602480582511e-06, "loss": 0.3629, "step": 552 }, { "epoch": 0.16507462686567165, "grad_norm": 0.9554436497467657, "learning_rate": 4.7650026585361255e-06, "loss": 0.3752, "step": 553 }, { "epoch": 0.1653731343283582, "grad_norm": 0.8328045712770886, "learning_rate": 4.7639783935248635e-06, "loss": 0.363, "step": 554 }, { "epoch": 0.16567164179104477, "grad_norm": 1.3533426086417046, "learning_rate": 4.762952011748988e-06, "loss": 0.293, "step": 555 }, { "epoch": 0.16597014925373135, "grad_norm": 0.8397180497457704, "learning_rate": 4.761923514168142e-06, "loss": 0.3803, "step": 556 }, { "epoch": 0.1662686567164179, "grad_norm": 0.899522921671484, "learning_rate": 4.760892901743944e-06, "loss": 0.4168, "step": 557 }, { "epoch": 0.16656716417910447, "grad_norm": 0.8900270204673798, "learning_rate": 4.759860175439993e-06, "loss": 0.4041, "step": 558 }, { "epoch": 0.16686567164179106, "grad_norm": 0.9386190331772859, "learning_rate": 4.758825336221861e-06, "loss": 0.3569, "step": 559 }, { "epoch": 0.16716417910447762, "grad_norm": 1.4064679834783074, "learning_rate": 4.7577883850570995e-06, "loss": 0.4046, "step": 560 }, { "epoch": 0.16746268656716418, "grad_norm": 0.8752446720830109, "learning_rate": 4.7567493229152315e-06, "loss": 0.369, "step": 561 }, { "epoch": 0.16776119402985074, "grad_norm": 0.8921917475055471, "learning_rate": 4.755708150767754e-06, "loss": 0.3629, "step": 562 }, { "epoch": 0.16805970149253732, "grad_norm": 0.9071169605857111, "learning_rate": 4.754664869588139e-06, "loss": 0.3196, "step": 563 }, { "epoch": 0.16835820895522388, "grad_norm": 0.7429818510738487, "learning_rate": 4.75361948035183e-06, "loss": 0.322, "step": 564 }, { "epoch": 0.16865671641791044, "grad_norm": 0.8100900713596559, "learning_rate": 4.752571984036237e-06, "loss": 0.3758, "step": 565 }, { "epoch": 0.16895522388059703, "grad_norm": 0.8598955983620397, "learning_rate": 4.7515223816207455e-06, "loss": 0.3442, "step": 566 }, { "epoch": 0.1692537313432836, "grad_norm": 0.8783738770144779, "learning_rate": 4.750470674086709e-06, "loss": 0.322, "step": 567 }, { "epoch": 0.16955223880597015, "grad_norm": 0.9030983989078412, "learning_rate": 4.749416862417448e-06, "loss": 0.4017, "step": 568 }, { "epoch": 0.1698507462686567, "grad_norm": 0.8713947394750962, "learning_rate": 4.748360947598248e-06, "loss": 0.3646, "step": 569 }, { "epoch": 0.1701492537313433, "grad_norm": 0.8986926196540066, "learning_rate": 4.747302930616368e-06, "loss": 0.4171, "step": 570 }, { "epoch": 0.17044776119402985, "grad_norm": 0.8073489618852552, "learning_rate": 4.746242812461025e-06, "loss": 0.3999, "step": 571 }, { "epoch": 0.1707462686567164, "grad_norm": 0.9163536146278429, "learning_rate": 4.7451805941234055e-06, "loss": 0.4386, "step": 572 }, { "epoch": 0.171044776119403, "grad_norm": 0.828059671354957, "learning_rate": 4.744116276596656e-06, "loss": 0.3435, "step": 573 }, { "epoch": 0.17134328358208956, "grad_norm": 0.8696889215011467, "learning_rate": 4.743049860875889e-06, "loss": 0.3319, "step": 574 }, { "epoch": 0.17164179104477612, "grad_norm": 0.8480598380604834, "learning_rate": 4.741981347958175e-06, "loss": 0.3703, "step": 575 }, { "epoch": 0.17194029850746267, "grad_norm": 0.7909046861375709, "learning_rate": 4.740910738842551e-06, "loss": 0.3589, "step": 576 }, { "epoch": 0.17223880597014926, "grad_norm": 0.8917454585581824, "learning_rate": 4.739838034530008e-06, "loss": 0.3876, "step": 577 }, { "epoch": 0.17253731343283582, "grad_norm": 0.8783412360640082, "learning_rate": 4.738763236023498e-06, "loss": 0.3892, "step": 578 }, { "epoch": 0.17283582089552238, "grad_norm": 0.9299982449111176, "learning_rate": 4.737686344327932e-06, "loss": 0.3642, "step": 579 }, { "epoch": 0.17313432835820897, "grad_norm": 0.8702752107459392, "learning_rate": 4.736607360450179e-06, "loss": 0.357, "step": 580 }, { "epoch": 0.17343283582089553, "grad_norm": 0.8641201687866182, "learning_rate": 4.73552628539906e-06, "loss": 0.3881, "step": 581 }, { "epoch": 0.17373134328358208, "grad_norm": 0.7875411772022647, "learning_rate": 4.734443120185357e-06, "loss": 0.3886, "step": 582 }, { "epoch": 0.17402985074626864, "grad_norm": 0.7639534640961319, "learning_rate": 4.733357865821799e-06, "loss": 0.3635, "step": 583 }, { "epoch": 0.17432835820895523, "grad_norm": 0.8465197416628776, "learning_rate": 4.7322705233230765e-06, "loss": 0.3753, "step": 584 }, { "epoch": 0.1746268656716418, "grad_norm": 0.8612932322468133, "learning_rate": 4.731181093705825e-06, "loss": 0.3584, "step": 585 }, { "epoch": 0.17492537313432835, "grad_norm": 0.811960790500117, "learning_rate": 4.730089577988637e-06, "loss": 0.3794, "step": 586 }, { "epoch": 0.17522388059701494, "grad_norm": 0.7739231568933665, "learning_rate": 4.728995977192052e-06, "loss": 0.3499, "step": 587 }, { "epoch": 0.1755223880597015, "grad_norm": 0.8400654346049872, "learning_rate": 4.72790029233856e-06, "loss": 0.3675, "step": 588 }, { "epoch": 0.17582089552238805, "grad_norm": 0.8830538820611432, "learning_rate": 4.7268025244526e-06, "loss": 0.4198, "step": 589 }, { "epoch": 0.1761194029850746, "grad_norm": 0.8440850359948983, "learning_rate": 4.725702674560558e-06, "loss": 0.3436, "step": 590 }, { "epoch": 0.1764179104477612, "grad_norm": 0.923969252299939, "learning_rate": 4.724600743690766e-06, "loss": 0.3726, "step": 591 }, { "epoch": 0.17671641791044776, "grad_norm": 0.9285113562633005, "learning_rate": 4.723496732873504e-06, "loss": 0.3903, "step": 592 }, { "epoch": 0.17701492537313432, "grad_norm": 0.7967222216178841, "learning_rate": 4.722390643140995e-06, "loss": 0.3088, "step": 593 }, { "epoch": 0.1773134328358209, "grad_norm": 0.8941789785640152, "learning_rate": 4.721282475527405e-06, "loss": 0.3684, "step": 594 }, { "epoch": 0.17761194029850746, "grad_norm": 0.8812907651438229, "learning_rate": 4.720172231068845e-06, "loss": 0.3697, "step": 595 }, { "epoch": 0.17791044776119402, "grad_norm": 0.8984723387629163, "learning_rate": 4.719059910803364e-06, "loss": 0.3657, "step": 596 }, { "epoch": 0.1782089552238806, "grad_norm": 0.9108229355000194, "learning_rate": 4.717945515770958e-06, "loss": 0.3609, "step": 597 }, { "epoch": 0.17850746268656717, "grad_norm": 0.885813043986939, "learning_rate": 4.716829047013555e-06, "loss": 0.4141, "step": 598 }, { "epoch": 0.17880597014925373, "grad_norm": 0.8127480639060701, "learning_rate": 4.715710505575031e-06, "loss": 0.3529, "step": 599 }, { "epoch": 0.1791044776119403, "grad_norm": 0.8032237883986898, "learning_rate": 4.71458989250119e-06, "loss": 0.3496, "step": 600 }, { "epoch": 0.17940298507462688, "grad_norm": 0.819201792889001, "learning_rate": 4.713467208839782e-06, "loss": 0.3674, "step": 601 }, { "epoch": 0.17970149253731343, "grad_norm": 1.1406695776718605, "learning_rate": 4.712342455640486e-06, "loss": 0.3756, "step": 602 }, { "epoch": 0.18, "grad_norm": 0.8607621290309236, "learning_rate": 4.7112156339549205e-06, "loss": 0.3623, "step": 603 }, { "epoch": 0.18029850746268658, "grad_norm": 0.8516209936354656, "learning_rate": 4.710086744836635e-06, "loss": 0.3934, "step": 604 }, { "epoch": 0.18059701492537314, "grad_norm": 1.0789657160791544, "learning_rate": 4.708955789341115e-06, "loss": 0.3333, "step": 605 }, { "epoch": 0.1808955223880597, "grad_norm": 0.8465101170476235, "learning_rate": 4.707822768525775e-06, "loss": 0.3456, "step": 606 }, { "epoch": 0.18119402985074626, "grad_norm": 0.8021068603431047, "learning_rate": 4.706687683449961e-06, "loss": 0.3743, "step": 607 }, { "epoch": 0.18149253731343284, "grad_norm": 0.9396472673097465, "learning_rate": 4.705550535174952e-06, "loss": 0.3594, "step": 608 }, { "epoch": 0.1817910447761194, "grad_norm": 0.9308558717626013, "learning_rate": 4.704411324763954e-06, "loss": 0.3597, "step": 609 }, { "epoch": 0.18208955223880596, "grad_norm": 0.8780878715069544, "learning_rate": 4.7032700532820984e-06, "loss": 0.3794, "step": 610 }, { "epoch": 0.18238805970149255, "grad_norm": 0.8438709125133251, "learning_rate": 4.702126721796448e-06, "loss": 0.3534, "step": 611 }, { "epoch": 0.1826865671641791, "grad_norm": 0.7918705122194744, "learning_rate": 4.700981331375991e-06, "loss": 0.3512, "step": 612 }, { "epoch": 0.18298507462686567, "grad_norm": 0.8832849637084809, "learning_rate": 4.699833883091637e-06, "loss": 0.3664, "step": 613 }, { "epoch": 0.18328358208955223, "grad_norm": 0.7766965849001322, "learning_rate": 4.698684378016223e-06, "loss": 0.3536, "step": 614 }, { "epoch": 0.18358208955223881, "grad_norm": 0.8793943105130626, "learning_rate": 4.69753281722451e-06, "loss": 0.3859, "step": 615 }, { "epoch": 0.18388059701492537, "grad_norm": 0.9314513871786796, "learning_rate": 4.696379201793176e-06, "loss": 0.367, "step": 616 }, { "epoch": 0.18417910447761193, "grad_norm": 0.8801529809718072, "learning_rate": 4.695223532800825e-06, "loss": 0.4067, "step": 617 }, { "epoch": 0.18447761194029852, "grad_norm": 0.7999007221875536, "learning_rate": 4.694065811327982e-06, "loss": 0.3428, "step": 618 }, { "epoch": 0.18477611940298508, "grad_norm": 0.8859494993053622, "learning_rate": 4.692906038457084e-06, "loss": 0.3461, "step": 619 }, { "epoch": 0.18507462686567164, "grad_norm": 0.7920787783745799, "learning_rate": 4.6917442152724925e-06, "loss": 0.3338, "step": 620 }, { "epoch": 0.1853731343283582, "grad_norm": 0.8343099718824265, "learning_rate": 4.6905803428604835e-06, "loss": 0.386, "step": 621 }, { "epoch": 0.18567164179104478, "grad_norm": 0.8227479914592881, "learning_rate": 4.6894144223092496e-06, "loss": 0.3514, "step": 622 }, { "epoch": 0.18597014925373134, "grad_norm": 0.9320390654474898, "learning_rate": 4.6882464547088976e-06, "loss": 0.3922, "step": 623 }, { "epoch": 0.1862686567164179, "grad_norm": 0.7694880051993461, "learning_rate": 4.6870764411514495e-06, "loss": 0.3571, "step": 624 }, { "epoch": 0.1865671641791045, "grad_norm": 0.8228896792999334, "learning_rate": 4.685904382730839e-06, "loss": 0.3658, "step": 625 }, { "epoch": 0.18686567164179105, "grad_norm": 0.8303039994029542, "learning_rate": 4.684730280542912e-06, "loss": 0.3735, "step": 626 }, { "epoch": 0.1871641791044776, "grad_norm": 0.8427730863324351, "learning_rate": 4.6835541356854255e-06, "loss": 0.3918, "step": 627 }, { "epoch": 0.18746268656716417, "grad_norm": 0.8394164814398534, "learning_rate": 4.682375949258045e-06, "loss": 0.3075, "step": 628 }, { "epoch": 0.18776119402985075, "grad_norm": 0.8214529875587838, "learning_rate": 4.681195722362349e-06, "loss": 0.3551, "step": 629 }, { "epoch": 0.1880597014925373, "grad_norm": 0.810136115359384, "learning_rate": 4.68001345610182e-06, "loss": 0.3641, "step": 630 }, { "epoch": 0.18835820895522387, "grad_norm": 0.8803568899727253, "learning_rate": 4.678829151581846e-06, "loss": 0.3817, "step": 631 }, { "epoch": 0.18865671641791046, "grad_norm": 0.7930342754419026, "learning_rate": 4.677642809909725e-06, "loss": 0.4024, "step": 632 }, { "epoch": 0.18895522388059702, "grad_norm": 0.852158008550677, "learning_rate": 4.6764544321946565e-06, "loss": 0.4057, "step": 633 }, { "epoch": 0.18925373134328358, "grad_norm": 0.7444755651407072, "learning_rate": 4.675264019547745e-06, "loss": 0.3094, "step": 634 }, { "epoch": 0.18955223880597014, "grad_norm": 0.7810087717032494, "learning_rate": 4.674071573081998e-06, "loss": 0.3552, "step": 635 }, { "epoch": 0.18985074626865672, "grad_norm": 0.8457218590063859, "learning_rate": 4.672877093912323e-06, "loss": 0.3554, "step": 636 }, { "epoch": 0.19014925373134328, "grad_norm": 0.8702779931971437, "learning_rate": 4.671680583155528e-06, "loss": 0.4169, "step": 637 }, { "epoch": 0.19044776119402984, "grad_norm": 0.9714348750550799, "learning_rate": 4.670482041930324e-06, "loss": 0.348, "step": 638 }, { "epoch": 0.19074626865671643, "grad_norm": 0.8315101609756071, "learning_rate": 4.6692814713573155e-06, "loss": 0.3387, "step": 639 }, { "epoch": 0.191044776119403, "grad_norm": 0.8737001772219148, "learning_rate": 4.6680788725590086e-06, "loss": 0.3723, "step": 640 }, { "epoch": 0.19134328358208955, "grad_norm": 0.928685080600342, "learning_rate": 4.6668742466598015e-06, "loss": 0.3903, "step": 641 }, { "epoch": 0.19164179104477613, "grad_norm": 0.800239252131357, "learning_rate": 4.665667594785992e-06, "loss": 0.3408, "step": 642 }, { "epoch": 0.1919402985074627, "grad_norm": 0.8264024494717072, "learning_rate": 4.66445891806577e-06, "loss": 0.3119, "step": 643 }, { "epoch": 0.19223880597014925, "grad_norm": 0.782897351165225, "learning_rate": 4.663248217629218e-06, "loss": 0.3748, "step": 644 }, { "epoch": 0.1925373134328358, "grad_norm": 0.7565710294395597, "learning_rate": 4.662035494608313e-06, "loss": 0.3267, "step": 645 }, { "epoch": 0.1928358208955224, "grad_norm": 0.8522767167672216, "learning_rate": 4.660820750136918e-06, "loss": 0.3771, "step": 646 }, { "epoch": 0.19313432835820896, "grad_norm": 0.8777158029491957, "learning_rate": 4.6596039853507925e-06, "loss": 0.3605, "step": 647 }, { "epoch": 0.19343283582089552, "grad_norm": 0.8449256976352308, "learning_rate": 4.658385201387582e-06, "loss": 0.3454, "step": 648 }, { "epoch": 0.1937313432835821, "grad_norm": 0.9008268972405354, "learning_rate": 4.657164399386818e-06, "loss": 0.4138, "step": 649 }, { "epoch": 0.19402985074626866, "grad_norm": 0.8903381842895088, "learning_rate": 4.655941580489922e-06, "loss": 0.3699, "step": 650 }, { "epoch": 0.19432835820895522, "grad_norm": 0.8382545542778225, "learning_rate": 4.6547167458402e-06, "loss": 0.3738, "step": 651 }, { "epoch": 0.19462686567164178, "grad_norm": 0.9109044659426834, "learning_rate": 4.653489896582841e-06, "loss": 0.4004, "step": 652 }, { "epoch": 0.19492537313432837, "grad_norm": 0.9177470131519694, "learning_rate": 4.65226103386492e-06, "loss": 0.3654, "step": 653 }, { "epoch": 0.19522388059701493, "grad_norm": 0.8276383969775275, "learning_rate": 4.651030158835393e-06, "loss": 0.3513, "step": 654 }, { "epoch": 0.19552238805970149, "grad_norm": 0.8952139983790235, "learning_rate": 4.6497972726451005e-06, "loss": 0.3786, "step": 655 }, { "epoch": 0.19582089552238807, "grad_norm": 0.7985778698618001, "learning_rate": 4.648562376446759e-06, "loss": 0.3638, "step": 656 }, { "epoch": 0.19611940298507463, "grad_norm": 0.908801175999597, "learning_rate": 4.6473254713949665e-06, "loss": 0.397, "step": 657 }, { "epoch": 0.1964179104477612, "grad_norm": 0.894398615813818, "learning_rate": 4.6460865586462e-06, "loss": 0.4207, "step": 658 }, { "epoch": 0.19671641791044775, "grad_norm": 0.8685502281430703, "learning_rate": 4.644845639358812e-06, "loss": 0.3762, "step": 659 }, { "epoch": 0.19701492537313434, "grad_norm": 0.8915809779098176, "learning_rate": 4.6436027146930316e-06, "loss": 0.3427, "step": 660 }, { "epoch": 0.1973134328358209, "grad_norm": 0.8679116859759302, "learning_rate": 4.642357785810964e-06, "loss": 0.3944, "step": 661 }, { "epoch": 0.19761194029850745, "grad_norm": 0.7880457160052126, "learning_rate": 4.641110853876586e-06, "loss": 0.3987, "step": 662 }, { "epoch": 0.19791044776119404, "grad_norm": 0.864228831055506, "learning_rate": 4.6398619200557485e-06, "loss": 0.3996, "step": 663 }, { "epoch": 0.1982089552238806, "grad_norm": 0.8022579546091344, "learning_rate": 4.638610985516176e-06, "loss": 0.3521, "step": 664 }, { "epoch": 0.19850746268656716, "grad_norm": 0.7106263773090994, "learning_rate": 4.6373580514274605e-06, "loss": 0.3367, "step": 665 }, { "epoch": 0.19880597014925372, "grad_norm": 0.8985506899451846, "learning_rate": 4.636103118961065e-06, "loss": 0.4056, "step": 666 }, { "epoch": 0.1991044776119403, "grad_norm": 0.870760200813925, "learning_rate": 4.634846189290321e-06, "loss": 0.3646, "step": 667 }, { "epoch": 0.19940298507462687, "grad_norm": 0.8551014965491996, "learning_rate": 4.633587263590427e-06, "loss": 0.4102, "step": 668 }, { "epoch": 0.19970149253731342, "grad_norm": 0.8857508158230196, "learning_rate": 4.632326343038448e-06, "loss": 0.4184, "step": 669 }, { "epoch": 0.2, "grad_norm": 0.7779965326593492, "learning_rate": 4.631063428813314e-06, "loss": 0.3521, "step": 670 }, { "epoch": 0.20029850746268657, "grad_norm": 0.8349623707590051, "learning_rate": 4.629798522095818e-06, "loss": 0.381, "step": 671 }, { "epoch": 0.20059701492537313, "grad_norm": 1.0856988327864074, "learning_rate": 4.628531624068618e-06, "loss": 0.3597, "step": 672 }, { "epoch": 0.2008955223880597, "grad_norm": 0.855642649913472, "learning_rate": 4.627262735916233e-06, "loss": 0.3506, "step": 673 }, { "epoch": 0.20119402985074628, "grad_norm": 0.7945501228954227, "learning_rate": 4.625991858825042e-06, "loss": 0.3186, "step": 674 }, { "epoch": 0.20149253731343283, "grad_norm": 0.9656198406567287, "learning_rate": 4.624718993983284e-06, "loss": 0.349, "step": 675 }, { "epoch": 0.2017910447761194, "grad_norm": 0.9620523693551688, "learning_rate": 4.623444142581056e-06, "loss": 0.3566, "step": 676 }, { "epoch": 0.20208955223880598, "grad_norm": 0.7443588122091737, "learning_rate": 4.622167305810315e-06, "loss": 0.3195, "step": 677 }, { "epoch": 0.20238805970149254, "grad_norm": 0.9328190975144249, "learning_rate": 4.620888484864869e-06, "loss": 0.3975, "step": 678 }, { "epoch": 0.2026865671641791, "grad_norm": 0.8006835988448852, "learning_rate": 4.6196076809403875e-06, "loss": 0.3228, "step": 679 }, { "epoch": 0.20298507462686566, "grad_norm": 0.9990492755928724, "learning_rate": 4.618324895234391e-06, "loss": 0.3728, "step": 680 }, { "epoch": 0.20328358208955224, "grad_norm": 0.817711684027937, "learning_rate": 4.61704012894625e-06, "loss": 0.3876, "step": 681 }, { "epoch": 0.2035820895522388, "grad_norm": 0.7456982963035693, "learning_rate": 4.615753383277192e-06, "loss": 0.334, "step": 682 }, { "epoch": 0.20388059701492536, "grad_norm": 0.87509200555024, "learning_rate": 4.614464659430292e-06, "loss": 0.3982, "step": 683 }, { "epoch": 0.20417910447761195, "grad_norm": 0.7734942878648073, "learning_rate": 4.613173958610476e-06, "loss": 0.3653, "step": 684 }, { "epoch": 0.2044776119402985, "grad_norm": 1.006253095908581, "learning_rate": 4.611881282024518e-06, "loss": 0.4069, "step": 685 }, { "epoch": 0.20477611940298507, "grad_norm": 0.9446767733133795, "learning_rate": 4.6105866308810375e-06, "loss": 0.4212, "step": 686 }, { "epoch": 0.20507462686567166, "grad_norm": 0.9531967381845363, "learning_rate": 4.609290006390503e-06, "loss": 0.3525, "step": 687 }, { "epoch": 0.20537313432835821, "grad_norm": 0.7954289882558423, "learning_rate": 4.607991409765226e-06, "loss": 0.3446, "step": 688 }, { "epoch": 0.20567164179104477, "grad_norm": 0.7229225187581373, "learning_rate": 4.606690842219364e-06, "loss": 0.3562, "step": 689 }, { "epoch": 0.20597014925373133, "grad_norm": 0.7579897211017783, "learning_rate": 4.605388304968915e-06, "loss": 0.3447, "step": 690 }, { "epoch": 0.20626865671641792, "grad_norm": 0.9384371378838448, "learning_rate": 4.604083799231719e-06, "loss": 0.365, "step": 691 }, { "epoch": 0.20656716417910448, "grad_norm": 1.1276096522058603, "learning_rate": 4.602777326227459e-06, "loss": 0.3468, "step": 692 }, { "epoch": 0.20686567164179104, "grad_norm": 0.8690437426856318, "learning_rate": 4.6014688871776535e-06, "loss": 0.4067, "step": 693 }, { "epoch": 0.20716417910447762, "grad_norm": 0.8340116401411932, "learning_rate": 4.600158483305662e-06, "loss": 0.3303, "step": 694 }, { "epoch": 0.20746268656716418, "grad_norm": 0.9659161023612816, "learning_rate": 4.59884611583668e-06, "loss": 0.3845, "step": 695 }, { "epoch": 0.20776119402985074, "grad_norm": 0.8013822632682776, "learning_rate": 4.59753178599774e-06, "loss": 0.343, "step": 696 }, { "epoch": 0.2080597014925373, "grad_norm": 0.7638697285164096, "learning_rate": 4.5962154950177065e-06, "loss": 0.3371, "step": 697 }, { "epoch": 0.2083582089552239, "grad_norm": 0.8584456429925702, "learning_rate": 4.594897244127281e-06, "loss": 0.387, "step": 698 }, { "epoch": 0.20865671641791045, "grad_norm": 0.7652924682991881, "learning_rate": 4.593577034558995e-06, "loss": 0.3551, "step": 699 }, { "epoch": 0.208955223880597, "grad_norm": 0.8065003946035596, "learning_rate": 4.592254867547214e-06, "loss": 0.3438, "step": 700 }, { "epoch": 0.2092537313432836, "grad_norm": 0.9004428506328457, "learning_rate": 4.590930744328128e-06, "loss": 0.3713, "step": 701 }, { "epoch": 0.20955223880597015, "grad_norm": 0.8938815871012901, "learning_rate": 4.5896046661397645e-06, "loss": 0.3692, "step": 702 }, { "epoch": 0.2098507462686567, "grad_norm": 0.905050953213985, "learning_rate": 4.588276634221972e-06, "loss": 0.3596, "step": 703 }, { "epoch": 0.21014925373134327, "grad_norm": 0.7970736616034015, "learning_rate": 4.586946649816428e-06, "loss": 0.3495, "step": 704 }, { "epoch": 0.21044776119402986, "grad_norm": 0.8579016457008204, "learning_rate": 4.585614714166636e-06, "loss": 0.3707, "step": 705 }, { "epoch": 0.21074626865671642, "grad_norm": 0.9030100226974851, "learning_rate": 4.584280828517924e-06, "loss": 0.3828, "step": 706 }, { "epoch": 0.21104477611940298, "grad_norm": 0.9247619159595248, "learning_rate": 4.582944994117441e-06, "loss": 0.3792, "step": 707 }, { "epoch": 0.21134328358208956, "grad_norm": 0.9295226931550548, "learning_rate": 4.58160721221416e-06, "loss": 0.3958, "step": 708 }, { "epoch": 0.21164179104477612, "grad_norm": 0.894595136771482, "learning_rate": 4.580267484058876e-06, "loss": 0.3559, "step": 709 }, { "epoch": 0.21194029850746268, "grad_norm": 0.8774937178986986, "learning_rate": 4.5789258109042e-06, "loss": 0.4055, "step": 710 }, { "epoch": 0.21223880597014924, "grad_norm": 0.8187268569344242, "learning_rate": 4.577582194004565e-06, "loss": 0.3495, "step": 711 }, { "epoch": 0.21253731343283583, "grad_norm": 0.8813687056400052, "learning_rate": 4.576236634616219e-06, "loss": 0.3874, "step": 712 }, { "epoch": 0.2128358208955224, "grad_norm": 0.7817355048189055, "learning_rate": 4.574889133997229e-06, "loss": 0.348, "step": 713 }, { "epoch": 0.21313432835820895, "grad_norm": 0.7918141903283299, "learning_rate": 4.573539693407474e-06, "loss": 0.3711, "step": 714 }, { "epoch": 0.21343283582089553, "grad_norm": 0.7682344039631848, "learning_rate": 4.572188314108648e-06, "loss": 0.3661, "step": 715 }, { "epoch": 0.2137313432835821, "grad_norm": 0.7620495460154416, "learning_rate": 4.570834997364258e-06, "loss": 0.3651, "step": 716 }, { "epoch": 0.21402985074626865, "grad_norm": 0.9253366569905003, "learning_rate": 4.569479744439622e-06, "loss": 0.3356, "step": 717 }, { "epoch": 0.2143283582089552, "grad_norm": 0.9151048827100409, "learning_rate": 4.568122556601869e-06, "loss": 0.363, "step": 718 }, { "epoch": 0.2146268656716418, "grad_norm": 0.8490011151647137, "learning_rate": 4.566763435119936e-06, "loss": 0.3247, "step": 719 }, { "epoch": 0.21492537313432836, "grad_norm": 0.8786239642622696, "learning_rate": 4.565402381264569e-06, "loss": 0.4117, "step": 720 }, { "epoch": 0.21522388059701492, "grad_norm": 0.8402095797172084, "learning_rate": 4.564039396308319e-06, "loss": 0.3549, "step": 721 }, { "epoch": 0.2155223880597015, "grad_norm": 0.8965903934098873, "learning_rate": 4.5626744815255454e-06, "loss": 0.382, "step": 722 }, { "epoch": 0.21582089552238806, "grad_norm": 0.8307615244096349, "learning_rate": 4.5613076381924084e-06, "loss": 0.3527, "step": 723 }, { "epoch": 0.21611940298507462, "grad_norm": 0.8695011048660274, "learning_rate": 4.559938867586874e-06, "loss": 0.3689, "step": 724 }, { "epoch": 0.21641791044776118, "grad_norm": 0.9306482302064063, "learning_rate": 4.5585681709887104e-06, "loss": 0.391, "step": 725 }, { "epoch": 0.21671641791044777, "grad_norm": 0.828492781007522, "learning_rate": 4.557195549679484e-06, "loss": 0.3234, "step": 726 }, { "epoch": 0.21701492537313433, "grad_norm": 0.8573294375941618, "learning_rate": 4.555821004942563e-06, "loss": 0.386, "step": 727 }, { "epoch": 0.21731343283582089, "grad_norm": 0.875314700850979, "learning_rate": 4.554444538063113e-06, "loss": 0.3569, "step": 728 }, { "epoch": 0.21761194029850747, "grad_norm": 0.825263881051261, "learning_rate": 4.553066150328097e-06, "loss": 0.3716, "step": 729 }, { "epoch": 0.21791044776119403, "grad_norm": 0.9101676571634926, "learning_rate": 4.5516858430262745e-06, "loss": 0.3819, "step": 730 }, { "epoch": 0.2182089552238806, "grad_norm": 0.7788518704841725, "learning_rate": 4.550303617448198e-06, "loss": 0.34, "step": 731 }, { "epoch": 0.21850746268656718, "grad_norm": 0.7994914875640055, "learning_rate": 4.548919474886217e-06, "loss": 0.3317, "step": 732 }, { "epoch": 0.21880597014925374, "grad_norm": 0.8615980705638517, "learning_rate": 4.547533416634468e-06, "loss": 0.3488, "step": 733 }, { "epoch": 0.2191044776119403, "grad_norm": 0.8580797802618181, "learning_rate": 4.546145443988883e-06, "loss": 0.3386, "step": 734 }, { "epoch": 0.21940298507462686, "grad_norm": 0.8023378839095241, "learning_rate": 4.544755558247184e-06, "loss": 0.3822, "step": 735 }, { "epoch": 0.21970149253731344, "grad_norm": 0.8955427517527177, "learning_rate": 4.543363760708878e-06, "loss": 0.3656, "step": 736 }, { "epoch": 0.22, "grad_norm": 0.8836190670530261, "learning_rate": 4.541970052675262e-06, "loss": 0.3929, "step": 737 }, { "epoch": 0.22029850746268656, "grad_norm": 0.9099664733412756, "learning_rate": 4.540574435449421e-06, "loss": 0.3515, "step": 738 }, { "epoch": 0.22059701492537315, "grad_norm": 0.9197478364346269, "learning_rate": 4.539176910336221e-06, "loss": 0.4125, "step": 739 }, { "epoch": 0.2208955223880597, "grad_norm": 0.8096717598195738, "learning_rate": 4.537777478642317e-06, "loss": 0.3441, "step": 740 }, { "epoch": 0.22119402985074627, "grad_norm": 0.7424662504675141, "learning_rate": 4.53637614167614e-06, "loss": 0.3466, "step": 741 }, { "epoch": 0.22149253731343282, "grad_norm": 0.8596564765091457, "learning_rate": 4.534972900747907e-06, "loss": 0.3607, "step": 742 }, { "epoch": 0.2217910447761194, "grad_norm": 0.8235466276329729, "learning_rate": 4.533567757169615e-06, "loss": 0.3597, "step": 743 }, { "epoch": 0.22208955223880597, "grad_norm": 1.0365662627082053, "learning_rate": 4.532160712255037e-06, "loss": 0.3896, "step": 744 }, { "epoch": 0.22238805970149253, "grad_norm": 1.0076481822719652, "learning_rate": 4.530751767319729e-06, "loss": 0.3913, "step": 745 }, { "epoch": 0.22268656716417912, "grad_norm": 0.8582474689745617, "learning_rate": 4.529340923681016e-06, "loss": 0.3245, "step": 746 }, { "epoch": 0.22298507462686568, "grad_norm": 0.8822511043553072, "learning_rate": 4.527928182658006e-06, "loss": 0.3766, "step": 747 }, { "epoch": 0.22328358208955223, "grad_norm": 0.8534191167038851, "learning_rate": 4.526513545571576e-06, "loss": 0.3384, "step": 748 }, { "epoch": 0.2235820895522388, "grad_norm": 0.8670338466114618, "learning_rate": 4.525097013744377e-06, "loss": 0.3828, "step": 749 }, { "epoch": 0.22388059701492538, "grad_norm": 0.794997902918133, "learning_rate": 4.523678588500831e-06, "loss": 0.3439, "step": 750 }, { "epoch": 0.22417910447761194, "grad_norm": 0.8637367121235244, "learning_rate": 4.522258271167134e-06, "loss": 0.3854, "step": 751 }, { "epoch": 0.2244776119402985, "grad_norm": 0.8685702777896596, "learning_rate": 4.520836063071245e-06, "loss": 0.3887, "step": 752 }, { "epoch": 0.2247761194029851, "grad_norm": 0.772264036444534, "learning_rate": 4.519411965542895e-06, "loss": 0.3192, "step": 753 }, { "epoch": 0.22507462686567165, "grad_norm": 0.789258733130727, "learning_rate": 4.517985979913581e-06, "loss": 0.3527, "step": 754 }, { "epoch": 0.2253731343283582, "grad_norm": 0.7727803198307888, "learning_rate": 4.516558107516563e-06, "loss": 0.3764, "step": 755 }, { "epoch": 0.22567164179104476, "grad_norm": 0.8985233297487031, "learning_rate": 4.51512834968687e-06, "loss": 0.3319, "step": 756 }, { "epoch": 0.22597014925373135, "grad_norm": 0.7858913969509037, "learning_rate": 4.513696707761287e-06, "loss": 0.3422, "step": 757 }, { "epoch": 0.2262686567164179, "grad_norm": 0.9507634234561627, "learning_rate": 4.512263183078367e-06, "loss": 0.3781, "step": 758 }, { "epoch": 0.22656716417910447, "grad_norm": 0.8464279365702985, "learning_rate": 4.510827776978419e-06, "loss": 0.3927, "step": 759 }, { "epoch": 0.22686567164179106, "grad_norm": 0.8120744514290998, "learning_rate": 4.5093904908035145e-06, "loss": 0.3431, "step": 760 }, { "epoch": 0.22716417910447761, "grad_norm": 0.7500430603126108, "learning_rate": 4.50795132589748e-06, "loss": 0.2971, "step": 761 }, { "epoch": 0.22746268656716417, "grad_norm": 1.0783281815992556, "learning_rate": 4.5065102836059e-06, "loss": 0.377, "step": 762 }, { "epoch": 0.22776119402985073, "grad_norm": 0.8318583999356356, "learning_rate": 4.505067365276112e-06, "loss": 0.3356, "step": 763 }, { "epoch": 0.22805970149253732, "grad_norm": 0.8126656695454364, "learning_rate": 4.503622572257212e-06, "loss": 0.348, "step": 764 }, { "epoch": 0.22835820895522388, "grad_norm": 0.8301469658419047, "learning_rate": 4.502175905900046e-06, "loss": 0.3758, "step": 765 }, { "epoch": 0.22865671641791044, "grad_norm": 0.8038946408180568, "learning_rate": 4.50072736755721e-06, "loss": 0.3598, "step": 766 }, { "epoch": 0.22895522388059703, "grad_norm": 0.8541693014723826, "learning_rate": 4.499276958583054e-06, "loss": 0.3574, "step": 767 }, { "epoch": 0.22925373134328358, "grad_norm": 0.8246286524267744, "learning_rate": 4.497824680333674e-06, "loss": 0.3733, "step": 768 }, { "epoch": 0.22955223880597014, "grad_norm": 0.7782825828612279, "learning_rate": 4.496370534166915e-06, "loss": 0.3105, "step": 769 }, { "epoch": 0.2298507462686567, "grad_norm": 0.8857118941633463, "learning_rate": 4.494914521442368e-06, "loss": 0.373, "step": 770 }, { "epoch": 0.2301492537313433, "grad_norm": 0.8395319587604902, "learning_rate": 4.49345664352137e-06, "loss": 0.3468, "step": 771 }, { "epoch": 0.23044776119402985, "grad_norm": 0.8752411500887408, "learning_rate": 4.491996901766999e-06, "loss": 0.3696, "step": 772 }, { "epoch": 0.2307462686567164, "grad_norm": 0.9297321969188959, "learning_rate": 4.4905352975440815e-06, "loss": 0.382, "step": 773 }, { "epoch": 0.231044776119403, "grad_norm": 0.8328696745976142, "learning_rate": 4.489071832219181e-06, "loss": 0.3715, "step": 774 }, { "epoch": 0.23134328358208955, "grad_norm": 0.8741052478618722, "learning_rate": 4.487606507160599e-06, "loss": 0.3794, "step": 775 }, { "epoch": 0.2316417910447761, "grad_norm": 0.8662088883531012, "learning_rate": 4.486139323738382e-06, "loss": 0.3659, "step": 776 }, { "epoch": 0.2319402985074627, "grad_norm": 0.8934574672183584, "learning_rate": 4.48467028332431e-06, "loss": 0.3658, "step": 777 }, { "epoch": 0.23223880597014926, "grad_norm": 1.1016352512450371, "learning_rate": 4.483199387291898e-06, "loss": 0.3661, "step": 778 }, { "epoch": 0.23253731343283582, "grad_norm": 0.7828812345663573, "learning_rate": 4.4817266370164e-06, "loss": 0.3567, "step": 779 }, { "epoch": 0.23283582089552238, "grad_norm": 0.8193782009262484, "learning_rate": 4.480252033874801e-06, "loss": 0.3539, "step": 780 }, { "epoch": 0.23313432835820896, "grad_norm": 0.8073010853910519, "learning_rate": 4.47877557924582e-06, "loss": 0.3324, "step": 781 }, { "epoch": 0.23343283582089552, "grad_norm": 0.9202437087315363, "learning_rate": 4.477297274509904e-06, "loss": 0.3666, "step": 782 }, { "epoch": 0.23373134328358208, "grad_norm": 0.8160201474275349, "learning_rate": 4.475817121049234e-06, "loss": 0.3718, "step": 783 }, { "epoch": 0.23402985074626867, "grad_norm": 0.8630655153632912, "learning_rate": 4.474335120247716e-06, "loss": 0.4114, "step": 784 }, { "epoch": 0.23432835820895523, "grad_norm": 0.8729151161750691, "learning_rate": 4.472851273490985e-06, "loss": 0.3725, "step": 785 }, { "epoch": 0.2346268656716418, "grad_norm": 0.8813347017833638, "learning_rate": 4.471365582166401e-06, "loss": 0.3759, "step": 786 }, { "epoch": 0.23492537313432835, "grad_norm": 0.8663722741697655, "learning_rate": 4.46987804766305e-06, "loss": 0.3594, "step": 787 }, { "epoch": 0.23522388059701493, "grad_norm": 0.7836171249800367, "learning_rate": 4.46838867137174e-06, "loss": 0.3079, "step": 788 }, { "epoch": 0.2355223880597015, "grad_norm": 0.8419069340655513, "learning_rate": 4.466897454685003e-06, "loss": 0.3952, "step": 789 }, { "epoch": 0.23582089552238805, "grad_norm": 0.844324374913284, "learning_rate": 4.465404398997089e-06, "loss": 0.3685, "step": 790 }, { "epoch": 0.23611940298507464, "grad_norm": 0.8830236417572802, "learning_rate": 4.463909505703968e-06, "loss": 0.3616, "step": 791 }, { "epoch": 0.2364179104477612, "grad_norm": 0.8444454534060524, "learning_rate": 4.46241277620333e-06, "loss": 0.3783, "step": 792 }, { "epoch": 0.23671641791044776, "grad_norm": 0.7655987801662917, "learning_rate": 4.460914211894579e-06, "loss": 0.3183, "step": 793 }, { "epoch": 0.23701492537313432, "grad_norm": 0.7941247862341938, "learning_rate": 4.459413814178839e-06, "loss": 0.366, "step": 794 }, { "epoch": 0.2373134328358209, "grad_norm": 0.8222962634858313, "learning_rate": 4.4579115844589426e-06, "loss": 0.3349, "step": 795 }, { "epoch": 0.23761194029850746, "grad_norm": 0.8913670067022349, "learning_rate": 4.4564075241394386e-06, "loss": 0.3692, "step": 796 }, { "epoch": 0.23791044776119402, "grad_norm": 0.8571392122510596, "learning_rate": 4.454901634626587e-06, "loss": 0.3689, "step": 797 }, { "epoch": 0.2382089552238806, "grad_norm": 0.8061419047479605, "learning_rate": 4.4533939173283585e-06, "loss": 0.3451, "step": 798 }, { "epoch": 0.23850746268656717, "grad_norm": 0.83880369164832, "learning_rate": 4.451884373654431e-06, "loss": 0.3846, "step": 799 }, { "epoch": 0.23880597014925373, "grad_norm": 0.7991787716936328, "learning_rate": 4.450373005016191e-06, "loss": 0.3514, "step": 800 }, { "epoch": 0.23910447761194029, "grad_norm": 0.9096163448081129, "learning_rate": 4.448859812826732e-06, "loss": 0.3704, "step": 801 }, { "epoch": 0.23940298507462687, "grad_norm": 0.8414143701711667, "learning_rate": 4.447344798500852e-06, "loss": 0.3674, "step": 802 }, { "epoch": 0.23970149253731343, "grad_norm": 0.8352881489659175, "learning_rate": 4.445827963455051e-06, "loss": 0.3768, "step": 803 }, { "epoch": 0.24, "grad_norm": 0.8031794777452957, "learning_rate": 4.444309309107535e-06, "loss": 0.3423, "step": 804 }, { "epoch": 0.24029850746268658, "grad_norm": 0.8060785944842502, "learning_rate": 4.442788836878207e-06, "loss": 0.3131, "step": 805 }, { "epoch": 0.24059701492537314, "grad_norm": 0.8202351464416616, "learning_rate": 4.441266548188673e-06, "loss": 0.3566, "step": 806 }, { "epoch": 0.2408955223880597, "grad_norm": 0.905814083393673, "learning_rate": 4.439742444462234e-06, "loss": 0.3812, "step": 807 }, { "epoch": 0.24119402985074626, "grad_norm": 0.7839467463765396, "learning_rate": 4.43821652712389e-06, "loss": 0.3544, "step": 808 }, { "epoch": 0.24149253731343284, "grad_norm": 0.9113819484994593, "learning_rate": 4.436688797600338e-06, "loss": 0.4163, "step": 809 }, { "epoch": 0.2417910447761194, "grad_norm": 0.8245251973742547, "learning_rate": 4.435159257319968e-06, "loss": 0.349, "step": 810 }, { "epoch": 0.24208955223880596, "grad_norm": 0.8775639237639656, "learning_rate": 4.43362790771286e-06, "loss": 0.4072, "step": 811 }, { "epoch": 0.24238805970149255, "grad_norm": 0.877280517556995, "learning_rate": 4.432094750210791e-06, "loss": 0.3773, "step": 812 }, { "epoch": 0.2426865671641791, "grad_norm": 0.8453157272615807, "learning_rate": 4.430559786247227e-06, "loss": 0.4092, "step": 813 }, { "epoch": 0.24298507462686567, "grad_norm": 0.8066208315679838, "learning_rate": 4.429023017257319e-06, "loss": 0.3573, "step": 814 }, { "epoch": 0.24328358208955222, "grad_norm": 0.8304243577622432, "learning_rate": 4.42748444467791e-06, "loss": 0.3086, "step": 815 }, { "epoch": 0.2435820895522388, "grad_norm": 0.8380366969931612, "learning_rate": 4.425944069947528e-06, "loss": 0.3458, "step": 816 }, { "epoch": 0.24388059701492537, "grad_norm": 0.7673079862377152, "learning_rate": 4.424401894506386e-06, "loss": 0.3514, "step": 817 }, { "epoch": 0.24417910447761193, "grad_norm": 0.8657431046670215, "learning_rate": 4.4228579197963795e-06, "loss": 0.3648, "step": 818 }, { "epoch": 0.24447761194029852, "grad_norm": 0.8310942286263857, "learning_rate": 4.421312147261087e-06, "loss": 0.2978, "step": 819 }, { "epoch": 0.24477611940298508, "grad_norm": 0.8444445715616047, "learning_rate": 4.4197645783457695e-06, "loss": 0.3907, "step": 820 }, { "epoch": 0.24507462686567164, "grad_norm": 0.9112359004688853, "learning_rate": 4.418215214497366e-06, "loss": 0.3833, "step": 821 }, { "epoch": 0.24537313432835822, "grad_norm": 0.8107922970059999, "learning_rate": 4.416664057164493e-06, "loss": 0.3672, "step": 822 }, { "epoch": 0.24567164179104478, "grad_norm": 0.8722125495392787, "learning_rate": 4.415111107797445e-06, "loss": 0.3617, "step": 823 }, { "epoch": 0.24597014925373134, "grad_norm": 1.061183148831863, "learning_rate": 4.413556367848193e-06, "loss": 0.413, "step": 824 }, { "epoch": 0.2462686567164179, "grad_norm": 0.7884747931084407, "learning_rate": 4.41199983877038e-06, "loss": 0.375, "step": 825 }, { "epoch": 0.2465671641791045, "grad_norm": 0.93921749874484, "learning_rate": 4.410441522019322e-06, "loss": 0.3671, "step": 826 }, { "epoch": 0.24686567164179105, "grad_norm": 0.8800331687997713, "learning_rate": 4.4088814190520085e-06, "loss": 0.347, "step": 827 }, { "epoch": 0.2471641791044776, "grad_norm": 0.8608882108195471, "learning_rate": 4.407319531327097e-06, "loss": 0.3705, "step": 828 }, { "epoch": 0.2474626865671642, "grad_norm": 0.8578378741311872, "learning_rate": 4.405755860304915e-06, "loss": 0.3574, "step": 829 }, { "epoch": 0.24776119402985075, "grad_norm": 0.8321521972748156, "learning_rate": 4.404190407447456e-06, "loss": 0.3639, "step": 830 }, { "epoch": 0.2480597014925373, "grad_norm": 0.8170637261740442, "learning_rate": 4.402623174218381e-06, "loss": 0.3568, "step": 831 }, { "epoch": 0.24835820895522387, "grad_norm": 0.8552394496252084, "learning_rate": 4.401054162083015e-06, "loss": 0.3982, "step": 832 }, { "epoch": 0.24865671641791046, "grad_norm": 0.8273057774794584, "learning_rate": 4.399483372508345e-06, "loss": 0.3223, "step": 833 }, { "epoch": 0.24895522388059702, "grad_norm": 0.8012455720488535, "learning_rate": 4.3979108069630226e-06, "loss": 0.3675, "step": 834 }, { "epoch": 0.24925373134328357, "grad_norm": 0.8788275931412666, "learning_rate": 4.396336466917357e-06, "loss": 0.3587, "step": 835 }, { "epoch": 0.24955223880597016, "grad_norm": 0.8481959047937334, "learning_rate": 4.394760353843318e-06, "loss": 0.3916, "step": 836 }, { "epoch": 0.24985074626865672, "grad_norm": 0.820980214165406, "learning_rate": 4.393182469214533e-06, "loss": 0.3731, "step": 837 }, { "epoch": 0.2501492537313433, "grad_norm": 0.8685340899908907, "learning_rate": 4.391602814506285e-06, "loss": 0.3422, "step": 838 }, { "epoch": 0.25044776119402984, "grad_norm": 0.8722156332327601, "learning_rate": 4.390021391195514e-06, "loss": 0.3848, "step": 839 }, { "epoch": 0.2507462686567164, "grad_norm": 0.8131879266539699, "learning_rate": 4.388438200760812e-06, "loss": 0.3473, "step": 840 }, { "epoch": 0.251044776119403, "grad_norm": 0.8738620972075055, "learning_rate": 4.386853244682422e-06, "loss": 0.3505, "step": 841 }, { "epoch": 0.25134328358208957, "grad_norm": 0.8455634576936628, "learning_rate": 4.385266524442241e-06, "loss": 0.3656, "step": 842 }, { "epoch": 0.25164179104477613, "grad_norm": 0.9232241608401891, "learning_rate": 4.383678041523813e-06, "loss": 0.3308, "step": 843 }, { "epoch": 0.2519402985074627, "grad_norm": 0.7802438443664448, "learning_rate": 4.382087797412331e-06, "loss": 0.3177, "step": 844 }, { "epoch": 0.25223880597014925, "grad_norm": 0.840623387098109, "learning_rate": 4.380495793594634e-06, "loss": 0.3978, "step": 845 }, { "epoch": 0.2525373134328358, "grad_norm": 0.8598753419489917, "learning_rate": 4.378902031559208e-06, "loss": 0.3643, "step": 846 }, { "epoch": 0.25283582089552237, "grad_norm": 0.8502479164780553, "learning_rate": 4.377306512796179e-06, "loss": 0.3116, "step": 847 }, { "epoch": 0.253134328358209, "grad_norm": 0.8471892836766642, "learning_rate": 4.375709238797322e-06, "loss": 0.3459, "step": 848 }, { "epoch": 0.25343283582089554, "grad_norm": 0.8949946135476528, "learning_rate": 4.3741102110560465e-06, "loss": 0.4197, "step": 849 }, { "epoch": 0.2537313432835821, "grad_norm": 0.8390558579599957, "learning_rate": 4.372509431067404e-06, "loss": 0.3641, "step": 850 }, { "epoch": 0.25402985074626866, "grad_norm": 0.8295363073039969, "learning_rate": 4.370906900328087e-06, "loss": 0.3085, "step": 851 }, { "epoch": 0.2543283582089552, "grad_norm": 0.8920142421824904, "learning_rate": 4.369302620336422e-06, "loss": 0.3605, "step": 852 }, { "epoch": 0.2546268656716418, "grad_norm": 0.8314417922487575, "learning_rate": 4.367696592592371e-06, "loss": 0.3648, "step": 853 }, { "epoch": 0.25492537313432834, "grad_norm": 0.9416237229806054, "learning_rate": 4.36608881859753e-06, "loss": 0.4322, "step": 854 }, { "epoch": 0.25522388059701495, "grad_norm": 0.8552595456528146, "learning_rate": 4.364479299855131e-06, "loss": 0.376, "step": 855 }, { "epoch": 0.2555223880597015, "grad_norm": 0.7986846174676006, "learning_rate": 4.362868037870033e-06, "loss": 0.3129, "step": 856 }, { "epoch": 0.25582089552238807, "grad_norm": 0.8367097908730075, "learning_rate": 4.361255034148727e-06, "loss": 0.34, "step": 857 }, { "epoch": 0.25611940298507463, "grad_norm": 0.8933913196848361, "learning_rate": 4.359640290199331e-06, "loss": 0.3574, "step": 858 }, { "epoch": 0.2564179104477612, "grad_norm": 0.8314203802986557, "learning_rate": 4.3580238075315954e-06, "loss": 0.3454, "step": 859 }, { "epoch": 0.25671641791044775, "grad_norm": 0.8392401641828928, "learning_rate": 4.356405587656886e-06, "loss": 0.3616, "step": 860 }, { "epoch": 0.2570149253731343, "grad_norm": 0.8057344142131634, "learning_rate": 4.354785632088204e-06, "loss": 0.3429, "step": 861 }, { "epoch": 0.2573134328358209, "grad_norm": 0.7265185841371562, "learning_rate": 4.353163942340166e-06, "loss": 0.3316, "step": 862 }, { "epoch": 0.2576119402985075, "grad_norm": 0.8861442387020552, "learning_rate": 4.351540519929013e-06, "loss": 0.3682, "step": 863 }, { "epoch": 0.25791044776119404, "grad_norm": 0.8207676152529324, "learning_rate": 4.349915366372605e-06, "loss": 0.3465, "step": 864 }, { "epoch": 0.2582089552238806, "grad_norm": 0.8401159847300563, "learning_rate": 4.348288483190422e-06, "loss": 0.3628, "step": 865 }, { "epoch": 0.25850746268656716, "grad_norm": 0.9061691939819366, "learning_rate": 4.346659871903558e-06, "loss": 0.3269, "step": 866 }, { "epoch": 0.2588059701492537, "grad_norm": 0.9554654260605913, "learning_rate": 4.345029534034727e-06, "loss": 0.3688, "step": 867 }, { "epoch": 0.2591044776119403, "grad_norm": 0.8768435732446233, "learning_rate": 4.343397471108254e-06, "loss": 0.3467, "step": 868 }, { "epoch": 0.2594029850746269, "grad_norm": 0.83519783168284, "learning_rate": 4.341763684650078e-06, "loss": 0.3664, "step": 869 }, { "epoch": 0.25970149253731345, "grad_norm": 0.8733824091023256, "learning_rate": 4.340128176187751e-06, "loss": 0.3894, "step": 870 }, { "epoch": 0.26, "grad_norm": 0.7435126731069427, "learning_rate": 4.338490947250431e-06, "loss": 0.3265, "step": 871 }, { "epoch": 0.26029850746268657, "grad_norm": 0.7602478532739807, "learning_rate": 4.33685199936889e-06, "loss": 0.3255, "step": 872 }, { "epoch": 0.2605970149253731, "grad_norm": 0.8396951710141938, "learning_rate": 4.335211334075502e-06, "loss": 0.3772, "step": 873 }, { "epoch": 0.2608955223880597, "grad_norm": 0.8535455460088422, "learning_rate": 4.33356895290425e-06, "loss": 0.3221, "step": 874 }, { "epoch": 0.26119402985074625, "grad_norm": 0.934656189548925, "learning_rate": 4.331924857390722e-06, "loss": 0.3899, "step": 875 }, { "epoch": 0.26149253731343286, "grad_norm": 0.7839885412025712, "learning_rate": 4.330279049072103e-06, "loss": 0.3278, "step": 876 }, { "epoch": 0.2617910447761194, "grad_norm": 0.8288101118788596, "learning_rate": 4.328631529487188e-06, "loss": 0.359, "step": 877 }, { "epoch": 0.262089552238806, "grad_norm": 0.7635692010308125, "learning_rate": 4.326982300176366e-06, "loss": 0.3315, "step": 878 }, { "epoch": 0.26238805970149254, "grad_norm": 0.8317735162263449, "learning_rate": 4.325331362681624e-06, "loss": 0.3464, "step": 879 }, { "epoch": 0.2626865671641791, "grad_norm": 0.8075219201660886, "learning_rate": 4.323678718546552e-06, "loss": 0.3477, "step": 880 }, { "epoch": 0.26298507462686566, "grad_norm": 0.8112126719014313, "learning_rate": 4.3220243693163305e-06, "loss": 0.4148, "step": 881 }, { "epoch": 0.2632835820895522, "grad_norm": 0.8176840947105901, "learning_rate": 4.3203683165377355e-06, "loss": 0.4178, "step": 882 }, { "epoch": 0.26358208955223883, "grad_norm": 1.0009446140687355, "learning_rate": 4.318710561759137e-06, "loss": 0.3289, "step": 883 }, { "epoch": 0.2638805970149254, "grad_norm": 0.7534551657111695, "learning_rate": 4.317051106530492e-06, "loss": 0.3402, "step": 884 }, { "epoch": 0.26417910447761195, "grad_norm": 0.9072522401528668, "learning_rate": 4.315389952403355e-06, "loss": 0.3817, "step": 885 }, { "epoch": 0.2644776119402985, "grad_norm": 0.8682117406925437, "learning_rate": 4.313727100930862e-06, "loss": 0.3915, "step": 886 }, { "epoch": 0.26477611940298507, "grad_norm": 0.8515997109671464, "learning_rate": 4.312062553667739e-06, "loss": 0.3397, "step": 887 }, { "epoch": 0.2650746268656716, "grad_norm": 0.837060052477179, "learning_rate": 4.310396312170298e-06, "loss": 0.3495, "step": 888 }, { "epoch": 0.2653731343283582, "grad_norm": 0.960847057443914, "learning_rate": 4.308728377996433e-06, "loss": 0.345, "step": 889 }, { "epoch": 0.2656716417910448, "grad_norm": 0.8594679219941264, "learning_rate": 4.307058752705623e-06, "loss": 0.3193, "step": 890 }, { "epoch": 0.26597014925373136, "grad_norm": 0.8579668256137557, "learning_rate": 4.3053874378589265e-06, "loss": 0.3587, "step": 891 }, { "epoch": 0.2662686567164179, "grad_norm": 0.815761166827642, "learning_rate": 4.303714435018981e-06, "loss": 0.392, "step": 892 }, { "epoch": 0.2665671641791045, "grad_norm": 0.7691072442891018, "learning_rate": 4.3020397457500055e-06, "loss": 0.3449, "step": 893 }, { "epoch": 0.26686567164179104, "grad_norm": 0.8546963433650799, "learning_rate": 4.300363371617792e-06, "loss": 0.3615, "step": 894 }, { "epoch": 0.2671641791044776, "grad_norm": 0.8954925431678488, "learning_rate": 4.29868531418971e-06, "loss": 0.387, "step": 895 }, { "epoch": 0.26746268656716415, "grad_norm": 0.8890912758541257, "learning_rate": 4.297005575034701e-06, "loss": 0.338, "step": 896 }, { "epoch": 0.26776119402985077, "grad_norm": 0.9013891259842057, "learning_rate": 4.295324155723283e-06, "loss": 0.3627, "step": 897 }, { "epoch": 0.26805970149253733, "grad_norm": 0.7909956097157833, "learning_rate": 4.29364105782754e-06, "loss": 0.3708, "step": 898 }, { "epoch": 0.2683582089552239, "grad_norm": 0.8080884146208309, "learning_rate": 4.291956282921129e-06, "loss": 0.3257, "step": 899 }, { "epoch": 0.26865671641791045, "grad_norm": 0.7819915704985843, "learning_rate": 4.2902698325792715e-06, "loss": 0.3916, "step": 900 }, { "epoch": 0.268955223880597, "grad_norm": 0.8683299968211471, "learning_rate": 4.2885817083787584e-06, "loss": 0.3669, "step": 901 }, { "epoch": 0.26925373134328356, "grad_norm": 0.8085758598925459, "learning_rate": 4.286891911897944e-06, "loss": 0.3187, "step": 902 }, { "epoch": 0.2695522388059701, "grad_norm": 0.8590837328246107, "learning_rate": 4.285200444716749e-06, "loss": 0.3785, "step": 903 }, { "epoch": 0.26985074626865674, "grad_norm": 0.8551915445353413, "learning_rate": 4.283507308416651e-06, "loss": 0.3542, "step": 904 }, { "epoch": 0.2701492537313433, "grad_norm": 0.9079671987533409, "learning_rate": 4.281812504580694e-06, "loss": 0.3634, "step": 905 }, { "epoch": 0.27044776119402986, "grad_norm": 0.8369860857576275, "learning_rate": 4.280116034793477e-06, "loss": 0.3708, "step": 906 }, { "epoch": 0.2707462686567164, "grad_norm": 0.8664538534678876, "learning_rate": 4.278417900641157e-06, "loss": 0.4132, "step": 907 }, { "epoch": 0.271044776119403, "grad_norm": 0.805731173102517, "learning_rate": 4.2767181037114494e-06, "loss": 0.3946, "step": 908 }, { "epoch": 0.27134328358208953, "grad_norm": 0.8111842698029063, "learning_rate": 4.275016645593622e-06, "loss": 0.3616, "step": 909 }, { "epoch": 0.2716417910447761, "grad_norm": 0.7534971014345517, "learning_rate": 4.2733135278784975e-06, "loss": 0.3284, "step": 910 }, { "epoch": 0.2719402985074627, "grad_norm": 0.8332630746157706, "learning_rate": 4.271608752158448e-06, "loss": 0.3577, "step": 911 }, { "epoch": 0.27223880597014927, "grad_norm": 0.8441430505402697, "learning_rate": 4.269902320027399e-06, "loss": 0.3927, "step": 912 }, { "epoch": 0.2725373134328358, "grad_norm": 0.8088212649154801, "learning_rate": 4.268194233080823e-06, "loss": 0.3604, "step": 913 }, { "epoch": 0.2728358208955224, "grad_norm": 0.8504371247895061, "learning_rate": 4.266484492915738e-06, "loss": 0.328, "step": 914 }, { "epoch": 0.27313432835820894, "grad_norm": 0.8838721420845274, "learning_rate": 4.264773101130711e-06, "loss": 0.3491, "step": 915 }, { "epoch": 0.2734328358208955, "grad_norm": 0.8378168662917916, "learning_rate": 4.26306005932585e-06, "loss": 0.349, "step": 916 }, { "epoch": 0.27373134328358206, "grad_norm": 0.8952997918529593, "learning_rate": 4.2613453691028085e-06, "loss": 0.4315, "step": 917 }, { "epoch": 0.2740298507462687, "grad_norm": 0.8262214290892325, "learning_rate": 4.2596290320647795e-06, "loss": 0.3247, "step": 918 }, { "epoch": 0.27432835820895524, "grad_norm": 0.7936317313144646, "learning_rate": 4.257911049816497e-06, "loss": 0.3697, "step": 919 }, { "epoch": 0.2746268656716418, "grad_norm": 0.8359230938626984, "learning_rate": 4.256191423964231e-06, "loss": 0.3641, "step": 920 }, { "epoch": 0.27492537313432835, "grad_norm": 0.8439085515909566, "learning_rate": 4.254470156115792e-06, "loss": 0.3295, "step": 921 }, { "epoch": 0.2752238805970149, "grad_norm": 0.9191921854597581, "learning_rate": 4.252747247880521e-06, "loss": 0.4339, "step": 922 }, { "epoch": 0.2755223880597015, "grad_norm": 0.9149683459792712, "learning_rate": 4.2510227008692974e-06, "loss": 0.3599, "step": 923 }, { "epoch": 0.27582089552238803, "grad_norm": 0.8114463730873631, "learning_rate": 4.2492965166945295e-06, "loss": 0.3911, "step": 924 }, { "epoch": 0.27611940298507465, "grad_norm": 0.8792190926539251, "learning_rate": 4.247568696970158e-06, "loss": 0.3544, "step": 925 }, { "epoch": 0.2764179104477612, "grad_norm": 0.8336709348423672, "learning_rate": 4.2458392433116525e-06, "loss": 0.3322, "step": 926 }, { "epoch": 0.27671641791044777, "grad_norm": 1.020710396219329, "learning_rate": 4.244108157336009e-06, "loss": 0.3604, "step": 927 }, { "epoch": 0.2770149253731343, "grad_norm": 0.8143571913624713, "learning_rate": 4.2423754406617505e-06, "loss": 0.3495, "step": 928 }, { "epoch": 0.2773134328358209, "grad_norm": 0.8356882733846251, "learning_rate": 4.2406410949089255e-06, "loss": 0.368, "step": 929 }, { "epoch": 0.27761194029850744, "grad_norm": 0.8645784597745637, "learning_rate": 4.238905121699105e-06, "loss": 0.3739, "step": 930 }, { "epoch": 0.27791044776119406, "grad_norm": 0.8593811145607515, "learning_rate": 4.237167522655382e-06, "loss": 0.3174, "step": 931 }, { "epoch": 0.2782089552238806, "grad_norm": 0.9107381802719423, "learning_rate": 4.235428299402369e-06, "loss": 0.4168, "step": 932 }, { "epoch": 0.2785074626865672, "grad_norm": 0.8146960301870598, "learning_rate": 4.2336874535661966e-06, "loss": 0.3381, "step": 933 }, { "epoch": 0.27880597014925373, "grad_norm": 0.7260268749376332, "learning_rate": 4.231944986774513e-06, "loss": 0.2668, "step": 934 }, { "epoch": 0.2791044776119403, "grad_norm": 0.7381450828289706, "learning_rate": 4.2302009006564845e-06, "loss": 0.3379, "step": 935 }, { "epoch": 0.27940298507462685, "grad_norm": 0.906697687644743, "learning_rate": 4.228455196842787e-06, "loss": 0.3525, "step": 936 }, { "epoch": 0.2797014925373134, "grad_norm": 0.8801394409694048, "learning_rate": 4.2267078769656115e-06, "loss": 0.3684, "step": 937 }, { "epoch": 0.28, "grad_norm": 0.8229105599808183, "learning_rate": 4.22495894265866e-06, "loss": 0.3688, "step": 938 }, { "epoch": 0.2802985074626866, "grad_norm": 0.8866551931642531, "learning_rate": 4.223208395557142e-06, "loss": 0.3939, "step": 939 }, { "epoch": 0.28059701492537314, "grad_norm": 0.796873195210361, "learning_rate": 4.221456237297779e-06, "loss": 0.3246, "step": 940 }, { "epoch": 0.2808955223880597, "grad_norm": 0.8777704725183845, "learning_rate": 4.219702469518794e-06, "loss": 0.3148, "step": 941 }, { "epoch": 0.28119402985074626, "grad_norm": 0.8149960092397086, "learning_rate": 4.217947093859917e-06, "loss": 0.3261, "step": 942 }, { "epoch": 0.2814925373134328, "grad_norm": 0.782356882918818, "learning_rate": 4.216190111962383e-06, "loss": 0.3594, "step": 943 }, { "epoch": 0.2817910447761194, "grad_norm": 0.7838115738847105, "learning_rate": 4.2144315254689265e-06, "loss": 0.3658, "step": 944 }, { "epoch": 0.282089552238806, "grad_norm": 0.861027660876004, "learning_rate": 4.2126713360237835e-06, "loss": 0.3216, "step": 945 }, { "epoch": 0.28238805970149256, "grad_norm": 0.8979440300386647, "learning_rate": 4.210909545272687e-06, "loss": 0.347, "step": 946 }, { "epoch": 0.2826865671641791, "grad_norm": 0.8046473678420953, "learning_rate": 4.2091461548628695e-06, "loss": 0.3817, "step": 947 }, { "epoch": 0.2829850746268657, "grad_norm": 0.8999763252157099, "learning_rate": 4.207381166443058e-06, "loss": 0.3384, "step": 948 }, { "epoch": 0.28328358208955223, "grad_norm": 1.0558611307630636, "learning_rate": 4.205614581663472e-06, "loss": 0.3764, "step": 949 }, { "epoch": 0.2835820895522388, "grad_norm": 0.8250495089946933, "learning_rate": 4.203846402175828e-06, "loss": 0.3466, "step": 950 }, { "epoch": 0.28388059701492535, "grad_norm": 0.8080126407334502, "learning_rate": 4.202076629633329e-06, "loss": 0.3379, "step": 951 }, { "epoch": 0.28417910447761197, "grad_norm": 0.8189735637468861, "learning_rate": 4.200305265690669e-06, "loss": 0.3717, "step": 952 }, { "epoch": 0.2844776119402985, "grad_norm": 0.8206896400863205, "learning_rate": 4.198532312004031e-06, "loss": 0.3424, "step": 953 }, { "epoch": 0.2847761194029851, "grad_norm": 0.8182301432265623, "learning_rate": 4.1967577702310826e-06, "loss": 0.3459, "step": 954 }, { "epoch": 0.28507462686567164, "grad_norm": 0.860760231935904, "learning_rate": 4.194981642030978e-06, "loss": 0.3934, "step": 955 }, { "epoch": 0.2853731343283582, "grad_norm": 0.89467249534368, "learning_rate": 4.1932039290643534e-06, "loss": 0.3867, "step": 956 }, { "epoch": 0.28567164179104476, "grad_norm": 0.9126227744137325, "learning_rate": 4.191424632993326e-06, "loss": 0.3452, "step": 957 }, { "epoch": 0.2859701492537313, "grad_norm": 0.8112959766461156, "learning_rate": 4.189643755481497e-06, "loss": 0.2939, "step": 958 }, { "epoch": 0.28626865671641794, "grad_norm": 0.9304054160150231, "learning_rate": 4.18786129819394e-06, "loss": 0.3461, "step": 959 }, { "epoch": 0.2865671641791045, "grad_norm": 0.7922770615613615, "learning_rate": 4.1860772627972125e-06, "loss": 0.3435, "step": 960 }, { "epoch": 0.28686567164179105, "grad_norm": 0.7959362445567673, "learning_rate": 4.184291650959341e-06, "loss": 0.3534, "step": 961 }, { "epoch": 0.2871641791044776, "grad_norm": 0.8002993493679609, "learning_rate": 4.182504464349832e-06, "loss": 0.3316, "step": 962 }, { "epoch": 0.28746268656716417, "grad_norm": 0.8167637473996379, "learning_rate": 4.180715704639659e-06, "loss": 0.3539, "step": 963 }, { "epoch": 0.28776119402985073, "grad_norm": 0.802203149893345, "learning_rate": 4.178925373501269e-06, "loss": 0.3449, "step": 964 }, { "epoch": 0.2880597014925373, "grad_norm": 0.8592664185496729, "learning_rate": 4.17713347260858e-06, "loss": 0.3828, "step": 965 }, { "epoch": 0.2883582089552239, "grad_norm": 0.8304560933393987, "learning_rate": 4.175340003636974e-06, "loss": 0.3821, "step": 966 }, { "epoch": 0.28865671641791046, "grad_norm": 0.9391386547612607, "learning_rate": 4.173544968263301e-06, "loss": 0.3846, "step": 967 }, { "epoch": 0.288955223880597, "grad_norm": 0.8262795778364892, "learning_rate": 4.171748368165875e-06, "loss": 0.3383, "step": 968 }, { "epoch": 0.2892537313432836, "grad_norm": 0.841522653230346, "learning_rate": 4.169950205024474e-06, "loss": 0.3355, "step": 969 }, { "epoch": 0.28955223880597014, "grad_norm": 0.9913354298562964, "learning_rate": 4.168150480520337e-06, "loss": 0.419, "step": 970 }, { "epoch": 0.2898507462686567, "grad_norm": 0.8071892265145763, "learning_rate": 4.16634919633616e-06, "loss": 0.3449, "step": 971 }, { "epoch": 0.29014925373134326, "grad_norm": 0.8508150549887834, "learning_rate": 4.164546354156104e-06, "loss": 0.3672, "step": 972 }, { "epoch": 0.2904477611940299, "grad_norm": 0.7899568543277075, "learning_rate": 4.162741955665779e-06, "loss": 0.3329, "step": 973 }, { "epoch": 0.29074626865671643, "grad_norm": 0.80104254741407, "learning_rate": 4.160936002552255e-06, "loss": 0.3565, "step": 974 }, { "epoch": 0.291044776119403, "grad_norm": 0.7742895075156045, "learning_rate": 4.159128496504054e-06, "loss": 0.3156, "step": 975 }, { "epoch": 0.29134328358208955, "grad_norm": 0.8652205416649817, "learning_rate": 4.157319439211151e-06, "loss": 0.3567, "step": 976 }, { "epoch": 0.2916417910447761, "grad_norm": 0.8552104838671654, "learning_rate": 4.155508832364968e-06, "loss": 0.316, "step": 977 }, { "epoch": 0.29194029850746267, "grad_norm": 0.8757415112296788, "learning_rate": 4.153696677658381e-06, "loss": 0.3494, "step": 978 }, { "epoch": 0.29223880597014923, "grad_norm": 0.8389906004653142, "learning_rate": 4.151882976785709e-06, "loss": 0.3291, "step": 979 }, { "epoch": 0.29253731343283584, "grad_norm": 0.7800988745138925, "learning_rate": 4.150067731442717e-06, "loss": 0.3167, "step": 980 }, { "epoch": 0.2928358208955224, "grad_norm": 0.8294199151333148, "learning_rate": 4.148250943326619e-06, "loss": 0.3623, "step": 981 }, { "epoch": 0.29313432835820896, "grad_norm": 0.8578101736701281, "learning_rate": 4.146432614136064e-06, "loss": 0.307, "step": 982 }, { "epoch": 0.2934328358208955, "grad_norm": 0.7856364156171273, "learning_rate": 4.144612745571146e-06, "loss": 0.3886, "step": 983 }, { "epoch": 0.2937313432835821, "grad_norm": 0.8634192899478178, "learning_rate": 4.1427913393333985e-06, "loss": 0.3953, "step": 984 }, { "epoch": 0.29402985074626864, "grad_norm": 0.8485105584492334, "learning_rate": 4.140968397125793e-06, "loss": 0.3629, "step": 985 }, { "epoch": 0.2943283582089552, "grad_norm": 0.9742112909739822, "learning_rate": 4.139143920652734e-06, "loss": 0.3835, "step": 986 }, { "epoch": 0.2946268656716418, "grad_norm": 0.9878802852692959, "learning_rate": 4.137317911620063e-06, "loss": 0.4003, "step": 987 }, { "epoch": 0.2949253731343284, "grad_norm": 0.8815419197549593, "learning_rate": 4.1354903717350556e-06, "loss": 0.344, "step": 988 }, { "epoch": 0.29522388059701493, "grad_norm": 0.834846080220852, "learning_rate": 4.133661302706415e-06, "loss": 0.3696, "step": 989 }, { "epoch": 0.2955223880597015, "grad_norm": 0.845007866793147, "learning_rate": 4.131830706244276e-06, "loss": 0.3561, "step": 990 }, { "epoch": 0.29582089552238805, "grad_norm": 0.8925684451459167, "learning_rate": 4.129998584060204e-06, "loss": 0.3805, "step": 991 }, { "epoch": 0.2961194029850746, "grad_norm": 0.8371742760782053, "learning_rate": 4.128164937867187e-06, "loss": 0.3452, "step": 992 }, { "epoch": 0.29641791044776117, "grad_norm": 0.8300330502109989, "learning_rate": 4.12632976937964e-06, "loss": 0.3614, "step": 993 }, { "epoch": 0.2967164179104478, "grad_norm": 0.8341386461290142, "learning_rate": 4.1244930803134e-06, "loss": 0.3697, "step": 994 }, { "epoch": 0.29701492537313434, "grad_norm": 0.9247230906706114, "learning_rate": 4.122654872385726e-06, "loss": 0.403, "step": 995 }, { "epoch": 0.2973134328358209, "grad_norm": 0.815687144485872, "learning_rate": 4.1208151473153e-06, "loss": 0.3467, "step": 996 }, { "epoch": 0.29761194029850746, "grad_norm": 0.9159383505804287, "learning_rate": 4.118973906822218e-06, "loss": 0.3492, "step": 997 }, { "epoch": 0.297910447761194, "grad_norm": 0.8149117723593948, "learning_rate": 4.117131152627996e-06, "loss": 0.3371, "step": 998 }, { "epoch": 0.2982089552238806, "grad_norm": 0.7227696644393177, "learning_rate": 4.1152868864555626e-06, "loss": 0.3277, "step": 999 }, { "epoch": 0.29850746268656714, "grad_norm": 0.827257070561528, "learning_rate": 4.113441110029265e-06, "loss": 0.3227, "step": 1000 }, { "epoch": 0.29880597014925375, "grad_norm": 0.8467214707843915, "learning_rate": 4.111593825074856e-06, "loss": 0.337, "step": 1001 }, { "epoch": 0.2991044776119403, "grad_norm": 0.8920138785669369, "learning_rate": 4.109745033319504e-06, "loss": 0.3982, "step": 1002 }, { "epoch": 0.29940298507462687, "grad_norm": 0.8027122577683616, "learning_rate": 4.107894736491783e-06, "loss": 0.3513, "step": 1003 }, { "epoch": 0.29970149253731343, "grad_norm": 1.2167741273718207, "learning_rate": 4.106042936321676e-06, "loss": 0.3689, "step": 1004 }, { "epoch": 0.3, "grad_norm": 0.8627016240432611, "learning_rate": 4.10418963454057e-06, "loss": 0.3495, "step": 1005 }, { "epoch": 0.30029850746268655, "grad_norm": 0.8585195931421936, "learning_rate": 4.10233483288126e-06, "loss": 0.317, "step": 1006 }, { "epoch": 0.3005970149253731, "grad_norm": 0.8429386990797031, "learning_rate": 4.100478533077936e-06, "loss": 0.3306, "step": 1007 }, { "epoch": 0.3008955223880597, "grad_norm": 0.7972123992318357, "learning_rate": 4.098620736866196e-06, "loss": 0.3493, "step": 1008 }, { "epoch": 0.3011940298507463, "grad_norm": 0.8661016284011678, "learning_rate": 4.096761445983034e-06, "loss": 0.3824, "step": 1009 }, { "epoch": 0.30149253731343284, "grad_norm": 0.8703037201689672, "learning_rate": 4.0949006621668405e-06, "loss": 0.3902, "step": 1010 }, { "epoch": 0.3017910447761194, "grad_norm": 0.9376297455888316, "learning_rate": 4.093038387157404e-06, "loss": 0.3609, "step": 1011 }, { "epoch": 0.30208955223880596, "grad_norm": 0.7998064580888031, "learning_rate": 4.091174622695906e-06, "loss": 0.3598, "step": 1012 }, { "epoch": 0.3023880597014925, "grad_norm": 0.8428236863313218, "learning_rate": 4.089309370524921e-06, "loss": 0.3676, "step": 1013 }, { "epoch": 0.30268656716417913, "grad_norm": 0.8469734624648289, "learning_rate": 4.087442632388413e-06, "loss": 0.3791, "step": 1014 }, { "epoch": 0.3029850746268657, "grad_norm": 0.8517774956533634, "learning_rate": 4.085574410031739e-06, "loss": 0.3745, "step": 1015 }, { "epoch": 0.30328358208955225, "grad_norm": 0.8281651202406228, "learning_rate": 4.083704705201639e-06, "loss": 0.3557, "step": 1016 }, { "epoch": 0.3035820895522388, "grad_norm": 0.9004766310930208, "learning_rate": 4.081833519646242e-06, "loss": 0.3408, "step": 1017 }, { "epoch": 0.30388059701492537, "grad_norm": 0.7898205886854617, "learning_rate": 4.0799608551150625e-06, "loss": 0.37, "step": 1018 }, { "epoch": 0.30417910447761193, "grad_norm": 0.8032659295487834, "learning_rate": 4.078086713358994e-06, "loss": 0.3506, "step": 1019 }, { "epoch": 0.3044776119402985, "grad_norm": 0.9141743806902359, "learning_rate": 4.076211096130316e-06, "loss": 0.3704, "step": 1020 }, { "epoch": 0.3047761194029851, "grad_norm": 0.8282796502709796, "learning_rate": 4.074334005182682e-06, "loss": 0.3571, "step": 1021 }, { "epoch": 0.30507462686567166, "grad_norm": 0.8496527957007282, "learning_rate": 4.072455442271128e-06, "loss": 0.3163, "step": 1022 }, { "epoch": 0.3053731343283582, "grad_norm": 0.8977935551607475, "learning_rate": 4.070575409152064e-06, "loss": 0.4037, "step": 1023 }, { "epoch": 0.3056716417910448, "grad_norm": 0.8847365927783407, "learning_rate": 4.068693907583276e-06, "loss": 0.4017, "step": 1024 }, { "epoch": 0.30597014925373134, "grad_norm": 0.7747661612730282, "learning_rate": 4.06681093932392e-06, "loss": 0.3397, "step": 1025 }, { "epoch": 0.3062686567164179, "grad_norm": 0.8825845190947826, "learning_rate": 4.064926506134528e-06, "loss": 0.3298, "step": 1026 }, { "epoch": 0.30656716417910446, "grad_norm": 0.8592497304478822, "learning_rate": 4.063040609776998e-06, "loss": 0.349, "step": 1027 }, { "epoch": 0.30686567164179107, "grad_norm": 0.8612997055386207, "learning_rate": 4.0611532520145965e-06, "loss": 0.3972, "step": 1028 }, { "epoch": 0.30716417910447763, "grad_norm": 0.928710595753594, "learning_rate": 4.059264434611957e-06, "loss": 0.3751, "step": 1029 }, { "epoch": 0.3074626865671642, "grad_norm": 0.8652473522503297, "learning_rate": 4.05737415933508e-06, "loss": 0.3688, "step": 1030 }, { "epoch": 0.30776119402985075, "grad_norm": 0.8978250178705632, "learning_rate": 4.055482427951324e-06, "loss": 0.3916, "step": 1031 }, { "epoch": 0.3080597014925373, "grad_norm": 0.9090489721659124, "learning_rate": 4.053589242229412e-06, "loss": 0.354, "step": 1032 }, { "epoch": 0.30835820895522387, "grad_norm": 0.8801900400550225, "learning_rate": 4.051694603939429e-06, "loss": 0.3504, "step": 1033 }, { "epoch": 0.3086567164179104, "grad_norm": 0.8960014398639581, "learning_rate": 4.049798514852812e-06, "loss": 0.3423, "step": 1034 }, { "epoch": 0.30895522388059704, "grad_norm": 0.798818576158037, "learning_rate": 4.047900976742362e-06, "loss": 0.3443, "step": 1035 }, { "epoch": 0.3092537313432836, "grad_norm": 0.8059330952772015, "learning_rate": 4.046001991382227e-06, "loss": 0.3743, "step": 1036 }, { "epoch": 0.30955223880597016, "grad_norm": 0.8226038835739496, "learning_rate": 4.044101560547915e-06, "loss": 0.3515, "step": 1037 }, { "epoch": 0.3098507462686567, "grad_norm": 0.8230993349145885, "learning_rate": 4.042199686016281e-06, "loss": 0.3701, "step": 1038 }, { "epoch": 0.3101492537313433, "grad_norm": 0.7911941573133365, "learning_rate": 4.040296369565532e-06, "loss": 0.3396, "step": 1039 }, { "epoch": 0.31044776119402984, "grad_norm": 0.8712443502975892, "learning_rate": 4.038391612975222e-06, "loss": 0.3927, "step": 1040 }, { "epoch": 0.3107462686567164, "grad_norm": 0.8233453064590726, "learning_rate": 4.036485418026253e-06, "loss": 0.3659, "step": 1041 }, { "epoch": 0.311044776119403, "grad_norm": 0.8448815164890928, "learning_rate": 4.034577786500869e-06, "loss": 0.3512, "step": 1042 }, { "epoch": 0.31134328358208957, "grad_norm": 0.8327563911984053, "learning_rate": 4.0326687201826605e-06, "loss": 0.3255, "step": 1043 }, { "epoch": 0.31164179104477613, "grad_norm": 0.8220120310659925, "learning_rate": 4.030758220856558e-06, "loss": 0.3298, "step": 1044 }, { "epoch": 0.3119402985074627, "grad_norm": 0.8028153864586365, "learning_rate": 4.02884629030883e-06, "loss": 0.3678, "step": 1045 }, { "epoch": 0.31223880597014925, "grad_norm": 0.8349137515362423, "learning_rate": 4.026932930327088e-06, "loss": 0.3783, "step": 1046 }, { "epoch": 0.3125373134328358, "grad_norm": 0.7766428030622539, "learning_rate": 4.025018142700273e-06, "loss": 0.2854, "step": 1047 }, { "epoch": 0.31283582089552237, "grad_norm": 0.9182239119842932, "learning_rate": 4.0231019292186685e-06, "loss": 0.4211, "step": 1048 }, { "epoch": 0.313134328358209, "grad_norm": 0.8518537676125332, "learning_rate": 4.0211842916738855e-06, "loss": 0.3695, "step": 1049 }, { "epoch": 0.31343283582089554, "grad_norm": 0.7665539603822111, "learning_rate": 4.019265231858869e-06, "loss": 0.3235, "step": 1050 }, { "epoch": 0.3137313432835821, "grad_norm": 0.7995496595040446, "learning_rate": 4.017344751567892e-06, "loss": 0.3384, "step": 1051 }, { "epoch": 0.31402985074626866, "grad_norm": 0.7860027976534756, "learning_rate": 4.0154228525965585e-06, "loss": 0.2767, "step": 1052 }, { "epoch": 0.3143283582089552, "grad_norm": 0.8580440875177763, "learning_rate": 4.0134995367417965e-06, "loss": 0.3525, "step": 1053 }, { "epoch": 0.3146268656716418, "grad_norm": 0.9336926190628849, "learning_rate": 4.011574805801858e-06, "loss": 0.3394, "step": 1054 }, { "epoch": 0.31492537313432833, "grad_norm": 0.8425176180393162, "learning_rate": 4.009648661576321e-06, "loss": 0.3511, "step": 1055 }, { "epoch": 0.31522388059701495, "grad_norm": 0.8276026034880272, "learning_rate": 4.007721105866084e-06, "loss": 0.3602, "step": 1056 }, { "epoch": 0.3155223880597015, "grad_norm": 0.787475800014448, "learning_rate": 4.005792140473363e-06, "loss": 0.3188, "step": 1057 }, { "epoch": 0.31582089552238807, "grad_norm": 0.8880428207150297, "learning_rate": 4.003861767201695e-06, "loss": 0.3342, "step": 1058 }, { "epoch": 0.3161194029850746, "grad_norm": 0.8017106892627521, "learning_rate": 4.001929987855931e-06, "loss": 0.3502, "step": 1059 }, { "epoch": 0.3164179104477612, "grad_norm": 0.7848482690732528, "learning_rate": 3.999996804242238e-06, "loss": 0.3337, "step": 1060 }, { "epoch": 0.31671641791044775, "grad_norm": 0.8099722326612471, "learning_rate": 3.998062218168096e-06, "loss": 0.3358, "step": 1061 }, { "epoch": 0.3170149253731343, "grad_norm": 0.8230124063451144, "learning_rate": 3.996126231442295e-06, "loss": 0.3732, "step": 1062 }, { "epoch": 0.3173134328358209, "grad_norm": 0.7714716419614884, "learning_rate": 3.994188845874936e-06, "loss": 0.3289, "step": 1063 }, { "epoch": 0.3176119402985075, "grad_norm": 0.7922647776658421, "learning_rate": 3.992250063277427e-06, "loss": 0.3154, "step": 1064 }, { "epoch": 0.31791044776119404, "grad_norm": 0.8044498494994091, "learning_rate": 3.990309885462481e-06, "loss": 0.3378, "step": 1065 }, { "epoch": 0.3182089552238806, "grad_norm": 0.8820721478686536, "learning_rate": 3.98836831424412e-06, "loss": 0.3511, "step": 1066 }, { "epoch": 0.31850746268656716, "grad_norm": 0.8572321551754879, "learning_rate": 3.9864253514376634e-06, "loss": 0.3646, "step": 1067 }, { "epoch": 0.3188059701492537, "grad_norm": 0.8802364307844034, "learning_rate": 3.9844809988597355e-06, "loss": 0.3448, "step": 1068 }, { "epoch": 0.3191044776119403, "grad_norm": 0.8291543922384458, "learning_rate": 3.9825352583282585e-06, "loss": 0.3219, "step": 1069 }, { "epoch": 0.3194029850746269, "grad_norm": 0.8993675406838366, "learning_rate": 3.980588131662451e-06, "loss": 0.4098, "step": 1070 }, { "epoch": 0.31970149253731345, "grad_norm": 0.8157166363381144, "learning_rate": 3.978639620682829e-06, "loss": 0.3402, "step": 1071 }, { "epoch": 0.32, "grad_norm": 0.8073929451435011, "learning_rate": 3.976689727211205e-06, "loss": 0.3802, "step": 1072 }, { "epoch": 0.32029850746268657, "grad_norm": 0.8488047650093633, "learning_rate": 3.97473845307068e-06, "loss": 0.3231, "step": 1073 }, { "epoch": 0.3205970149253731, "grad_norm": 0.8608697980068266, "learning_rate": 3.972785800085647e-06, "loss": 0.3523, "step": 1074 }, { "epoch": 0.3208955223880597, "grad_norm": 0.8021106185518122, "learning_rate": 3.970831770081791e-06, "loss": 0.3536, "step": 1075 }, { "epoch": 0.32119402985074624, "grad_norm": 0.8796976566955086, "learning_rate": 3.968876364886082e-06, "loss": 0.3275, "step": 1076 }, { "epoch": 0.32149253731343286, "grad_norm": 0.8058440193793543, "learning_rate": 3.966919586326775e-06, "loss": 0.375, "step": 1077 }, { "epoch": 0.3217910447761194, "grad_norm": 0.8775522173367613, "learning_rate": 3.964961436233412e-06, "loss": 0.3588, "step": 1078 }, { "epoch": 0.322089552238806, "grad_norm": 0.9098939150341124, "learning_rate": 3.963001916436814e-06, "loss": 0.3587, "step": 1079 }, { "epoch": 0.32238805970149254, "grad_norm": 0.8473555634681235, "learning_rate": 3.961041028769085e-06, "loss": 0.3376, "step": 1080 }, { "epoch": 0.3226865671641791, "grad_norm": 0.9150706720781269, "learning_rate": 3.959078775063607e-06, "loss": 0.3745, "step": 1081 }, { "epoch": 0.32298507462686565, "grad_norm": 0.9386215098514821, "learning_rate": 3.95711515715504e-06, "loss": 0.3778, "step": 1082 }, { "epoch": 0.3232835820895522, "grad_norm": 0.7483070851000795, "learning_rate": 3.955150176879316e-06, "loss": 0.3397, "step": 1083 }, { "epoch": 0.3235820895522388, "grad_norm": 0.6912249624197083, "learning_rate": 3.953183836073649e-06, "loss": 0.3304, "step": 1084 }, { "epoch": 0.3238805970149254, "grad_norm": 0.8348347584691441, "learning_rate": 3.951216136576515e-06, "loss": 0.3316, "step": 1085 }, { "epoch": 0.32417910447761195, "grad_norm": 0.8690430526369205, "learning_rate": 3.949247080227666e-06, "loss": 0.3461, "step": 1086 }, { "epoch": 0.3244776119402985, "grad_norm": 0.8050979365143089, "learning_rate": 3.947276668868124e-06, "loss": 0.3642, "step": 1087 }, { "epoch": 0.32477611940298506, "grad_norm": 0.7662634270514135, "learning_rate": 3.945304904340174e-06, "loss": 0.3445, "step": 1088 }, { "epoch": 0.3250746268656716, "grad_norm": 0.84107358699445, "learning_rate": 3.943331788487366e-06, "loss": 0.358, "step": 1089 }, { "epoch": 0.3253731343283582, "grad_norm": 0.824621628564205, "learning_rate": 3.941357323154519e-06, "loss": 0.2967, "step": 1090 }, { "epoch": 0.3256716417910448, "grad_norm": 0.806684118104856, "learning_rate": 3.9393815101877076e-06, "loss": 0.3335, "step": 1091 }, { "epoch": 0.32597014925373136, "grad_norm": 0.8342281660142485, "learning_rate": 3.937404351434269e-06, "loss": 0.3748, "step": 1092 }, { "epoch": 0.3262686567164179, "grad_norm": 0.873757164412416, "learning_rate": 3.935425848742797e-06, "loss": 0.3692, "step": 1093 }, { "epoch": 0.3265671641791045, "grad_norm": 0.7814545073329886, "learning_rate": 3.933446003963147e-06, "loss": 0.3294, "step": 1094 }, { "epoch": 0.32686567164179103, "grad_norm": 0.8482386135188483, "learning_rate": 3.9314648189464226e-06, "loss": 0.3259, "step": 1095 }, { "epoch": 0.3271641791044776, "grad_norm": 0.8036333351717462, "learning_rate": 3.929482295544985e-06, "loss": 0.3496, "step": 1096 }, { "epoch": 0.32746268656716415, "grad_norm": 0.9025969332953864, "learning_rate": 3.927498435612444e-06, "loss": 0.3823, "step": 1097 }, { "epoch": 0.32776119402985077, "grad_norm": 0.8330683787016878, "learning_rate": 3.925513241003663e-06, "loss": 0.3654, "step": 1098 }, { "epoch": 0.3280597014925373, "grad_norm": 0.9303082381552658, "learning_rate": 3.923526713574747e-06, "loss": 0.3674, "step": 1099 }, { "epoch": 0.3283582089552239, "grad_norm": 1.181233124032353, "learning_rate": 3.921538855183053e-06, "loss": 0.364, "step": 1100 }, { "epoch": 0.32865671641791044, "grad_norm": 0.8588441469224205, "learning_rate": 3.919549667687178e-06, "loss": 0.3635, "step": 1101 }, { "epoch": 0.328955223880597, "grad_norm": 0.7804288737599779, "learning_rate": 3.917559152946966e-06, "loss": 0.3459, "step": 1102 }, { "epoch": 0.32925373134328356, "grad_norm": 0.9027645900655169, "learning_rate": 3.9155673128235e-06, "loss": 0.3516, "step": 1103 }, { "epoch": 0.3295522388059702, "grad_norm": 0.8393324136480259, "learning_rate": 3.9135741491791e-06, "loss": 0.3685, "step": 1104 }, { "epoch": 0.32985074626865674, "grad_norm": 0.7473370868680391, "learning_rate": 3.9115796638773275e-06, "loss": 0.2815, "step": 1105 }, { "epoch": 0.3301492537313433, "grad_norm": 0.8919025147790259, "learning_rate": 3.9095838587829756e-06, "loss": 0.3857, "step": 1106 }, { "epoch": 0.33044776119402985, "grad_norm": 0.817241418807688, "learning_rate": 3.907586735762074e-06, "loss": 0.3732, "step": 1107 }, { "epoch": 0.3307462686567164, "grad_norm": 0.9218470406185262, "learning_rate": 3.9055882966818855e-06, "loss": 0.3376, "step": 1108 }, { "epoch": 0.331044776119403, "grad_norm": 0.8177748051426704, "learning_rate": 3.9035885434109014e-06, "loss": 0.3184, "step": 1109 }, { "epoch": 0.33134328358208953, "grad_norm": 0.7164318821917856, "learning_rate": 3.90158747781884e-06, "loss": 0.3142, "step": 1110 }, { "epoch": 0.33164179104477615, "grad_norm": 0.8400280769287423, "learning_rate": 3.899585101776652e-06, "loss": 0.302, "step": 1111 }, { "epoch": 0.3319402985074627, "grad_norm": 0.7773008415105561, "learning_rate": 3.8975814171565075e-06, "loss": 0.3799, "step": 1112 }, { "epoch": 0.33223880597014926, "grad_norm": 0.897234720453567, "learning_rate": 3.895576425831805e-06, "loss": 0.3883, "step": 1113 }, { "epoch": 0.3325373134328358, "grad_norm": 0.8625016931833325, "learning_rate": 3.893570129677161e-06, "loss": 0.337, "step": 1114 }, { "epoch": 0.3328358208955224, "grad_norm": 0.8269153711097461, "learning_rate": 3.8915625305684145e-06, "loss": 0.3452, "step": 1115 }, { "epoch": 0.33313432835820894, "grad_norm": 0.8632059531163434, "learning_rate": 3.889553630382621e-06, "loss": 0.3655, "step": 1116 }, { "epoch": 0.3334328358208955, "grad_norm": 0.7706052114027439, "learning_rate": 3.8875434309980545e-06, "loss": 0.3209, "step": 1117 }, { "epoch": 0.3337313432835821, "grad_norm": 0.8172417996393967, "learning_rate": 3.8855319342942e-06, "loss": 0.3588, "step": 1118 }, { "epoch": 0.3340298507462687, "grad_norm": 0.7457140782960014, "learning_rate": 3.883519142151761e-06, "loss": 0.3206, "step": 1119 }, { "epoch": 0.33432835820895523, "grad_norm": 0.9075055402661403, "learning_rate": 3.881505056452646e-06, "loss": 0.388, "step": 1120 }, { "epoch": 0.3346268656716418, "grad_norm": 0.8633987237770478, "learning_rate": 3.879489679079977e-06, "loss": 0.3335, "step": 1121 }, { "epoch": 0.33492537313432835, "grad_norm": 0.8015175637999997, "learning_rate": 3.877473011918084e-06, "loss": 0.3608, "step": 1122 }, { "epoch": 0.3352238805970149, "grad_norm": 0.9650469277739403, "learning_rate": 3.8754550568525006e-06, "loss": 0.3826, "step": 1123 }, { "epoch": 0.33552238805970147, "grad_norm": 0.8531369547701335, "learning_rate": 3.8734358157699666e-06, "loss": 0.3907, "step": 1124 }, { "epoch": 0.3358208955223881, "grad_norm": 0.8203181528488143, "learning_rate": 3.871415290558421e-06, "loss": 0.341, "step": 1125 }, { "epoch": 0.33611940298507464, "grad_norm": 0.9506865101073372, "learning_rate": 3.869393483107008e-06, "loss": 0.3766, "step": 1126 }, { "epoch": 0.3364179104477612, "grad_norm": 0.7959518196575798, "learning_rate": 3.8673703953060685e-06, "loss": 0.3215, "step": 1127 }, { "epoch": 0.33671641791044776, "grad_norm": 0.9587562044134879, "learning_rate": 3.865346029047137e-06, "loss": 0.425, "step": 1128 }, { "epoch": 0.3370149253731343, "grad_norm": 0.7933800883348958, "learning_rate": 3.863320386222949e-06, "loss": 0.321, "step": 1129 }, { "epoch": 0.3373134328358209, "grad_norm": 0.7786165000499077, "learning_rate": 3.861293468727432e-06, "loss": 0.3249, "step": 1130 }, { "epoch": 0.33761194029850744, "grad_norm": 0.9211593511912554, "learning_rate": 3.859265278455701e-06, "loss": 0.3879, "step": 1131 }, { "epoch": 0.33791044776119405, "grad_norm": 0.86664700650382, "learning_rate": 3.8572358173040695e-06, "loss": 0.3119, "step": 1132 }, { "epoch": 0.3382089552238806, "grad_norm": 0.8560710542669949, "learning_rate": 3.85520508717003e-06, "loss": 0.3335, "step": 1133 }, { "epoch": 0.3385074626865672, "grad_norm": 0.9378308935231966, "learning_rate": 3.853173089952268e-06, "loss": 0.3437, "step": 1134 }, { "epoch": 0.33880597014925373, "grad_norm": 0.9021474935248037, "learning_rate": 3.85113982755065e-06, "loss": 0.3441, "step": 1135 }, { "epoch": 0.3391044776119403, "grad_norm": 0.8505218970552499, "learning_rate": 3.849105301866227e-06, "loss": 0.3595, "step": 1136 }, { "epoch": 0.33940298507462685, "grad_norm": 1.0019110671578144, "learning_rate": 3.847069514801232e-06, "loss": 0.348, "step": 1137 }, { "epoch": 0.3397014925373134, "grad_norm": 0.896112478403269, "learning_rate": 3.845032468259075e-06, "loss": 0.3562, "step": 1138 }, { "epoch": 0.34, "grad_norm": 0.8272485945752788, "learning_rate": 3.842994164144346e-06, "loss": 0.3216, "step": 1139 }, { "epoch": 0.3402985074626866, "grad_norm": 0.7890421842215967, "learning_rate": 3.840954604362809e-06, "loss": 0.3341, "step": 1140 }, { "epoch": 0.34059701492537314, "grad_norm": 0.8688050179607455, "learning_rate": 3.838913790821402e-06, "loss": 0.3232, "step": 1141 }, { "epoch": 0.3408955223880597, "grad_norm": 1.0530125896366147, "learning_rate": 3.8368717254282364e-06, "loss": 0.389, "step": 1142 }, { "epoch": 0.34119402985074626, "grad_norm": 0.9015300393082843, "learning_rate": 3.834828410092595e-06, "loss": 0.3431, "step": 1143 }, { "epoch": 0.3414925373134328, "grad_norm": 0.8299009904531546, "learning_rate": 3.8327838467249255e-06, "loss": 0.3644, "step": 1144 }, { "epoch": 0.3417910447761194, "grad_norm": 0.8043231321701094, "learning_rate": 3.830738037236848e-06, "loss": 0.3477, "step": 1145 }, { "epoch": 0.342089552238806, "grad_norm": 0.753965995052167, "learning_rate": 3.82869098354114e-06, "loss": 0.3473, "step": 1146 }, { "epoch": 0.34238805970149255, "grad_norm": 0.7796417555071672, "learning_rate": 3.826642687551751e-06, "loss": 0.3402, "step": 1147 }, { "epoch": 0.3426865671641791, "grad_norm": 0.8401115556379568, "learning_rate": 3.824593151183785e-06, "loss": 0.3521, "step": 1148 }, { "epoch": 0.34298507462686567, "grad_norm": 0.8843481013387703, "learning_rate": 3.82254237635351e-06, "loss": 0.3826, "step": 1149 }, { "epoch": 0.34328358208955223, "grad_norm": 0.8732317874736554, "learning_rate": 3.82049036497835e-06, "loss": 0.3005, "step": 1150 }, { "epoch": 0.3435820895522388, "grad_norm": 0.8189473191628437, "learning_rate": 3.8184371189768855e-06, "loss": 0.3464, "step": 1151 }, { "epoch": 0.34388059701492535, "grad_norm": 1.107894296285839, "learning_rate": 3.816382640268852e-06, "loss": 0.3991, "step": 1152 }, { "epoch": 0.34417910447761196, "grad_norm": 0.8200875925736727, "learning_rate": 3.8143269307751373e-06, "loss": 0.3489, "step": 1153 }, { "epoch": 0.3444776119402985, "grad_norm": 0.9460453425867347, "learning_rate": 3.8122699924177786e-06, "loss": 0.3836, "step": 1154 }, { "epoch": 0.3447761194029851, "grad_norm": 0.772503033014387, "learning_rate": 3.8102118271199638e-06, "loss": 0.3285, "step": 1155 }, { "epoch": 0.34507462686567164, "grad_norm": 0.8815073767971605, "learning_rate": 3.8081524368060273e-06, "loss": 0.3988, "step": 1156 }, { "epoch": 0.3453731343283582, "grad_norm": 0.8885895081542389, "learning_rate": 3.806091823401448e-06, "loss": 0.3049, "step": 1157 }, { "epoch": 0.34567164179104476, "grad_norm": 0.895020437102532, "learning_rate": 3.80402998883285e-06, "loss": 0.4085, "step": 1158 }, { "epoch": 0.3459701492537313, "grad_norm": 0.9171169870830792, "learning_rate": 3.8019669350279985e-06, "loss": 0.3629, "step": 1159 }, { "epoch": 0.34626865671641793, "grad_norm": 0.8081376377551374, "learning_rate": 3.7999026639157983e-06, "loss": 0.3319, "step": 1160 }, { "epoch": 0.3465671641791045, "grad_norm": 0.8402653067564256, "learning_rate": 3.797837177426292e-06, "loss": 0.3931, "step": 1161 }, { "epoch": 0.34686567164179105, "grad_norm": 0.837482981106821, "learning_rate": 3.79577047749066e-06, "loss": 0.3157, "step": 1162 }, { "epoch": 0.3471641791044776, "grad_norm": 0.7786230841490198, "learning_rate": 3.793702566041216e-06, "loss": 0.3587, "step": 1163 }, { "epoch": 0.34746268656716417, "grad_norm": 0.8720932882663056, "learning_rate": 3.7916334450114073e-06, "loss": 0.3514, "step": 1164 }, { "epoch": 0.34776119402985073, "grad_norm": 0.8359944457321459, "learning_rate": 3.7895631163358106e-06, "loss": 0.345, "step": 1165 }, { "epoch": 0.3480597014925373, "grad_norm": 0.8019229336175024, "learning_rate": 3.787491581950133e-06, "loss": 0.3482, "step": 1166 }, { "epoch": 0.3483582089552239, "grad_norm": 0.8820518367174046, "learning_rate": 3.7854188437912097e-06, "loss": 0.3733, "step": 1167 }, { "epoch": 0.34865671641791046, "grad_norm": 0.8403047783123814, "learning_rate": 3.783344903796999e-06, "loss": 0.3402, "step": 1168 }, { "epoch": 0.348955223880597, "grad_norm": 0.7415680720483135, "learning_rate": 3.7812697639065843e-06, "loss": 0.3043, "step": 1169 }, { "epoch": 0.3492537313432836, "grad_norm": 0.743125129225478, "learning_rate": 3.779193426060172e-06, "loss": 0.322, "step": 1170 }, { "epoch": 0.34955223880597014, "grad_norm": 0.8684344515884909, "learning_rate": 3.7771158921990865e-06, "loss": 0.4043, "step": 1171 }, { "epoch": 0.3498507462686567, "grad_norm": 0.8825012733228704, "learning_rate": 3.7750371642657722e-06, "loss": 0.3566, "step": 1172 }, { "epoch": 0.35014925373134326, "grad_norm": 0.8095109148739124, "learning_rate": 3.7729572442037877e-06, "loss": 0.3221, "step": 1173 }, { "epoch": 0.35044776119402987, "grad_norm": 0.8568019262787748, "learning_rate": 3.7708761339578082e-06, "loss": 0.3464, "step": 1174 }, { "epoch": 0.35074626865671643, "grad_norm": 0.8431627633291477, "learning_rate": 3.768793835473622e-06, "loss": 0.3341, "step": 1175 }, { "epoch": 0.351044776119403, "grad_norm": 0.8852857190511301, "learning_rate": 3.766710350698125e-06, "loss": 0.4152, "step": 1176 }, { "epoch": 0.35134328358208955, "grad_norm": 0.85299125587319, "learning_rate": 3.764625681579327e-06, "loss": 0.3475, "step": 1177 }, { "epoch": 0.3516417910447761, "grad_norm": 0.9703649850814429, "learning_rate": 3.762539830066343e-06, "loss": 0.3788, "step": 1178 }, { "epoch": 0.35194029850746267, "grad_norm": 0.8714521980948533, "learning_rate": 3.760452798109391e-06, "loss": 0.388, "step": 1179 }, { "epoch": 0.3522388059701492, "grad_norm": 0.7822951433929752, "learning_rate": 3.758364587659796e-06, "loss": 0.3682, "step": 1180 }, { "epoch": 0.35253731343283584, "grad_norm": 0.785928197254092, "learning_rate": 3.756275200669986e-06, "loss": 0.3522, "step": 1181 }, { "epoch": 0.3528358208955224, "grad_norm": 0.8654717177672067, "learning_rate": 3.754184639093484e-06, "loss": 0.3752, "step": 1182 }, { "epoch": 0.35313432835820896, "grad_norm": 0.8035897529969991, "learning_rate": 3.7520929048849154e-06, "loss": 0.3194, "step": 1183 }, { "epoch": 0.3534328358208955, "grad_norm": 0.8793098053746985, "learning_rate": 3.7500000000000005e-06, "loss": 0.3511, "step": 1184 }, { "epoch": 0.3537313432835821, "grad_norm": 0.9325267349782886, "learning_rate": 3.747905926395554e-06, "loss": 0.3763, "step": 1185 }, { "epoch": 0.35402985074626864, "grad_norm": 0.948428300474812, "learning_rate": 3.745810686029485e-06, "loss": 0.3896, "step": 1186 }, { "epoch": 0.3543283582089552, "grad_norm": 0.7393311008440809, "learning_rate": 3.743714280860791e-06, "loss": 0.3037, "step": 1187 }, { "epoch": 0.3546268656716418, "grad_norm": 0.8041532895009262, "learning_rate": 3.7416167128495596e-06, "loss": 0.3509, "step": 1188 }, { "epoch": 0.35492537313432837, "grad_norm": 0.7550348359422494, "learning_rate": 3.7395179839569656e-06, "loss": 0.3718, "step": 1189 }, { "epoch": 0.35522388059701493, "grad_norm": 0.8634479052711203, "learning_rate": 3.7374180961452704e-06, "loss": 0.355, "step": 1190 }, { "epoch": 0.3555223880597015, "grad_norm": 0.7576581623194972, "learning_rate": 3.735317051377818e-06, "loss": 0.3351, "step": 1191 }, { "epoch": 0.35582089552238805, "grad_norm": 0.8046261548713685, "learning_rate": 3.7332148516190327e-06, "loss": 0.358, "step": 1192 }, { "epoch": 0.3561194029850746, "grad_norm": 0.8406094996665042, "learning_rate": 3.731111498834421e-06, "loss": 0.4038, "step": 1193 }, { "epoch": 0.3564179104477612, "grad_norm": 0.7827745650131553, "learning_rate": 3.7290069949905665e-06, "loss": 0.317, "step": 1194 }, { "epoch": 0.3567164179104478, "grad_norm": 0.8857669556458677, "learning_rate": 3.7269013420551286e-06, "loss": 0.3502, "step": 1195 }, { "epoch": 0.35701492537313434, "grad_norm": 0.9284361811395723, "learning_rate": 3.7247945419968416e-06, "loss": 0.3782, "step": 1196 }, { "epoch": 0.3573134328358209, "grad_norm": 0.8341033368677571, "learning_rate": 3.722686596785513e-06, "loss": 0.3861, "step": 1197 }, { "epoch": 0.35761194029850746, "grad_norm": 0.9798340588994884, "learning_rate": 3.720577508392018e-06, "loss": 0.3544, "step": 1198 }, { "epoch": 0.357910447761194, "grad_norm": 0.831648222529357, "learning_rate": 3.7184672787883058e-06, "loss": 0.3828, "step": 1199 }, { "epoch": 0.3582089552238806, "grad_norm": 0.8402477136138807, "learning_rate": 3.7163559099473874e-06, "loss": 0.339, "step": 1200 }, { "epoch": 0.3585074626865672, "grad_norm": 0.7486776097613386, "learning_rate": 3.7142434038433415e-06, "loss": 0.3296, "step": 1201 }, { "epoch": 0.35880597014925375, "grad_norm": 0.8479281742963486, "learning_rate": 3.7121297624513108e-06, "loss": 0.3301, "step": 1202 }, { "epoch": 0.3591044776119403, "grad_norm": 0.9005107528987859, "learning_rate": 3.710014987747498e-06, "loss": 0.4088, "step": 1203 }, { "epoch": 0.35940298507462687, "grad_norm": 0.8059051563499257, "learning_rate": 3.707899081709165e-06, "loss": 0.3517, "step": 1204 }, { "epoch": 0.3597014925373134, "grad_norm": 0.8552050817452692, "learning_rate": 3.705782046314634e-06, "loss": 0.3445, "step": 1205 }, { "epoch": 0.36, "grad_norm": 0.9019044436471652, "learning_rate": 3.7036638835432803e-06, "loss": 0.3489, "step": 1206 }, { "epoch": 0.36029850746268655, "grad_norm": 0.843202812648593, "learning_rate": 3.7015445953755347e-06, "loss": 0.3417, "step": 1207 }, { "epoch": 0.36059701492537316, "grad_norm": 0.7443346275252014, "learning_rate": 3.6994241837928803e-06, "loss": 0.3425, "step": 1208 }, { "epoch": 0.3608955223880597, "grad_norm": 0.8737220707930936, "learning_rate": 3.6973026507778497e-06, "loss": 0.3388, "step": 1209 }, { "epoch": 0.3611940298507463, "grad_norm": 0.8319570014436627, "learning_rate": 3.695179998314026e-06, "loss": 0.3783, "step": 1210 }, { "epoch": 0.36149253731343284, "grad_norm": 1.0254313443105356, "learning_rate": 3.6930562283860356e-06, "loss": 0.3262, "step": 1211 }, { "epoch": 0.3617910447761194, "grad_norm": 0.8868819070775161, "learning_rate": 3.690931342979552e-06, "loss": 0.3693, "step": 1212 }, { "epoch": 0.36208955223880596, "grad_norm": 0.8512110198615549, "learning_rate": 3.6888053440812928e-06, "loss": 0.3534, "step": 1213 }, { "epoch": 0.3623880597014925, "grad_norm": 0.9606477539687734, "learning_rate": 3.6866782336790137e-06, "loss": 0.335, "step": 1214 }, { "epoch": 0.36268656716417913, "grad_norm": 0.8299706662665464, "learning_rate": 3.684550013761511e-06, "loss": 0.3069, "step": 1215 }, { "epoch": 0.3629850746268657, "grad_norm": 0.8583028517456109, "learning_rate": 3.6824206863186195e-06, "loss": 0.3534, "step": 1216 }, { "epoch": 0.36328358208955225, "grad_norm": 0.795689265721912, "learning_rate": 3.680290253341207e-06, "loss": 0.3217, "step": 1217 }, { "epoch": 0.3635820895522388, "grad_norm": 0.8155242986949494, "learning_rate": 3.6781587168211785e-06, "loss": 0.3139, "step": 1218 }, { "epoch": 0.36388059701492537, "grad_norm": 0.9284470749372372, "learning_rate": 3.676026078751466e-06, "loss": 0.3994, "step": 1219 }, { "epoch": 0.3641791044776119, "grad_norm": 0.7846735459117187, "learning_rate": 3.673892341126036e-06, "loss": 0.42, "step": 1220 }, { "epoch": 0.3644776119402985, "grad_norm": 0.7394437630997585, "learning_rate": 3.6717575059398818e-06, "loss": 0.3425, "step": 1221 }, { "epoch": 0.3647761194029851, "grad_norm": 0.8410128872441591, "learning_rate": 3.66962157518902e-06, "loss": 0.309, "step": 1222 }, { "epoch": 0.36507462686567166, "grad_norm": 0.7127446629430065, "learning_rate": 3.6674845508704954e-06, "loss": 0.3246, "step": 1223 }, { "epoch": 0.3653731343283582, "grad_norm": 0.8303226055174142, "learning_rate": 3.665346434982373e-06, "loss": 0.3568, "step": 1224 }, { "epoch": 0.3656716417910448, "grad_norm": 0.8759974317217685, "learning_rate": 3.6632072295237385e-06, "loss": 0.3725, "step": 1225 }, { "epoch": 0.36597014925373134, "grad_norm": 0.9100058785675172, "learning_rate": 3.6610669364946993e-06, "loss": 0.3733, "step": 1226 }, { "epoch": 0.3662686567164179, "grad_norm": 0.8872689350119385, "learning_rate": 3.6589255578963744e-06, "loss": 0.3834, "step": 1227 }, { "epoch": 0.36656716417910445, "grad_norm": 0.859809747889946, "learning_rate": 3.656783095730902e-06, "loss": 0.3207, "step": 1228 }, { "epoch": 0.36686567164179107, "grad_norm": 0.743367008555944, "learning_rate": 3.6546395520014324e-06, "loss": 0.3311, "step": 1229 }, { "epoch": 0.36716417910447763, "grad_norm": 0.8284032271379078, "learning_rate": 3.6524949287121247e-06, "loss": 0.3271, "step": 1230 }, { "epoch": 0.3674626865671642, "grad_norm": 0.9040431058647576, "learning_rate": 3.650349227868151e-06, "loss": 0.3805, "step": 1231 }, { "epoch": 0.36776119402985075, "grad_norm": 0.788723325583723, "learning_rate": 3.6482024514756905e-06, "loss": 0.3369, "step": 1232 }, { "epoch": 0.3680597014925373, "grad_norm": 0.8292299897136874, "learning_rate": 3.646054601541924e-06, "loss": 0.3526, "step": 1233 }, { "epoch": 0.36835820895522386, "grad_norm": 0.7780280218539215, "learning_rate": 3.6439056800750406e-06, "loss": 0.3404, "step": 1234 }, { "epoch": 0.3686567164179104, "grad_norm": 0.8640955670674403, "learning_rate": 3.641755689084229e-06, "loss": 0.359, "step": 1235 }, { "epoch": 0.36895522388059704, "grad_norm": 0.9296153051752855, "learning_rate": 3.6396046305796783e-06, "loss": 0.3492, "step": 1236 }, { "epoch": 0.3692537313432836, "grad_norm": 0.8539806741089728, "learning_rate": 3.6374525065725763e-06, "loss": 0.3732, "step": 1237 }, { "epoch": 0.36955223880597016, "grad_norm": 0.8791568970858674, "learning_rate": 3.635299319075106e-06, "loss": 0.3471, "step": 1238 }, { "epoch": 0.3698507462686567, "grad_norm": 0.8410283775191358, "learning_rate": 3.6331450701004444e-06, "loss": 0.3714, "step": 1239 }, { "epoch": 0.3701492537313433, "grad_norm": 0.8135682137125765, "learning_rate": 3.6309897616627644e-06, "loss": 0.3211, "step": 1240 }, { "epoch": 0.37044776119402983, "grad_norm": 1.0986621561108818, "learning_rate": 3.628833395777224e-06, "loss": 0.3671, "step": 1241 }, { "epoch": 0.3707462686567164, "grad_norm": 0.8336925612419427, "learning_rate": 3.626675974459974e-06, "loss": 0.3488, "step": 1242 }, { "epoch": 0.371044776119403, "grad_norm": 0.7967364289439731, "learning_rate": 3.624517499728151e-06, "loss": 0.3496, "step": 1243 }, { "epoch": 0.37134328358208957, "grad_norm": 0.8235464715233171, "learning_rate": 3.622357973599875e-06, "loss": 0.3435, "step": 1244 }, { "epoch": 0.3716417910447761, "grad_norm": 0.7670146106690454, "learning_rate": 3.6201973980942525e-06, "loss": 0.3054, "step": 1245 }, { "epoch": 0.3719402985074627, "grad_norm": 0.8127305346359484, "learning_rate": 3.618035775231367e-06, "loss": 0.337, "step": 1246 }, { "epoch": 0.37223880597014924, "grad_norm": 0.8739704252887466, "learning_rate": 3.6158731070322833e-06, "loss": 0.3904, "step": 1247 }, { "epoch": 0.3725373134328358, "grad_norm": 0.8589201227163017, "learning_rate": 3.613709395519045e-06, "loss": 0.3251, "step": 1248 }, { "epoch": 0.37283582089552236, "grad_norm": 1.0287866995944308, "learning_rate": 3.611544642714668e-06, "loss": 0.342, "step": 1249 }, { "epoch": 0.373134328358209, "grad_norm": 0.784684328253123, "learning_rate": 3.609378850643144e-06, "loss": 0.329, "step": 1250 }, { "epoch": 0.37343283582089554, "grad_norm": 0.9957577337951871, "learning_rate": 3.607212021329436e-06, "loss": 0.3318, "step": 1251 }, { "epoch": 0.3737313432835821, "grad_norm": 0.8067111270985596, "learning_rate": 3.6050441567994766e-06, "loss": 0.291, "step": 1252 }, { "epoch": 0.37402985074626866, "grad_norm": 0.814611603363173, "learning_rate": 3.6028752590801653e-06, "loss": 0.3724, "step": 1253 }, { "epoch": 0.3743283582089552, "grad_norm": 0.8212971184460834, "learning_rate": 3.60070533019937e-06, "loss": 0.3413, "step": 1254 }, { "epoch": 0.3746268656716418, "grad_norm": 0.8492409029801361, "learning_rate": 3.5985343721859205e-06, "loss": 0.4186, "step": 1255 }, { "epoch": 0.37492537313432833, "grad_norm": 0.8293206499269064, "learning_rate": 3.59636238706961e-06, "loss": 0.3273, "step": 1256 }, { "epoch": 0.37522388059701495, "grad_norm": 0.7439436947261291, "learning_rate": 3.5941893768811915e-06, "loss": 0.3171, "step": 1257 }, { "epoch": 0.3755223880597015, "grad_norm": 0.8417772206584229, "learning_rate": 3.5920153436523762e-06, "loss": 0.3727, "step": 1258 }, { "epoch": 0.37582089552238807, "grad_norm": 0.7863852090186266, "learning_rate": 3.589840289415833e-06, "loss": 0.3473, "step": 1259 }, { "epoch": 0.3761194029850746, "grad_norm": 1.2126822142651668, "learning_rate": 3.5876642162051833e-06, "loss": 0.3347, "step": 1260 }, { "epoch": 0.3764179104477612, "grad_norm": 0.8029602835003353, "learning_rate": 3.585487126055004e-06, "loss": 0.3459, "step": 1261 }, { "epoch": 0.37671641791044774, "grad_norm": 0.9206998569587098, "learning_rate": 3.5833090210008204e-06, "loss": 0.3634, "step": 1262 }, { "epoch": 0.3770149253731343, "grad_norm": 0.8641555534787082, "learning_rate": 3.5811299030791074e-06, "loss": 0.361, "step": 1263 }, { "epoch": 0.3773134328358209, "grad_norm": 0.7872304084887846, "learning_rate": 3.578949774327288e-06, "loss": 0.3268, "step": 1264 }, { "epoch": 0.3776119402985075, "grad_norm": 0.8680512566886363, "learning_rate": 3.5767686367837295e-06, "loss": 0.3436, "step": 1265 }, { "epoch": 0.37791044776119403, "grad_norm": 0.826653018279734, "learning_rate": 3.5745864924877412e-06, "loss": 0.3587, "step": 1266 }, { "epoch": 0.3782089552238806, "grad_norm": 0.8881579540530804, "learning_rate": 3.572403343479576e-06, "loss": 0.3683, "step": 1267 }, { "epoch": 0.37850746268656715, "grad_norm": 0.811695971069644, "learning_rate": 3.570219191800424e-06, "loss": 0.3417, "step": 1268 }, { "epoch": 0.3788059701492537, "grad_norm": 0.7952960741104704, "learning_rate": 3.5680340394924144e-06, "loss": 0.366, "step": 1269 }, { "epoch": 0.37910447761194027, "grad_norm": 0.812111528734765, "learning_rate": 3.565847888598612e-06, "loss": 0.3031, "step": 1270 }, { "epoch": 0.3794029850746269, "grad_norm": 0.8238499604306999, "learning_rate": 3.5636607411630133e-06, "loss": 0.3639, "step": 1271 }, { "epoch": 0.37970149253731345, "grad_norm": 1.2980492858910848, "learning_rate": 3.5614725992305487e-06, "loss": 0.3809, "step": 1272 }, { "epoch": 0.38, "grad_norm": 0.7995909221221797, "learning_rate": 3.5592834648470763e-06, "loss": 0.3372, "step": 1273 }, { "epoch": 0.38029850746268656, "grad_norm": 0.8051516225042442, "learning_rate": 3.557093340059385e-06, "loss": 0.3444, "step": 1274 }, { "epoch": 0.3805970149253731, "grad_norm": 0.7977178460505244, "learning_rate": 3.5549022269151876e-06, "loss": 0.3087, "step": 1275 }, { "epoch": 0.3808955223880597, "grad_norm": 0.8617753129014368, "learning_rate": 3.552710127463121e-06, "loss": 0.399, "step": 1276 }, { "epoch": 0.38119402985074624, "grad_norm": 0.8085602277043268, "learning_rate": 3.550517043752745e-06, "loss": 0.3762, "step": 1277 }, { "epoch": 0.38149253731343286, "grad_norm": 0.8125284069150092, "learning_rate": 3.5483229778345403e-06, "loss": 0.3532, "step": 1278 }, { "epoch": 0.3817910447761194, "grad_norm": 0.7339040692073175, "learning_rate": 3.546127931759903e-06, "loss": 0.3073, "step": 1279 }, { "epoch": 0.382089552238806, "grad_norm": 0.7778739951423395, "learning_rate": 3.5439319075811496e-06, "loss": 0.3276, "step": 1280 }, { "epoch": 0.38238805970149253, "grad_norm": 0.7892393982929026, "learning_rate": 3.5417349073515085e-06, "loss": 0.3358, "step": 1281 }, { "epoch": 0.3826865671641791, "grad_norm": 0.8241466550279929, "learning_rate": 3.5395369331251205e-06, "loss": 0.3427, "step": 1282 }, { "epoch": 0.38298507462686565, "grad_norm": 0.8892819954376281, "learning_rate": 3.53733798695704e-06, "loss": 0.3166, "step": 1283 }, { "epoch": 0.38328358208955227, "grad_norm": 0.8269062884932116, "learning_rate": 3.5351380709032265e-06, "loss": 0.3722, "step": 1284 }, { "epoch": 0.3835820895522388, "grad_norm": 0.905649324964948, "learning_rate": 3.5329371870205477e-06, "loss": 0.3142, "step": 1285 }, { "epoch": 0.3838805970149254, "grad_norm": 0.8106636299372714, "learning_rate": 3.5307353373667772e-06, "loss": 0.3387, "step": 1286 }, { "epoch": 0.38417910447761194, "grad_norm": 0.811430184665767, "learning_rate": 3.528532524000591e-06, "loss": 0.3286, "step": 1287 }, { "epoch": 0.3844776119402985, "grad_norm": 0.8218271629980521, "learning_rate": 3.5263287489815643e-06, "loss": 0.3835, "step": 1288 }, { "epoch": 0.38477611940298506, "grad_norm": 0.816710352160504, "learning_rate": 3.524124014370175e-06, "loss": 0.3582, "step": 1289 }, { "epoch": 0.3850746268656716, "grad_norm": 0.7305897895271865, "learning_rate": 3.5219183222277954e-06, "loss": 0.3257, "step": 1290 }, { "epoch": 0.38537313432835824, "grad_norm": 0.7546275525313664, "learning_rate": 3.519711674616694e-06, "loss": 0.3344, "step": 1291 }, { "epoch": 0.3856716417910448, "grad_norm": 0.8631937455194517, "learning_rate": 3.517504073600031e-06, "loss": 0.351, "step": 1292 }, { "epoch": 0.38597014925373135, "grad_norm": 0.8204252773546967, "learning_rate": 3.5152955212418616e-06, "loss": 0.2986, "step": 1293 }, { "epoch": 0.3862686567164179, "grad_norm": 0.8154202257697561, "learning_rate": 3.5130860196071283e-06, "loss": 0.3418, "step": 1294 }, { "epoch": 0.38656716417910447, "grad_norm": 0.8526837643437616, "learning_rate": 3.51087557076166e-06, "loss": 0.3821, "step": 1295 }, { "epoch": 0.38686567164179103, "grad_norm": 0.8428109150390479, "learning_rate": 3.508664176772173e-06, "loss": 0.3234, "step": 1296 }, { "epoch": 0.3871641791044776, "grad_norm": 0.8008585623503791, "learning_rate": 3.506451839706268e-06, "loss": 0.3399, "step": 1297 }, { "epoch": 0.3874626865671642, "grad_norm": 0.8269646097065997, "learning_rate": 3.5042385616324243e-06, "loss": 0.3505, "step": 1298 }, { "epoch": 0.38776119402985076, "grad_norm": 0.7972302337986306, "learning_rate": 3.5020243446200034e-06, "loss": 0.3302, "step": 1299 }, { "epoch": 0.3880597014925373, "grad_norm": 0.8833042412930123, "learning_rate": 3.4998091907392463e-06, "loss": 0.3793, "step": 1300 }, { "epoch": 0.3883582089552239, "grad_norm": 0.9850943543964298, "learning_rate": 3.497593102061264e-06, "loss": 0.3532, "step": 1301 }, { "epoch": 0.38865671641791044, "grad_norm": 0.9676051683553516, "learning_rate": 3.4953760806580494e-06, "loss": 0.3635, "step": 1302 }, { "epoch": 0.388955223880597, "grad_norm": 0.8090901217456243, "learning_rate": 3.4931581286024607e-06, "loss": 0.2871, "step": 1303 }, { "epoch": 0.38925373134328356, "grad_norm": 0.8348404220329789, "learning_rate": 3.4909392479682303e-06, "loss": 0.3315, "step": 1304 }, { "epoch": 0.3895522388059702, "grad_norm": 0.7991008815236104, "learning_rate": 3.488719440829958e-06, "loss": 0.36, "step": 1305 }, { "epoch": 0.38985074626865673, "grad_norm": 0.8438228763160631, "learning_rate": 3.4864987092631074e-06, "loss": 0.3869, "step": 1306 }, { "epoch": 0.3901492537313433, "grad_norm": 0.8489110544841167, "learning_rate": 3.4842770553440103e-06, "loss": 0.3675, "step": 1307 }, { "epoch": 0.39044776119402985, "grad_norm": 0.7917206514304985, "learning_rate": 3.4820544811498584e-06, "loss": 0.3385, "step": 1308 }, { "epoch": 0.3907462686567164, "grad_norm": 1.0183165833490218, "learning_rate": 3.479830988758704e-06, "loss": 0.351, "step": 1309 }, { "epoch": 0.39104477611940297, "grad_norm": 0.7666673934748648, "learning_rate": 3.4776065802494585e-06, "loss": 0.3123, "step": 1310 }, { "epoch": 0.39134328358208953, "grad_norm": 0.8481681061835175, "learning_rate": 3.47538125770189e-06, "loss": 0.3304, "step": 1311 }, { "epoch": 0.39164179104477614, "grad_norm": 0.8626463063388562, "learning_rate": 3.4731550231966193e-06, "loss": 0.3429, "step": 1312 }, { "epoch": 0.3919402985074627, "grad_norm": 0.7994976728058045, "learning_rate": 3.470927878815124e-06, "loss": 0.3368, "step": 1313 }, { "epoch": 0.39223880597014926, "grad_norm": 0.8542479136676837, "learning_rate": 3.4686998266397275e-06, "loss": 0.3284, "step": 1314 }, { "epoch": 0.3925373134328358, "grad_norm": 0.8028452068951644, "learning_rate": 3.466470868753606e-06, "loss": 0.3699, "step": 1315 }, { "epoch": 0.3928358208955224, "grad_norm": 0.8192750229513925, "learning_rate": 3.4642410072407797e-06, "loss": 0.3374, "step": 1316 }, { "epoch": 0.39313432835820894, "grad_norm": 0.7911937120174724, "learning_rate": 3.4620102441861147e-06, "loss": 0.3068, "step": 1317 }, { "epoch": 0.3934328358208955, "grad_norm": 0.7856414622293901, "learning_rate": 3.4597785816753203e-06, "loss": 0.3815, "step": 1318 }, { "epoch": 0.3937313432835821, "grad_norm": 0.8719965350651078, "learning_rate": 3.4575460217949475e-06, "loss": 0.3249, "step": 1319 }, { "epoch": 0.3940298507462687, "grad_norm": 0.8038659900828976, "learning_rate": 3.4553125666323828e-06, "loss": 0.3609, "step": 1320 }, { "epoch": 0.39432835820895523, "grad_norm": 0.8437738135633144, "learning_rate": 3.453078218275856e-06, "loss": 0.3547, "step": 1321 }, { "epoch": 0.3946268656716418, "grad_norm": 0.8893980819638573, "learning_rate": 3.4508429788144255e-06, "loss": 0.3835, "step": 1322 }, { "epoch": 0.39492537313432835, "grad_norm": 0.7966994623889064, "learning_rate": 3.4486068503379864e-06, "loss": 0.3278, "step": 1323 }, { "epoch": 0.3952238805970149, "grad_norm": 0.8837643559806236, "learning_rate": 3.4463698349372655e-06, "loss": 0.4, "step": 1324 }, { "epoch": 0.39552238805970147, "grad_norm": 0.8420254919427764, "learning_rate": 3.444131934703816e-06, "loss": 0.3145, "step": 1325 }, { "epoch": 0.3958208955223881, "grad_norm": 0.8412477035174881, "learning_rate": 3.4418931517300207e-06, "loss": 0.3318, "step": 1326 }, { "epoch": 0.39611940298507464, "grad_norm": 0.7378862322828338, "learning_rate": 3.4396534881090882e-06, "loss": 0.3295, "step": 1327 }, { "epoch": 0.3964179104477612, "grad_norm": 0.814528536394005, "learning_rate": 3.437412945935047e-06, "loss": 0.3715, "step": 1328 }, { "epoch": 0.39671641791044776, "grad_norm": 0.8472682635407821, "learning_rate": 3.435171527302752e-06, "loss": 0.3495, "step": 1329 }, { "epoch": 0.3970149253731343, "grad_norm": 0.7719473237614821, "learning_rate": 3.4329292343078733e-06, "loss": 0.3388, "step": 1330 }, { "epoch": 0.3973134328358209, "grad_norm": 0.8467959201969155, "learning_rate": 3.430686069046901e-06, "loss": 0.3357, "step": 1331 }, { "epoch": 0.39761194029850744, "grad_norm": 0.8099659593829108, "learning_rate": 3.4284420336171393e-06, "loss": 0.3495, "step": 1332 }, { "epoch": 0.39791044776119405, "grad_norm": 0.8144878750567746, "learning_rate": 3.426197130116707e-06, "loss": 0.3371, "step": 1333 }, { "epoch": 0.3982089552238806, "grad_norm": 0.7434099123184534, "learning_rate": 3.423951360644534e-06, "loss": 0.2841, "step": 1334 }, { "epoch": 0.39850746268656717, "grad_norm": 0.8453291988577794, "learning_rate": 3.4217047273003605e-06, "loss": 0.3219, "step": 1335 }, { "epoch": 0.39880597014925373, "grad_norm": 0.9447504107148086, "learning_rate": 3.4194572321847336e-06, "loss": 0.3831, "step": 1336 }, { "epoch": 0.3991044776119403, "grad_norm": 0.8778360194938137, "learning_rate": 3.417208877399006e-06, "loss": 0.363, "step": 1337 }, { "epoch": 0.39940298507462685, "grad_norm": 0.858110018415135, "learning_rate": 3.4149596650453354e-06, "loss": 0.345, "step": 1338 }, { "epoch": 0.3997014925373134, "grad_norm": 0.8216385808533025, "learning_rate": 3.4127095972266795e-06, "loss": 0.4099, "step": 1339 }, { "epoch": 0.4, "grad_norm": 0.8344309431851922, "learning_rate": 3.4104586760467984e-06, "loss": 0.3873, "step": 1340 }, { "epoch": 0.4002985074626866, "grad_norm": 0.8372552783746257, "learning_rate": 3.408206903610247e-06, "loss": 0.3612, "step": 1341 }, { "epoch": 0.40059701492537314, "grad_norm": 0.8372600966982839, "learning_rate": 3.4059542820223782e-06, "loss": 0.3692, "step": 1342 }, { "epoch": 0.4008955223880597, "grad_norm": 0.9587410316998514, "learning_rate": 3.4037008133893395e-06, "loss": 0.3882, "step": 1343 }, { "epoch": 0.40119402985074626, "grad_norm": 0.7735563287004625, "learning_rate": 3.4014464998180673e-06, "loss": 0.3486, "step": 1344 }, { "epoch": 0.4014925373134328, "grad_norm": 0.7836991921229477, "learning_rate": 3.3991913434162905e-06, "loss": 0.3254, "step": 1345 }, { "epoch": 0.4017910447761194, "grad_norm": 0.7627893713273461, "learning_rate": 3.396935346292526e-06, "loss": 0.3461, "step": 1346 }, { "epoch": 0.402089552238806, "grad_norm": 0.8945538621615577, "learning_rate": 3.3946785105560742e-06, "loss": 0.312, "step": 1347 }, { "epoch": 0.40238805970149255, "grad_norm": 0.8301080381657066, "learning_rate": 3.3924208383170244e-06, "loss": 0.384, "step": 1348 }, { "epoch": 0.4026865671641791, "grad_norm": 0.8441180205827015, "learning_rate": 3.3901623316862424e-06, "loss": 0.3237, "step": 1349 }, { "epoch": 0.40298507462686567, "grad_norm": 0.917883619027885, "learning_rate": 3.3879029927753782e-06, "loss": 0.3451, "step": 1350 }, { "epoch": 0.40328358208955223, "grad_norm": 0.7676285335284508, "learning_rate": 3.3856428236968593e-06, "loss": 0.3348, "step": 1351 }, { "epoch": 0.4035820895522388, "grad_norm": 0.7694667977006615, "learning_rate": 3.3833818265638868e-06, "loss": 0.3202, "step": 1352 }, { "epoch": 0.40388059701492535, "grad_norm": 0.7859618711908494, "learning_rate": 3.3811200034904392e-06, "loss": 0.3447, "step": 1353 }, { "epoch": 0.40417910447761196, "grad_norm": 0.7857504939863141, "learning_rate": 3.3788573565912665e-06, "loss": 0.2722, "step": 1354 }, { "epoch": 0.4044776119402985, "grad_norm": 0.782826471359172, "learning_rate": 3.3765938879818865e-06, "loss": 0.3318, "step": 1355 }, { "epoch": 0.4047761194029851, "grad_norm": 0.855706197844801, "learning_rate": 3.3743295997785884e-06, "loss": 0.3204, "step": 1356 }, { "epoch": 0.40507462686567164, "grad_norm": 0.8343684499136247, "learning_rate": 3.372064494098427e-06, "loss": 0.3173, "step": 1357 }, { "epoch": 0.4053731343283582, "grad_norm": 0.8369262504159325, "learning_rate": 3.3697985730592187e-06, "loss": 0.3339, "step": 1358 }, { "epoch": 0.40567164179104476, "grad_norm": 0.7799099543460535, "learning_rate": 3.3675318387795473e-06, "loss": 0.3423, "step": 1359 }, { "epoch": 0.4059701492537313, "grad_norm": 0.8648556337576385, "learning_rate": 3.3652642933787526e-06, "loss": 0.3493, "step": 1360 }, { "epoch": 0.40626865671641793, "grad_norm": 0.8204663128828968, "learning_rate": 3.362995938976934e-06, "loss": 0.3317, "step": 1361 }, { "epoch": 0.4065671641791045, "grad_norm": 0.792102559533383, "learning_rate": 3.3607267776949485e-06, "loss": 0.3639, "step": 1362 }, { "epoch": 0.40686567164179105, "grad_norm": 0.8308207742246098, "learning_rate": 3.358456811654406e-06, "loss": 0.3679, "step": 1363 }, { "epoch": 0.4071641791044776, "grad_norm": 0.8656195971316198, "learning_rate": 3.35618604297767e-06, "loss": 0.3797, "step": 1364 }, { "epoch": 0.40746268656716417, "grad_norm": 0.8006170230930989, "learning_rate": 3.3539144737878525e-06, "loss": 0.3471, "step": 1365 }, { "epoch": 0.4077611940298507, "grad_norm": 0.8502026092595737, "learning_rate": 3.351642106208816e-06, "loss": 0.3814, "step": 1366 }, { "epoch": 0.40805970149253734, "grad_norm": 0.7775186619424316, "learning_rate": 3.3493689423651697e-06, "loss": 0.3086, "step": 1367 }, { "epoch": 0.4083582089552239, "grad_norm": 0.798430336568009, "learning_rate": 3.3470949843822657e-06, "loss": 0.3406, "step": 1368 }, { "epoch": 0.40865671641791046, "grad_norm": 0.8053325058264433, "learning_rate": 3.3448202343861992e-06, "loss": 0.3458, "step": 1369 }, { "epoch": 0.408955223880597, "grad_norm": 0.8343334939840257, "learning_rate": 3.3425446945038058e-06, "loss": 0.337, "step": 1370 }, { "epoch": 0.4092537313432836, "grad_norm": 0.8191029497223187, "learning_rate": 3.34026836686266e-06, "loss": 0.3625, "step": 1371 }, { "epoch": 0.40955223880597014, "grad_norm": 0.8618543242740297, "learning_rate": 3.337991253591073e-06, "loss": 0.3382, "step": 1372 }, { "epoch": 0.4098507462686567, "grad_norm": 0.7548489326509085, "learning_rate": 3.3357133568180905e-06, "loss": 0.3296, "step": 1373 }, { "epoch": 0.4101492537313433, "grad_norm": 0.842491815374688, "learning_rate": 3.3334346786734894e-06, "loss": 0.3493, "step": 1374 }, { "epoch": 0.41044776119402987, "grad_norm": 0.8011968378789714, "learning_rate": 3.3311552212877787e-06, "loss": 0.3359, "step": 1375 }, { "epoch": 0.41074626865671643, "grad_norm": 0.8683971005840104, "learning_rate": 3.3288749867921953e-06, "loss": 0.3896, "step": 1376 }, { "epoch": 0.411044776119403, "grad_norm": 0.8853597587848415, "learning_rate": 3.3265939773187026e-06, "loss": 0.31, "step": 1377 }, { "epoch": 0.41134328358208955, "grad_norm": 0.8397333555979059, "learning_rate": 3.3243121949999906e-06, "loss": 0.3478, "step": 1378 }, { "epoch": 0.4116417910447761, "grad_norm": 0.7964931329993589, "learning_rate": 3.3220296419694686e-06, "loss": 0.2821, "step": 1379 }, { "epoch": 0.41194029850746267, "grad_norm": 0.8116358539123523, "learning_rate": 3.319746320361268e-06, "loss": 0.3519, "step": 1380 }, { "epoch": 0.4122388059701493, "grad_norm": 1.417004497957898, "learning_rate": 3.3174622323102396e-06, "loss": 0.3152, "step": 1381 }, { "epoch": 0.41253731343283584, "grad_norm": 0.7731890086167517, "learning_rate": 3.3151773799519492e-06, "loss": 0.3873, "step": 1382 }, { "epoch": 0.4128358208955224, "grad_norm": 0.7984276296697358, "learning_rate": 3.3128917654226794e-06, "loss": 0.3897, "step": 1383 }, { "epoch": 0.41313432835820896, "grad_norm": 0.8349251849409903, "learning_rate": 3.310605390859422e-06, "loss": 0.3516, "step": 1384 }, { "epoch": 0.4134328358208955, "grad_norm": 0.8765686889024715, "learning_rate": 3.3083182583998835e-06, "loss": 0.3757, "step": 1385 }, { "epoch": 0.4137313432835821, "grad_norm": 0.9054669051528854, "learning_rate": 3.3060303701824763e-06, "loss": 0.3599, "step": 1386 }, { "epoch": 0.41402985074626864, "grad_norm": 0.7717625673330878, "learning_rate": 3.303741728346319e-06, "loss": 0.3221, "step": 1387 }, { "epoch": 0.41432835820895525, "grad_norm": 0.7734383019569172, "learning_rate": 3.301452335031238e-06, "loss": 0.3417, "step": 1388 }, { "epoch": 0.4146268656716418, "grad_norm": 0.9879867361611887, "learning_rate": 3.299162192377759e-06, "loss": 0.4098, "step": 1389 }, { "epoch": 0.41492537313432837, "grad_norm": 0.8150464123862557, "learning_rate": 3.2968713025271095e-06, "loss": 0.3322, "step": 1390 }, { "epoch": 0.4152238805970149, "grad_norm": 0.8573334079386694, "learning_rate": 3.2945796676212155e-06, "loss": 0.2965, "step": 1391 }, { "epoch": 0.4155223880597015, "grad_norm": 0.9215312294103535, "learning_rate": 3.2922872898027007e-06, "loss": 0.3696, "step": 1392 }, { "epoch": 0.41582089552238805, "grad_norm": 0.8431748611402596, "learning_rate": 3.289994171214882e-06, "loss": 0.3596, "step": 1393 }, { "epoch": 0.4161194029850746, "grad_norm": 0.9100190938713786, "learning_rate": 3.287700314001769e-06, "loss": 0.3549, "step": 1394 }, { "epoch": 0.4164179104477612, "grad_norm": 0.7828037598477029, "learning_rate": 3.2854057203080624e-06, "loss": 0.3741, "step": 1395 }, { "epoch": 0.4167164179104478, "grad_norm": 0.8491175801452148, "learning_rate": 3.283110392279152e-06, "loss": 0.3451, "step": 1396 }, { "epoch": 0.41701492537313434, "grad_norm": 0.8220914334757844, "learning_rate": 3.2808143320611137e-06, "loss": 0.349, "step": 1397 }, { "epoch": 0.4173134328358209, "grad_norm": 0.7625729166024875, "learning_rate": 3.2785175418007066e-06, "loss": 0.3334, "step": 1398 }, { "epoch": 0.41761194029850746, "grad_norm": 0.7835308374457285, "learning_rate": 3.276220023645374e-06, "loss": 0.3221, "step": 1399 }, { "epoch": 0.417910447761194, "grad_norm": 0.8199114774599374, "learning_rate": 3.2739217797432405e-06, "loss": 0.3238, "step": 1400 }, { "epoch": 0.4182089552238806, "grad_norm": 0.7932003246008847, "learning_rate": 3.2716228122431072e-06, "loss": 0.3066, "step": 1401 }, { "epoch": 0.4185074626865672, "grad_norm": 0.8150947385287399, "learning_rate": 3.2693231232944527e-06, "loss": 0.3184, "step": 1402 }, { "epoch": 0.41880597014925375, "grad_norm": 0.9738953531287554, "learning_rate": 3.2670227150474298e-06, "loss": 0.3689, "step": 1403 }, { "epoch": 0.4191044776119403, "grad_norm": 0.9195470631640194, "learning_rate": 3.2647215896528643e-06, "loss": 0.3668, "step": 1404 }, { "epoch": 0.41940298507462687, "grad_norm": 0.8600925044550167, "learning_rate": 3.262419749262254e-06, "loss": 0.3313, "step": 1405 }, { "epoch": 0.4197014925373134, "grad_norm": 0.7587232892677159, "learning_rate": 3.260117196027761e-06, "loss": 0.3819, "step": 1406 }, { "epoch": 0.42, "grad_norm": 0.822014542644426, "learning_rate": 3.2578139321022175e-06, "loss": 0.335, "step": 1407 }, { "epoch": 0.42029850746268654, "grad_norm": 0.9420250400800545, "learning_rate": 3.25550995963912e-06, "loss": 0.3181, "step": 1408 }, { "epoch": 0.42059701492537316, "grad_norm": 0.7516372540343567, "learning_rate": 3.253205280792625e-06, "loss": 0.2851, "step": 1409 }, { "epoch": 0.4208955223880597, "grad_norm": 0.7948134058388104, "learning_rate": 3.250899897717552e-06, "loss": 0.3454, "step": 1410 }, { "epoch": 0.4211940298507463, "grad_norm": 0.7240159536362353, "learning_rate": 3.248593812569379e-06, "loss": 0.3136, "step": 1411 }, { "epoch": 0.42149253731343284, "grad_norm": 0.759842065598249, "learning_rate": 3.246287027504237e-06, "loss": 0.3287, "step": 1412 }, { "epoch": 0.4217910447761194, "grad_norm": 0.790615746686793, "learning_rate": 3.2439795446789152e-06, "loss": 0.3589, "step": 1413 }, { "epoch": 0.42208955223880595, "grad_norm": 0.8340970220469927, "learning_rate": 3.241671366250854e-06, "loss": 0.3562, "step": 1414 }, { "epoch": 0.4223880597014925, "grad_norm": 0.8939868975796444, "learning_rate": 3.2393624943781426e-06, "loss": 0.3438, "step": 1415 }, { "epoch": 0.42268656716417913, "grad_norm": 0.8450509789557636, "learning_rate": 3.2370529312195225e-06, "loss": 0.3233, "step": 1416 }, { "epoch": 0.4229850746268657, "grad_norm": 0.8150254784866996, "learning_rate": 3.2347426789343766e-06, "loss": 0.3523, "step": 1417 }, { "epoch": 0.42328358208955225, "grad_norm": 1.0717236306406788, "learning_rate": 3.2324317396827355e-06, "loss": 0.3495, "step": 1418 }, { "epoch": 0.4235820895522388, "grad_norm": 0.7615376345185246, "learning_rate": 3.2301201156252704e-06, "loss": 0.3615, "step": 1419 }, { "epoch": 0.42388059701492536, "grad_norm": 0.8626725672132948, "learning_rate": 3.2278078089232945e-06, "loss": 0.359, "step": 1420 }, { "epoch": 0.4241791044776119, "grad_norm": 0.7597752656447071, "learning_rate": 3.2254948217387576e-06, "loss": 0.3437, "step": 1421 }, { "epoch": 0.4244776119402985, "grad_norm": 0.8020767792341673, "learning_rate": 3.223181156234246e-06, "loss": 0.3578, "step": 1422 }, { "epoch": 0.4247761194029851, "grad_norm": 0.7733888823557834, "learning_rate": 3.2208668145729806e-06, "loss": 0.3439, "step": 1423 }, { "epoch": 0.42507462686567166, "grad_norm": 0.8786679302972779, "learning_rate": 3.2185517989188154e-06, "loss": 0.3349, "step": 1424 }, { "epoch": 0.4253731343283582, "grad_norm": 0.7638976687742035, "learning_rate": 3.216236111436233e-06, "loss": 0.3331, "step": 1425 }, { "epoch": 0.4256716417910448, "grad_norm": 0.7840962222569484, "learning_rate": 3.213919754290343e-06, "loss": 0.354, "step": 1426 }, { "epoch": 0.42597014925373133, "grad_norm": 0.8613687089983829, "learning_rate": 3.2116027296468866e-06, "loss": 0.3771, "step": 1427 }, { "epoch": 0.4262686567164179, "grad_norm": 0.7666875547356334, "learning_rate": 3.2092850396722227e-06, "loss": 0.3428, "step": 1428 }, { "epoch": 0.42656716417910445, "grad_norm": 0.8148557834884871, "learning_rate": 3.2069666865333356e-06, "loss": 0.3541, "step": 1429 }, { "epoch": 0.42686567164179107, "grad_norm": 0.7914353848681119, "learning_rate": 3.204647672397829e-06, "loss": 0.3338, "step": 1430 }, { "epoch": 0.4271641791044776, "grad_norm": 0.8843259464989137, "learning_rate": 3.2023279994339242e-06, "loss": 0.3511, "step": 1431 }, { "epoch": 0.4274626865671642, "grad_norm": 0.788441005552561, "learning_rate": 3.2000076698104585e-06, "loss": 0.359, "step": 1432 }, { "epoch": 0.42776119402985074, "grad_norm": 0.8244525477083475, "learning_rate": 3.197686685696885e-06, "loss": 0.3729, "step": 1433 }, { "epoch": 0.4280597014925373, "grad_norm": 0.8468029355697649, "learning_rate": 3.1953650492632664e-06, "loss": 0.3583, "step": 1434 }, { "epoch": 0.42835820895522386, "grad_norm": 0.7871095643951108, "learning_rate": 3.193042762680277e-06, "loss": 0.3595, "step": 1435 }, { "epoch": 0.4286567164179104, "grad_norm": 0.8360573541165367, "learning_rate": 3.1907198281191963e-06, "loss": 0.3374, "step": 1436 }, { "epoch": 0.42895522388059704, "grad_norm": 0.8275698735469698, "learning_rate": 3.1883962477519136e-06, "loss": 0.3247, "step": 1437 }, { "epoch": 0.4292537313432836, "grad_norm": 0.7431117381085794, "learning_rate": 3.1860720237509186e-06, "loss": 0.3186, "step": 1438 }, { "epoch": 0.42955223880597015, "grad_norm": 0.8236400271008097, "learning_rate": 3.1837471582893044e-06, "loss": 0.4015, "step": 1439 }, { "epoch": 0.4298507462686567, "grad_norm": 0.7284531636328365, "learning_rate": 3.181421653540764e-06, "loss": 0.3023, "step": 1440 }, { "epoch": 0.4301492537313433, "grad_norm": 0.8203967087943976, "learning_rate": 3.1790955116795865e-06, "loss": 0.3109, "step": 1441 }, { "epoch": 0.43044776119402983, "grad_norm": 0.8560428452075539, "learning_rate": 3.176768734880658e-06, "loss": 0.3423, "step": 1442 }, { "epoch": 0.4307462686567164, "grad_norm": 0.9450551948814531, "learning_rate": 3.17444132531946e-06, "loss": 0.3602, "step": 1443 }, { "epoch": 0.431044776119403, "grad_norm": 0.7426532649143752, "learning_rate": 3.1721132851720615e-06, "loss": 0.3561, "step": 1444 }, { "epoch": 0.43134328358208956, "grad_norm": 0.8374537611379493, "learning_rate": 3.169784616615125e-06, "loss": 0.3457, "step": 1445 }, { "epoch": 0.4316417910447761, "grad_norm": 0.7637827169896023, "learning_rate": 3.1674553218258976e-06, "loss": 0.2929, "step": 1446 }, { "epoch": 0.4319402985074627, "grad_norm": 0.8669603581918557, "learning_rate": 3.1651254029822126e-06, "loss": 0.3499, "step": 1447 }, { "epoch": 0.43223880597014924, "grad_norm": 0.8055692147761853, "learning_rate": 3.1627948622624894e-06, "loss": 0.3469, "step": 1448 }, { "epoch": 0.4325373134328358, "grad_norm": 1.081434396687064, "learning_rate": 3.160463701845725e-06, "loss": 0.4056, "step": 1449 }, { "epoch": 0.43283582089552236, "grad_norm": 0.866468982049259, "learning_rate": 3.1581319239114983e-06, "loss": 0.3664, "step": 1450 }, { "epoch": 0.433134328358209, "grad_norm": 0.7980338117580451, "learning_rate": 3.1557995306399657e-06, "loss": 0.3377, "step": 1451 }, { "epoch": 0.43343283582089553, "grad_norm": 0.9044155994386817, "learning_rate": 3.1534665242118557e-06, "loss": 0.3559, "step": 1452 }, { "epoch": 0.4337313432835821, "grad_norm": 0.8274492131309824, "learning_rate": 3.151132906808474e-06, "loss": 0.2981, "step": 1453 }, { "epoch": 0.43402985074626865, "grad_norm": 1.1457765974493999, "learning_rate": 3.1487986806116964e-06, "loss": 0.3406, "step": 1454 }, { "epoch": 0.4343283582089552, "grad_norm": 0.7341840039215745, "learning_rate": 3.1464638478039665e-06, "loss": 0.3597, "step": 1455 }, { "epoch": 0.43462686567164177, "grad_norm": 0.8382632632518243, "learning_rate": 3.1441284105682973e-06, "loss": 0.3517, "step": 1456 }, { "epoch": 0.4349253731343284, "grad_norm": 0.8205128080490478, "learning_rate": 3.1417923710882643e-06, "loss": 0.3274, "step": 1457 }, { "epoch": 0.43522388059701494, "grad_norm": 0.8607995458918247, "learning_rate": 3.1394557315480077e-06, "loss": 0.3733, "step": 1458 }, { "epoch": 0.4355223880597015, "grad_norm": 0.9177467111453076, "learning_rate": 3.13711849413223e-06, "loss": 0.3382, "step": 1459 }, { "epoch": 0.43582089552238806, "grad_norm": 0.8007964740073961, "learning_rate": 3.1347806610261886e-06, "loss": 0.3501, "step": 1460 }, { "epoch": 0.4361194029850746, "grad_norm": 0.8794813487482264, "learning_rate": 3.1324422344157026e-06, "loss": 0.3731, "step": 1461 }, { "epoch": 0.4364179104477612, "grad_norm": 0.9082175046205051, "learning_rate": 3.1301032164871436e-06, "loss": 0.4047, "step": 1462 }, { "epoch": 0.43671641791044774, "grad_norm": 0.9375256531298286, "learning_rate": 3.1277636094274357e-06, "loss": 0.3549, "step": 1463 }, { "epoch": 0.43701492537313436, "grad_norm": 0.7978433496058229, "learning_rate": 3.1254234154240544e-06, "loss": 0.3325, "step": 1464 }, { "epoch": 0.4373134328358209, "grad_norm": 0.8332407184088709, "learning_rate": 3.1230826366650245e-06, "loss": 0.347, "step": 1465 }, { "epoch": 0.4376119402985075, "grad_norm": 0.7369892800300631, "learning_rate": 3.1207412753389173e-06, "loss": 0.3096, "step": 1466 }, { "epoch": 0.43791044776119403, "grad_norm": 0.912860064618065, "learning_rate": 3.118399333634848e-06, "loss": 0.4124, "step": 1467 }, { "epoch": 0.4382089552238806, "grad_norm": 0.9428428169193727, "learning_rate": 3.1160568137424757e-06, "loss": 0.3071, "step": 1468 }, { "epoch": 0.43850746268656715, "grad_norm": 0.8484227296987369, "learning_rate": 3.1137137178519983e-06, "loss": 0.3957, "step": 1469 }, { "epoch": 0.4388059701492537, "grad_norm": 0.8297150076443598, "learning_rate": 3.1113700481541547e-06, "loss": 0.3014, "step": 1470 }, { "epoch": 0.4391044776119403, "grad_norm": 0.8049525013596943, "learning_rate": 3.1090258068402173e-06, "loss": 0.3452, "step": 1471 }, { "epoch": 0.4394029850746269, "grad_norm": 0.871830475989278, "learning_rate": 3.1066809961019954e-06, "loss": 0.3659, "step": 1472 }, { "epoch": 0.43970149253731344, "grad_norm": 0.8080522829751793, "learning_rate": 3.1043356181318313e-06, "loss": 0.3318, "step": 1473 }, { "epoch": 0.44, "grad_norm": 0.8144887418555589, "learning_rate": 3.101989675122594e-06, "loss": 0.3259, "step": 1474 }, { "epoch": 0.44029850746268656, "grad_norm": 0.8505193832128495, "learning_rate": 3.099643169267685e-06, "loss": 0.3377, "step": 1475 }, { "epoch": 0.4405970149253731, "grad_norm": 0.8155829812521912, "learning_rate": 3.097296102761028e-06, "loss": 0.3389, "step": 1476 }, { "epoch": 0.4408955223880597, "grad_norm": 0.786054888865815, "learning_rate": 3.0949484777970747e-06, "loss": 0.3208, "step": 1477 }, { "epoch": 0.4411940298507463, "grad_norm": 0.7510442793736704, "learning_rate": 3.0926002965707965e-06, "loss": 0.3468, "step": 1478 }, { "epoch": 0.44149253731343285, "grad_norm": 0.760538965279755, "learning_rate": 3.090251561277685e-06, "loss": 0.3432, "step": 1479 }, { "epoch": 0.4417910447761194, "grad_norm": 0.838884239640953, "learning_rate": 3.0879022741137515e-06, "loss": 0.3718, "step": 1480 }, { "epoch": 0.44208955223880597, "grad_norm": 0.7876393667924823, "learning_rate": 3.085552437275522e-06, "loss": 0.3722, "step": 1481 }, { "epoch": 0.44238805970149253, "grad_norm": 0.8465102759230003, "learning_rate": 3.0832020529600367e-06, "loss": 0.3485, "step": 1482 }, { "epoch": 0.4426865671641791, "grad_norm": 0.8627284225172639, "learning_rate": 3.0808511233648466e-06, "loss": 0.3689, "step": 1483 }, { "epoch": 0.44298507462686565, "grad_norm": 0.7790417749973847, "learning_rate": 3.0784996506880157e-06, "loss": 0.313, "step": 1484 }, { "epoch": 0.44328358208955226, "grad_norm": 0.7197974569889182, "learning_rate": 3.076147637128111e-06, "loss": 0.3109, "step": 1485 }, { "epoch": 0.4435820895522388, "grad_norm": 0.9066043015509634, "learning_rate": 3.0737950848842097e-06, "loss": 0.33, "step": 1486 }, { "epoch": 0.4438805970149254, "grad_norm": 0.7323591878235486, "learning_rate": 3.0714419961558907e-06, "loss": 0.3428, "step": 1487 }, { "epoch": 0.44417910447761194, "grad_norm": 0.816034591616698, "learning_rate": 3.069088373143234e-06, "loss": 0.3388, "step": 1488 }, { "epoch": 0.4444776119402985, "grad_norm": 0.8142476229964963, "learning_rate": 3.06673421804682e-06, "loss": 0.29, "step": 1489 }, { "epoch": 0.44477611940298506, "grad_norm": 0.7722627326209343, "learning_rate": 3.064379533067726e-06, "loss": 0.303, "step": 1490 }, { "epoch": 0.4450746268656716, "grad_norm": 0.8826032609144622, "learning_rate": 3.062024320407525e-06, "loss": 0.3546, "step": 1491 }, { "epoch": 0.44537313432835823, "grad_norm": 0.7817296477576442, "learning_rate": 3.059668582268285e-06, "loss": 0.35, "step": 1492 }, { "epoch": 0.4456716417910448, "grad_norm": 0.7259562656822198, "learning_rate": 3.0573123208525613e-06, "loss": 0.3178, "step": 1493 }, { "epoch": 0.44597014925373135, "grad_norm": 0.755357913464589, "learning_rate": 3.0549555383634032e-06, "loss": 0.354, "step": 1494 }, { "epoch": 0.4462686567164179, "grad_norm": 0.792525116985471, "learning_rate": 3.052598237004343e-06, "loss": 0.345, "step": 1495 }, { "epoch": 0.44656716417910447, "grad_norm": 0.8285212042311892, "learning_rate": 3.0502404189794012e-06, "loss": 0.3618, "step": 1496 }, { "epoch": 0.44686567164179103, "grad_norm": 0.8922327205331874, "learning_rate": 3.0478820864930796e-06, "loss": 0.3616, "step": 1497 }, { "epoch": 0.4471641791044776, "grad_norm": 0.8584872212973166, "learning_rate": 3.0455232417503617e-06, "loss": 0.3367, "step": 1498 }, { "epoch": 0.4474626865671642, "grad_norm": 0.7992271644708301, "learning_rate": 3.0431638869567097e-06, "loss": 0.3214, "step": 1499 }, { "epoch": 0.44776119402985076, "grad_norm": 0.7540776769854936, "learning_rate": 3.0408040243180638e-06, "loss": 0.3285, "step": 1500 }, { "epoch": 0.4480597014925373, "grad_norm": 0.8633455449268056, "learning_rate": 3.0384436560408363e-06, "loss": 0.3401, "step": 1501 }, { "epoch": 0.4483582089552239, "grad_norm": 0.9222419115283121, "learning_rate": 3.0360827843319156e-06, "loss": 0.3838, "step": 1502 }, { "epoch": 0.44865671641791044, "grad_norm": 0.8368812623261956, "learning_rate": 3.033721411398659e-06, "loss": 0.3591, "step": 1503 }, { "epoch": 0.448955223880597, "grad_norm": 0.8378315376207885, "learning_rate": 3.0313595394488917e-06, "loss": 0.3567, "step": 1504 }, { "epoch": 0.44925373134328356, "grad_norm": 0.7763761976990962, "learning_rate": 3.0289971706909064e-06, "loss": 0.3387, "step": 1505 }, { "epoch": 0.4495522388059702, "grad_norm": 0.7607283367215002, "learning_rate": 3.026634307333462e-06, "loss": 0.3541, "step": 1506 }, { "epoch": 0.44985074626865673, "grad_norm": 0.8127172993351834, "learning_rate": 3.024270951585776e-06, "loss": 0.3546, "step": 1507 }, { "epoch": 0.4501492537313433, "grad_norm": 0.8648174995674591, "learning_rate": 3.02190710565753e-06, "loss": 0.3627, "step": 1508 }, { "epoch": 0.45044776119402985, "grad_norm": 0.8480105965484397, "learning_rate": 3.019542771758861e-06, "loss": 0.3599, "step": 1509 }, { "epoch": 0.4507462686567164, "grad_norm": 0.7968525603371781, "learning_rate": 3.0171779521003647e-06, "loss": 0.3186, "step": 1510 }, { "epoch": 0.45104477611940297, "grad_norm": 0.9005901290048014, "learning_rate": 3.0148126488930896e-06, "loss": 0.3793, "step": 1511 }, { "epoch": 0.4513432835820895, "grad_norm": 0.8428645379248311, "learning_rate": 3.012446864348536e-06, "loss": 0.3971, "step": 1512 }, { "epoch": 0.45164179104477614, "grad_norm": 0.8480294223291627, "learning_rate": 3.010080600678656e-06, "loss": 0.3705, "step": 1513 }, { "epoch": 0.4519402985074627, "grad_norm": 0.8472349283442467, "learning_rate": 3.0077138600958468e-06, "loss": 0.3656, "step": 1514 }, { "epoch": 0.45223880597014926, "grad_norm": 0.9103614344387116, "learning_rate": 3.0053466448129535e-06, "loss": 0.3812, "step": 1515 }, { "epoch": 0.4525373134328358, "grad_norm": 0.8231552945682205, "learning_rate": 3.0029789570432665e-06, "loss": 0.348, "step": 1516 }, { "epoch": 0.4528358208955224, "grad_norm": 0.9675315430191836, "learning_rate": 3.000610799000514e-06, "loss": 0.3675, "step": 1517 }, { "epoch": 0.45313432835820894, "grad_norm": 0.8070529408063196, "learning_rate": 2.9982421728988663e-06, "loss": 0.341, "step": 1518 }, { "epoch": 0.4534328358208955, "grad_norm": 0.8536363490650818, "learning_rate": 2.9958730809529326e-06, "loss": 0.3794, "step": 1519 }, { "epoch": 0.4537313432835821, "grad_norm": 0.8122339636569735, "learning_rate": 2.9935035253777544e-06, "loss": 0.3582, "step": 1520 }, { "epoch": 0.45402985074626867, "grad_norm": 0.7993919608319511, "learning_rate": 2.9911335083888093e-06, "loss": 0.344, "step": 1521 }, { "epoch": 0.45432835820895523, "grad_norm": 0.8228888153873775, "learning_rate": 2.988763032202006e-06, "loss": 0.3235, "step": 1522 }, { "epoch": 0.4546268656716418, "grad_norm": 0.8201968407407756, "learning_rate": 2.9863920990336803e-06, "loss": 0.3707, "step": 1523 }, { "epoch": 0.45492537313432835, "grad_norm": 0.8436534725037953, "learning_rate": 2.9840207111005987e-06, "loss": 0.3886, "step": 1524 }, { "epoch": 0.4552238805970149, "grad_norm": 0.7968744092690195, "learning_rate": 2.9816488706199498e-06, "loss": 0.3498, "step": 1525 }, { "epoch": 0.45552238805970147, "grad_norm": 0.8197065867829275, "learning_rate": 2.9792765798093466e-06, "loss": 0.3676, "step": 1526 }, { "epoch": 0.4558208955223881, "grad_norm": 0.8141683013758337, "learning_rate": 2.9769038408868246e-06, "loss": 0.3175, "step": 1527 }, { "epoch": 0.45611940298507464, "grad_norm": 0.8682309359789031, "learning_rate": 2.9745306560708343e-06, "loss": 0.3582, "step": 1528 }, { "epoch": 0.4564179104477612, "grad_norm": 0.8430286979266656, "learning_rate": 2.9721570275802487e-06, "loss": 0.3643, "step": 1529 }, { "epoch": 0.45671641791044776, "grad_norm": 0.8327189426692697, "learning_rate": 2.969782957634351e-06, "loss": 0.3869, "step": 1530 }, { "epoch": 0.4570149253731343, "grad_norm": 0.8303352738239019, "learning_rate": 2.967408448452838e-06, "loss": 0.3396, "step": 1531 }, { "epoch": 0.4573134328358209, "grad_norm": 0.8521303530894394, "learning_rate": 2.9650335022558202e-06, "loss": 0.3593, "step": 1532 }, { "epoch": 0.45761194029850744, "grad_norm": 0.7973923282393381, "learning_rate": 2.962658121263812e-06, "loss": 0.3826, "step": 1533 }, { "epoch": 0.45791044776119405, "grad_norm": 0.8307181709707767, "learning_rate": 2.9602823076977376e-06, "loss": 0.385, "step": 1534 }, { "epoch": 0.4582089552238806, "grad_norm": 0.7681378144807851, "learning_rate": 2.9579060637789257e-06, "loss": 0.3345, "step": 1535 }, { "epoch": 0.45850746268656717, "grad_norm": 0.8862805383356972, "learning_rate": 2.955529391729105e-06, "loss": 0.3342, "step": 1536 }, { "epoch": 0.45880597014925373, "grad_norm": 0.7914026956159501, "learning_rate": 2.9531522937704065e-06, "loss": 0.3161, "step": 1537 }, { "epoch": 0.4591044776119403, "grad_norm": 0.815340219137627, "learning_rate": 2.9507747721253598e-06, "loss": 0.3161, "step": 1538 }, { "epoch": 0.45940298507462685, "grad_norm": 0.8486280883954298, "learning_rate": 2.948396829016888e-06, "loss": 0.3425, "step": 1539 }, { "epoch": 0.4597014925373134, "grad_norm": 0.8507231569423965, "learning_rate": 2.9460184666683112e-06, "loss": 0.3601, "step": 1540 }, { "epoch": 0.46, "grad_norm": 0.8316847618482142, "learning_rate": 2.9436396873033396e-06, "loss": 0.3566, "step": 1541 }, { "epoch": 0.4602985074626866, "grad_norm": 0.7838154911787112, "learning_rate": 2.941260493146074e-06, "loss": 0.3075, "step": 1542 }, { "epoch": 0.46059701492537314, "grad_norm": 0.8919729652488453, "learning_rate": 2.938880886421004e-06, "loss": 0.3628, "step": 1543 }, { "epoch": 0.4608955223880597, "grad_norm": 0.8463176490111552, "learning_rate": 2.9365008693530017e-06, "loss": 0.3916, "step": 1544 }, { "epoch": 0.46119402985074626, "grad_norm": 0.8415000303394304, "learning_rate": 2.9341204441673267e-06, "loss": 0.3378, "step": 1545 }, { "epoch": 0.4614925373134328, "grad_norm": 0.9092886626244101, "learning_rate": 2.931739613089618e-06, "loss": 0.3549, "step": 1546 }, { "epoch": 0.46179104477611943, "grad_norm": 0.8845564844751359, "learning_rate": 2.929358378345894e-06, "loss": 0.3646, "step": 1547 }, { "epoch": 0.462089552238806, "grad_norm": 0.7119788198918313, "learning_rate": 2.9269767421625535e-06, "loss": 0.2987, "step": 1548 }, { "epoch": 0.46238805970149255, "grad_norm": 0.812817673515338, "learning_rate": 2.9245947067663653e-06, "loss": 0.3138, "step": 1549 }, { "epoch": 0.4626865671641791, "grad_norm": 0.8782182840009076, "learning_rate": 2.922212274384476e-06, "loss": 0.3709, "step": 1550 }, { "epoch": 0.46298507462686567, "grad_norm": 0.7972696289980897, "learning_rate": 2.9198294472444022e-06, "loss": 0.3182, "step": 1551 }, { "epoch": 0.4632835820895522, "grad_norm": 0.8368020184190745, "learning_rate": 2.9174462275740286e-06, "loss": 0.4036, "step": 1552 }, { "epoch": 0.4635820895522388, "grad_norm": 0.8084845969111005, "learning_rate": 2.9150626176016065e-06, "loss": 0.3457, "step": 1553 }, { "epoch": 0.4638805970149254, "grad_norm": 0.9420253305451717, "learning_rate": 2.9126786195557554e-06, "loss": 0.3444, "step": 1554 }, { "epoch": 0.46417910447761196, "grad_norm": 0.8848388474337436, "learning_rate": 2.910294235665453e-06, "loss": 0.3494, "step": 1555 }, { "epoch": 0.4644776119402985, "grad_norm": 0.8223510020899589, "learning_rate": 2.9079094681600416e-06, "loss": 0.3289, "step": 1556 }, { "epoch": 0.4647761194029851, "grad_norm": 0.7905450158929105, "learning_rate": 2.9055243192692207e-06, "loss": 0.3338, "step": 1557 }, { "epoch": 0.46507462686567164, "grad_norm": 0.8490757892161332, "learning_rate": 2.9031387912230454e-06, "loss": 0.378, "step": 1558 }, { "epoch": 0.4653731343283582, "grad_norm": 0.7906965441882455, "learning_rate": 2.900752886251927e-06, "loss": 0.3149, "step": 1559 }, { "epoch": 0.46567164179104475, "grad_norm": 0.7577279466640751, "learning_rate": 2.898366606586628e-06, "loss": 0.2942, "step": 1560 }, { "epoch": 0.46597014925373137, "grad_norm": 0.8589842568909583, "learning_rate": 2.895979954458263e-06, "loss": 0.366, "step": 1561 }, { "epoch": 0.46626865671641793, "grad_norm": 0.756467710812116, "learning_rate": 2.893592932098292e-06, "loss": 0.3039, "step": 1562 }, { "epoch": 0.4665671641791045, "grad_norm": 0.749749204455234, "learning_rate": 2.891205541738523e-06, "loss": 0.3461, "step": 1563 }, { "epoch": 0.46686567164179105, "grad_norm": 0.7995654210279144, "learning_rate": 2.8888177856111082e-06, "loss": 0.2976, "step": 1564 }, { "epoch": 0.4671641791044776, "grad_norm": 0.9693221870057586, "learning_rate": 2.8864296659485413e-06, "loss": 0.3557, "step": 1565 }, { "epoch": 0.46746268656716417, "grad_norm": 0.7943690346767394, "learning_rate": 2.8840411849836565e-06, "loss": 0.3598, "step": 1566 }, { "epoch": 0.4677611940298507, "grad_norm": 0.8435450993312467, "learning_rate": 2.881652344949625e-06, "loss": 0.4217, "step": 1567 }, { "epoch": 0.46805970149253734, "grad_norm": 0.7333154530834609, "learning_rate": 2.8792631480799526e-06, "loss": 0.2883, "step": 1568 }, { "epoch": 0.4683582089552239, "grad_norm": 0.7562359363934774, "learning_rate": 2.8768735966084817e-06, "loss": 0.3055, "step": 1569 }, { "epoch": 0.46865671641791046, "grad_norm": 0.7398426343466973, "learning_rate": 2.874483692769385e-06, "loss": 0.3478, "step": 1570 }, { "epoch": 0.468955223880597, "grad_norm": 0.8230158538149063, "learning_rate": 2.8720934387971627e-06, "loss": 0.3278, "step": 1571 }, { "epoch": 0.4692537313432836, "grad_norm": 1.3545109216720357, "learning_rate": 2.869702836926645e-06, "loss": 0.2997, "step": 1572 }, { "epoch": 0.46955223880597013, "grad_norm": 0.926104402556891, "learning_rate": 2.8673118893929876e-06, "loss": 0.3141, "step": 1573 }, { "epoch": 0.4698507462686567, "grad_norm": 0.7738598668200026, "learning_rate": 2.864920598431665e-06, "loss": 0.3212, "step": 1574 }, { "epoch": 0.4701492537313433, "grad_norm": 0.7844426674437163, "learning_rate": 2.862528966278479e-06, "loss": 0.3281, "step": 1575 }, { "epoch": 0.47044776119402987, "grad_norm": 0.8678599800587856, "learning_rate": 2.8601369951695463e-06, "loss": 0.3721, "step": 1576 }, { "epoch": 0.4707462686567164, "grad_norm": 0.863778592486986, "learning_rate": 2.8577446873413007e-06, "loss": 0.3629, "step": 1577 }, { "epoch": 0.471044776119403, "grad_norm": 0.9033383927815183, "learning_rate": 2.855352045030493e-06, "loss": 0.3508, "step": 1578 }, { "epoch": 0.47134328358208955, "grad_norm": 0.812991807684829, "learning_rate": 2.8529590704741843e-06, "loss": 0.384, "step": 1579 }, { "epoch": 0.4716417910447761, "grad_norm": 0.8187956043141631, "learning_rate": 2.8505657659097486e-06, "loss": 0.3707, "step": 1580 }, { "epoch": 0.47194029850746266, "grad_norm": 0.9346578308192153, "learning_rate": 2.8481721335748674e-06, "loss": 0.4094, "step": 1581 }, { "epoch": 0.4722388059701493, "grad_norm": 0.7880484291645268, "learning_rate": 2.845778175707527e-06, "loss": 0.3299, "step": 1582 }, { "epoch": 0.47253731343283584, "grad_norm": 0.811099440312082, "learning_rate": 2.8433838945460207e-06, "loss": 0.33, "step": 1583 }, { "epoch": 0.4728358208955224, "grad_norm": 0.7026137359784628, "learning_rate": 2.8409892923289432e-06, "loss": 0.3127, "step": 1584 }, { "epoch": 0.47313432835820896, "grad_norm": 0.8034762357536271, "learning_rate": 2.838594371295189e-06, "loss": 0.3265, "step": 1585 }, { "epoch": 0.4734328358208955, "grad_norm": 0.8485262509519692, "learning_rate": 2.8361991336839513e-06, "loss": 0.381, "step": 1586 }, { "epoch": 0.4737313432835821, "grad_norm": 0.9427056505224326, "learning_rate": 2.833803581734718e-06, "loss": 0.4068, "step": 1587 }, { "epoch": 0.47402985074626863, "grad_norm": 0.8021996861465295, "learning_rate": 2.8314077176872724e-06, "loss": 0.335, "step": 1588 }, { "epoch": 0.47432835820895525, "grad_norm": 0.7919679011390528, "learning_rate": 2.8290115437816894e-06, "loss": 0.3561, "step": 1589 }, { "epoch": 0.4746268656716418, "grad_norm": 0.7810376577747256, "learning_rate": 2.8266150622583315e-06, "loss": 0.3501, "step": 1590 }, { "epoch": 0.47492537313432837, "grad_norm": 0.9272988287616046, "learning_rate": 2.8242182753578523e-06, "loss": 0.3466, "step": 1591 }, { "epoch": 0.4752238805970149, "grad_norm": 0.8467896274245239, "learning_rate": 2.8218211853211893e-06, "loss": 0.357, "step": 1592 }, { "epoch": 0.4755223880597015, "grad_norm": 0.9156541553176574, "learning_rate": 2.819423794389561e-06, "loss": 0.3529, "step": 1593 }, { "epoch": 0.47582089552238804, "grad_norm": 0.7927076329626589, "learning_rate": 2.817026104804471e-06, "loss": 0.3155, "step": 1594 }, { "epoch": 0.4761194029850746, "grad_norm": 0.767743906513666, "learning_rate": 2.8146281188077017e-06, "loss": 0.3155, "step": 1595 }, { "epoch": 0.4764179104477612, "grad_norm": 0.8361708178822149, "learning_rate": 2.8122298386413094e-06, "loss": 0.361, "step": 1596 }, { "epoch": 0.4767164179104478, "grad_norm": 0.8455590406670277, "learning_rate": 2.8098312665476283e-06, "loss": 0.3462, "step": 1597 }, { "epoch": 0.47701492537313434, "grad_norm": 0.8438149986929196, "learning_rate": 2.8074324047692662e-06, "loss": 0.3597, "step": 1598 }, { "epoch": 0.4773134328358209, "grad_norm": 0.8286441056776019, "learning_rate": 2.8050332555490987e-06, "loss": 0.3648, "step": 1599 }, { "epoch": 0.47761194029850745, "grad_norm": 0.7719571836786463, "learning_rate": 2.8026338211302735e-06, "loss": 0.3333, "step": 1600 }, { "epoch": 0.477910447761194, "grad_norm": 0.8233026839737, "learning_rate": 2.800234103756201e-06, "loss": 0.3337, "step": 1601 }, { "epoch": 0.47820895522388057, "grad_norm": 0.7688811413487633, "learning_rate": 2.7978341056705592e-06, "loss": 0.3075, "step": 1602 }, { "epoch": 0.4785074626865672, "grad_norm": 0.9247126459680931, "learning_rate": 2.7954338291172892e-06, "loss": 0.3842, "step": 1603 }, { "epoch": 0.47880597014925375, "grad_norm": 0.8669810824469362, "learning_rate": 2.79303327634059e-06, "loss": 0.3619, "step": 1604 }, { "epoch": 0.4791044776119403, "grad_norm": 0.824675144757377, "learning_rate": 2.7906324495849206e-06, "loss": 0.3491, "step": 1605 }, { "epoch": 0.47940298507462686, "grad_norm": 0.7606474249944587, "learning_rate": 2.788231351094995e-06, "loss": 0.3409, "step": 1606 }, { "epoch": 0.4797014925373134, "grad_norm": 0.8588453625011868, "learning_rate": 2.785829983115781e-06, "loss": 0.3826, "step": 1607 }, { "epoch": 0.48, "grad_norm": 0.8069935126683544, "learning_rate": 2.7834283478925007e-06, "loss": 0.3424, "step": 1608 }, { "epoch": 0.48029850746268654, "grad_norm": 0.8572397041684174, "learning_rate": 2.7810264476706227e-06, "loss": 0.3258, "step": 1609 }, { "epoch": 0.48059701492537316, "grad_norm": 0.850023545997115, "learning_rate": 2.778624284695867e-06, "loss": 0.3235, "step": 1610 }, { "epoch": 0.4808955223880597, "grad_norm": 0.8475591867049286, "learning_rate": 2.7762218612141966e-06, "loss": 0.3337, "step": 1611 }, { "epoch": 0.4811940298507463, "grad_norm": 0.8536920148847397, "learning_rate": 2.7738191794718183e-06, "loss": 0.307, "step": 1612 }, { "epoch": 0.48149253731343283, "grad_norm": 0.8281648081371542, "learning_rate": 2.771416241715182e-06, "loss": 0.3839, "step": 1613 }, { "epoch": 0.4817910447761194, "grad_norm": 0.7677447755495962, "learning_rate": 2.7690130501909756e-06, "loss": 0.3291, "step": 1614 }, { "epoch": 0.48208955223880595, "grad_norm": 3.3342305797562215, "learning_rate": 2.766609607146124e-06, "loss": 0.3939, "step": 1615 }, { "epoch": 0.4823880597014925, "grad_norm": 0.7437237275043507, "learning_rate": 2.7642059148277894e-06, "loss": 0.3558, "step": 1616 }, { "epoch": 0.4826865671641791, "grad_norm": 0.8563459859815404, "learning_rate": 2.761801975483363e-06, "loss": 0.3396, "step": 1617 }, { "epoch": 0.4829850746268657, "grad_norm": 0.8531096858219094, "learning_rate": 2.7593977913604717e-06, "loss": 0.3741, "step": 1618 }, { "epoch": 0.48328358208955224, "grad_norm": 0.8588995102283145, "learning_rate": 2.7569933647069685e-06, "loss": 0.3189, "step": 1619 }, { "epoch": 0.4835820895522388, "grad_norm": 0.8321789763467867, "learning_rate": 2.754588697770933e-06, "loss": 0.3456, "step": 1620 }, { "epoch": 0.48388059701492536, "grad_norm": 0.8853198602133011, "learning_rate": 2.752183792800671e-06, "loss": 0.3932, "step": 1621 }, { "epoch": 0.4841791044776119, "grad_norm": 0.8434334328362673, "learning_rate": 2.7497786520447093e-06, "loss": 0.4004, "step": 1622 }, { "epoch": 0.4844776119402985, "grad_norm": 0.8669235672763962, "learning_rate": 2.7473732777517965e-06, "loss": 0.3448, "step": 1623 }, { "epoch": 0.4847761194029851, "grad_norm": 0.8146513938626443, "learning_rate": 2.7449676721708995e-06, "loss": 0.3658, "step": 1624 }, { "epoch": 0.48507462686567165, "grad_norm": 0.7682677410362674, "learning_rate": 2.7425618375511992e-06, "loss": 0.3429, "step": 1625 }, { "epoch": 0.4853731343283582, "grad_norm": 0.8659290979909456, "learning_rate": 2.7401557761420933e-06, "loss": 0.3989, "step": 1626 }, { "epoch": 0.4856716417910448, "grad_norm": 0.9005007791678644, "learning_rate": 2.737749490193191e-06, "loss": 0.4066, "step": 1627 }, { "epoch": 0.48597014925373133, "grad_norm": 0.8269529526190118, "learning_rate": 2.7353429819543104e-06, "loss": 0.3104, "step": 1628 }, { "epoch": 0.4862686567164179, "grad_norm": 0.7655526786732026, "learning_rate": 2.7329362536754777e-06, "loss": 0.3489, "step": 1629 }, { "epoch": 0.48656716417910445, "grad_norm": 0.8373568352027102, "learning_rate": 2.7305293076069263e-06, "loss": 0.3264, "step": 1630 }, { "epoch": 0.48686567164179106, "grad_norm": 0.7899465343213389, "learning_rate": 2.728122145999091e-06, "loss": 0.3057, "step": 1631 }, { "epoch": 0.4871641791044776, "grad_norm": 2.475275899634775, "learning_rate": 2.72571477110261e-06, "loss": 0.3467, "step": 1632 }, { "epoch": 0.4874626865671642, "grad_norm": 0.8557171237524993, "learning_rate": 2.72330718516832e-06, "loss": 0.3587, "step": 1633 }, { "epoch": 0.48776119402985074, "grad_norm": 0.7280642925743758, "learning_rate": 2.7208993904472543e-06, "loss": 0.3306, "step": 1634 }, { "epoch": 0.4880597014925373, "grad_norm": 0.7753145285378411, "learning_rate": 2.7184913891906433e-06, "loss": 0.3074, "step": 1635 }, { "epoch": 0.48835820895522386, "grad_norm": 0.7395330717244848, "learning_rate": 2.716083183649909e-06, "loss": 0.3426, "step": 1636 }, { "epoch": 0.4886567164179105, "grad_norm": 0.8002310765854909, "learning_rate": 2.7136747760766653e-06, "loss": 0.3534, "step": 1637 }, { "epoch": 0.48895522388059703, "grad_norm": 0.8324334035465984, "learning_rate": 2.7112661687227142e-06, "loss": 0.3355, "step": 1638 }, { "epoch": 0.4892537313432836, "grad_norm": 0.7743794963426636, "learning_rate": 2.708857363840045e-06, "loss": 0.3329, "step": 1639 }, { "epoch": 0.48955223880597015, "grad_norm": 0.8228579838838092, "learning_rate": 2.7064483636808314e-06, "loss": 0.3755, "step": 1640 }, { "epoch": 0.4898507462686567, "grad_norm": 0.7759076884150069, "learning_rate": 2.7040391704974293e-06, "loss": 0.3534, "step": 1641 }, { "epoch": 0.49014925373134327, "grad_norm": 0.8250088563687552, "learning_rate": 2.7016297865423767e-06, "loss": 0.3562, "step": 1642 }, { "epoch": 0.49044776119402983, "grad_norm": 0.8627019755137874, "learning_rate": 2.699220214068389e-06, "loss": 0.292, "step": 1643 }, { "epoch": 0.49074626865671644, "grad_norm": 0.8814707303291824, "learning_rate": 2.696810455328357e-06, "loss": 0.3616, "step": 1644 }, { "epoch": 0.491044776119403, "grad_norm": 0.8932767174110039, "learning_rate": 2.694400512575346e-06, "loss": 0.3596, "step": 1645 }, { "epoch": 0.49134328358208956, "grad_norm": 0.8118919974953459, "learning_rate": 2.6919903880625954e-06, "loss": 0.3343, "step": 1646 }, { "epoch": 0.4916417910447761, "grad_norm": 0.8648850492388545, "learning_rate": 2.6895800840435106e-06, "loss": 0.335, "step": 1647 }, { "epoch": 0.4919402985074627, "grad_norm": 0.8549039551168018, "learning_rate": 2.687169602771668e-06, "loss": 0.3723, "step": 1648 }, { "epoch": 0.49223880597014924, "grad_norm": 0.7944249187287936, "learning_rate": 2.68475894650081e-06, "loss": 0.3085, "step": 1649 }, { "epoch": 0.4925373134328358, "grad_norm": 0.7927257364542748, "learning_rate": 2.6823481174848405e-06, "loss": 0.3133, "step": 1650 }, { "epoch": 0.4928358208955224, "grad_norm": 0.7886213022780711, "learning_rate": 2.679937117977825e-06, "loss": 0.3822, "step": 1651 }, { "epoch": 0.493134328358209, "grad_norm": 0.8280071240535853, "learning_rate": 2.6775259502339913e-06, "loss": 0.3573, "step": 1652 }, { "epoch": 0.49343283582089553, "grad_norm": 0.8418482347983869, "learning_rate": 2.67511461650772e-06, "loss": 0.3835, "step": 1653 }, { "epoch": 0.4937313432835821, "grad_norm": 0.8387035834884874, "learning_rate": 2.672703119053552e-06, "loss": 0.3586, "step": 1654 }, { "epoch": 0.49402985074626865, "grad_norm": 0.7816419105725156, "learning_rate": 2.670291460126177e-06, "loss": 0.3339, "step": 1655 }, { "epoch": 0.4943283582089552, "grad_norm": 0.8001221218291323, "learning_rate": 2.667879641980437e-06, "loss": 0.3204, "step": 1656 }, { "epoch": 0.49462686567164177, "grad_norm": 0.7879284043181888, "learning_rate": 2.6654676668713245e-06, "loss": 0.3224, "step": 1657 }, { "epoch": 0.4949253731343284, "grad_norm": 0.801234203959891, "learning_rate": 2.6630555370539763e-06, "loss": 0.3327, "step": 1658 }, { "epoch": 0.49522388059701494, "grad_norm": 0.7997197712338734, "learning_rate": 2.6606432547836757e-06, "loss": 0.3375, "step": 1659 }, { "epoch": 0.4955223880597015, "grad_norm": 0.7623217742754592, "learning_rate": 2.658230822315847e-06, "loss": 0.3187, "step": 1660 }, { "epoch": 0.49582089552238806, "grad_norm": 0.7962363516446156, "learning_rate": 2.655818241906057e-06, "loss": 0.3493, "step": 1661 }, { "epoch": 0.4961194029850746, "grad_norm": 0.9034198102424835, "learning_rate": 2.653405515810009e-06, "loss": 0.3753, "step": 1662 }, { "epoch": 0.4964179104477612, "grad_norm": 0.7925807514012592, "learning_rate": 2.650992646283542e-06, "loss": 0.3077, "step": 1663 }, { "epoch": 0.49671641791044774, "grad_norm": 0.7247430409943646, "learning_rate": 2.648579635582632e-06, "loss": 0.3098, "step": 1664 }, { "epoch": 0.49701492537313435, "grad_norm": 0.6769452012660345, "learning_rate": 2.6461664859633844e-06, "loss": 0.2895, "step": 1665 }, { "epoch": 0.4973134328358209, "grad_norm": 0.806049068867384, "learning_rate": 2.6437531996820353e-06, "loss": 0.3263, "step": 1666 }, { "epoch": 0.49761194029850747, "grad_norm": 0.8550960252280426, "learning_rate": 2.641339778994948e-06, "loss": 0.37, "step": 1667 }, { "epoch": 0.49791044776119403, "grad_norm": 0.8478170482025786, "learning_rate": 2.6389262261586127e-06, "loss": 0.3892, "step": 1668 }, { "epoch": 0.4982089552238806, "grad_norm": 0.8345120172992568, "learning_rate": 2.636512543429642e-06, "loss": 0.348, "step": 1669 }, { "epoch": 0.49850746268656715, "grad_norm": 0.8028565137146465, "learning_rate": 2.634098733064771e-06, "loss": 0.3574, "step": 1670 }, { "epoch": 0.4988059701492537, "grad_norm": 0.7451403703442233, "learning_rate": 2.6316847973208535e-06, "loss": 0.2899, "step": 1671 }, { "epoch": 0.4991044776119403, "grad_norm": 0.8352856655412605, "learning_rate": 2.6292707384548604e-06, "loss": 0.3597, "step": 1672 }, { "epoch": 0.4994029850746269, "grad_norm": 0.8545513078252729, "learning_rate": 2.6268565587238777e-06, "loss": 0.3325, "step": 1673 }, { "epoch": 0.49970149253731344, "grad_norm": 0.8282524133497315, "learning_rate": 2.6244422603851046e-06, "loss": 0.3492, "step": 1674 }, { "epoch": 0.5, "grad_norm": 0.7991480826878556, "learning_rate": 2.622027845695851e-06, "loss": 0.3506, "step": 1675 }, { "epoch": 0.5002985074626866, "grad_norm": 0.7582015801891304, "learning_rate": 2.6196133169135368e-06, "loss": 0.3293, "step": 1676 }, { "epoch": 0.5005970149253731, "grad_norm": 0.9612014821814638, "learning_rate": 2.6171986762956856e-06, "loss": 0.3996, "step": 1677 }, { "epoch": 0.5008955223880597, "grad_norm": 0.7898075941393438, "learning_rate": 2.61478392609993e-06, "loss": 0.3312, "step": 1678 }, { "epoch": 0.5011940298507462, "grad_norm": 0.8015043125699043, "learning_rate": 2.612369068584001e-06, "loss": 0.3096, "step": 1679 }, { "epoch": 0.5014925373134328, "grad_norm": 0.8389408382259228, "learning_rate": 2.6099541060057316e-06, "loss": 0.3338, "step": 1680 }, { "epoch": 0.5017910447761194, "grad_norm": 0.8096612317183629, "learning_rate": 2.607539040623054e-06, "loss": 0.3828, "step": 1681 }, { "epoch": 0.502089552238806, "grad_norm": 0.8259878912971927, "learning_rate": 2.6051238746939934e-06, "loss": 0.3031, "step": 1682 }, { "epoch": 0.5023880597014926, "grad_norm": 0.8631597448564671, "learning_rate": 2.602708610476673e-06, "loss": 0.3664, "step": 1683 }, { "epoch": 0.5026865671641791, "grad_norm": 0.7716339492340298, "learning_rate": 2.600293250229306e-06, "loss": 0.338, "step": 1684 }, { "epoch": 0.5029850746268657, "grad_norm": 0.899796414901459, "learning_rate": 2.597877796210194e-06, "loss": 0.3815, "step": 1685 }, { "epoch": 0.5032835820895523, "grad_norm": 0.9820931245782578, "learning_rate": 2.5954622506777285e-06, "loss": 0.3691, "step": 1686 }, { "epoch": 0.5035820895522388, "grad_norm": 0.7494455392178265, "learning_rate": 2.5930466158903856e-06, "loss": 0.3215, "step": 1687 }, { "epoch": 0.5038805970149254, "grad_norm": 0.8086992100975138, "learning_rate": 2.5906308941067243e-06, "loss": 0.3297, "step": 1688 }, { "epoch": 0.5041791044776119, "grad_norm": 0.9419257958465664, "learning_rate": 2.588215087585387e-06, "loss": 0.3312, "step": 1689 }, { "epoch": 0.5044776119402985, "grad_norm": 0.7786799854356972, "learning_rate": 2.5857991985850924e-06, "loss": 0.3236, "step": 1690 }, { "epoch": 0.5047761194029851, "grad_norm": 0.8680079607603017, "learning_rate": 2.583383229364639e-06, "loss": 0.3484, "step": 1691 }, { "epoch": 0.5050746268656716, "grad_norm": 0.867654415324763, "learning_rate": 2.580967182182898e-06, "loss": 0.3494, "step": 1692 }, { "epoch": 0.5053731343283582, "grad_norm": 0.7824582583766272, "learning_rate": 2.5785510592988156e-06, "loss": 0.3345, "step": 1693 }, { "epoch": 0.5056716417910447, "grad_norm": 0.9778668777323667, "learning_rate": 2.5761348629714073e-06, "loss": 0.3447, "step": 1694 }, { "epoch": 0.5059701492537313, "grad_norm": 0.879824672451696, "learning_rate": 2.5737185954597583e-06, "loss": 0.3486, "step": 1695 }, { "epoch": 0.506268656716418, "grad_norm": 0.7272504403686626, "learning_rate": 2.571302259023019e-06, "loss": 0.3284, "step": 1696 }, { "epoch": 0.5065671641791045, "grad_norm": 0.8269875685389965, "learning_rate": 2.5688858559204056e-06, "loss": 0.3189, "step": 1697 }, { "epoch": 0.5068656716417911, "grad_norm": 0.7107142314587015, "learning_rate": 2.5664693884111958e-06, "loss": 0.3069, "step": 1698 }, { "epoch": 0.5071641791044776, "grad_norm": 0.8300310961227638, "learning_rate": 2.564052858754728e-06, "loss": 0.2923, "step": 1699 }, { "epoch": 0.5074626865671642, "grad_norm": 0.8721700820774255, "learning_rate": 2.561636269210399e-06, "loss": 0.3377, "step": 1700 }, { "epoch": 0.5077611940298508, "grad_norm": 0.7550427576298773, "learning_rate": 2.55921962203766e-06, "loss": 0.3222, "step": 1701 }, { "epoch": 0.5080597014925373, "grad_norm": 0.7968625493472227, "learning_rate": 2.5568029194960186e-06, "loss": 0.3291, "step": 1702 }, { "epoch": 0.5083582089552239, "grad_norm": 0.7511883213331606, "learning_rate": 2.554386163845032e-06, "loss": 0.3107, "step": 1703 }, { "epoch": 0.5086567164179104, "grad_norm": 0.752772967275015, "learning_rate": 2.551969357344308e-06, "loss": 0.3579, "step": 1704 }, { "epoch": 0.508955223880597, "grad_norm": 0.7805447167279405, "learning_rate": 2.5495525022535013e-06, "loss": 0.3091, "step": 1705 }, { "epoch": 0.5092537313432836, "grad_norm": 0.7465985414472608, "learning_rate": 2.547135600832313e-06, "loss": 0.3116, "step": 1706 }, { "epoch": 0.5095522388059701, "grad_norm": 0.7452102417332194, "learning_rate": 2.544718655340486e-06, "loss": 0.3141, "step": 1707 }, { "epoch": 0.5098507462686567, "grad_norm": 0.8594711328205182, "learning_rate": 2.5423016680378076e-06, "loss": 0.3254, "step": 1708 }, { "epoch": 0.5101492537313432, "grad_norm": 0.8724045713981662, "learning_rate": 2.5398846411840998e-06, "loss": 0.3306, "step": 1709 }, { "epoch": 0.5104477611940299, "grad_norm": 0.8250540195559437, "learning_rate": 2.5374675770392247e-06, "loss": 0.2953, "step": 1710 }, { "epoch": 0.5107462686567165, "grad_norm": 0.7579796546729729, "learning_rate": 2.5350504778630795e-06, "loss": 0.3514, "step": 1711 }, { "epoch": 0.511044776119403, "grad_norm": 0.7968045617982358, "learning_rate": 2.5326333459155904e-06, "loss": 0.3443, "step": 1712 }, { "epoch": 0.5113432835820896, "grad_norm": 0.7725314533480072, "learning_rate": 2.530216183456719e-06, "loss": 0.3345, "step": 1713 }, { "epoch": 0.5116417910447761, "grad_norm": 0.8181569525639177, "learning_rate": 2.527798992746453e-06, "loss": 0.3061, "step": 1714 }, { "epoch": 0.5119402985074627, "grad_norm": 0.8292279194686958, "learning_rate": 2.525381776044806e-06, "loss": 0.3494, "step": 1715 }, { "epoch": 0.5122388059701493, "grad_norm": 0.9155665246514125, "learning_rate": 2.5229645356118166e-06, "loss": 0.3805, "step": 1716 }, { "epoch": 0.5125373134328358, "grad_norm": 0.8426243739571815, "learning_rate": 2.520547273707546e-06, "loss": 0.3978, "step": 1717 }, { "epoch": 0.5128358208955224, "grad_norm": 0.7578253079200755, "learning_rate": 2.5181299925920756e-06, "loss": 0.3387, "step": 1718 }, { "epoch": 0.5131343283582089, "grad_norm": 0.7152262315331874, "learning_rate": 2.5157126945255043e-06, "loss": 0.2707, "step": 1719 }, { "epoch": 0.5134328358208955, "grad_norm": 0.7803958098892375, "learning_rate": 2.5132953817679466e-06, "loss": 0.3587, "step": 1720 }, { "epoch": 0.513731343283582, "grad_norm": 0.7380296998637139, "learning_rate": 2.510878056579531e-06, "loss": 0.3272, "step": 1721 }, { "epoch": 0.5140298507462686, "grad_norm": 0.8282169867776825, "learning_rate": 2.5084607212203983e-06, "loss": 0.3618, "step": 1722 }, { "epoch": 0.5143283582089552, "grad_norm": 0.8758117401270619, "learning_rate": 2.5060433779506966e-06, "loss": 0.3096, "step": 1723 }, { "epoch": 0.5146268656716418, "grad_norm": 0.9027136861131375, "learning_rate": 2.5036260290305837e-06, "loss": 0.3365, "step": 1724 }, { "epoch": 0.5149253731343284, "grad_norm": 0.8734789095232062, "learning_rate": 2.501208676720223e-06, "loss": 0.3564, "step": 1725 }, { "epoch": 0.515223880597015, "grad_norm": 0.9223885876305432, "learning_rate": 2.498791323279778e-06, "loss": 0.361, "step": 1726 }, { "epoch": 0.5155223880597015, "grad_norm": 0.7412513939737528, "learning_rate": 2.496373970969417e-06, "loss": 0.2946, "step": 1727 }, { "epoch": 0.5158208955223881, "grad_norm": 0.8125704355183377, "learning_rate": 2.4939566220493043e-06, "loss": 0.3597, "step": 1728 }, { "epoch": 0.5161194029850746, "grad_norm": 0.875058571982458, "learning_rate": 2.491539278779603e-06, "loss": 0.3321, "step": 1729 }, { "epoch": 0.5164179104477612, "grad_norm": 1.4129333220086666, "learning_rate": 2.48912194342047e-06, "loss": 0.3021, "step": 1730 }, { "epoch": 0.5167164179104478, "grad_norm": 0.822837577529842, "learning_rate": 2.486704618232054e-06, "loss": 0.2875, "step": 1731 }, { "epoch": 0.5170149253731343, "grad_norm": 0.8508719610068957, "learning_rate": 2.484287305474496e-06, "loss": 0.332, "step": 1732 }, { "epoch": 0.5173134328358209, "grad_norm": 0.8371266747144144, "learning_rate": 2.481870007407925e-06, "loss": 0.3505, "step": 1733 }, { "epoch": 0.5176119402985074, "grad_norm": 0.7674347672744616, "learning_rate": 2.4794527262924546e-06, "loss": 0.3378, "step": 1734 }, { "epoch": 0.517910447761194, "grad_norm": 0.9297458178829942, "learning_rate": 2.4770354643881843e-06, "loss": 0.3841, "step": 1735 }, { "epoch": 0.5182089552238806, "grad_norm": 0.9461877714567667, "learning_rate": 2.474618223955196e-06, "loss": 0.3962, "step": 1736 }, { "epoch": 0.5185074626865671, "grad_norm": 0.9240923700130257, "learning_rate": 2.4722010072535485e-06, "loss": 0.3352, "step": 1737 }, { "epoch": 0.5188059701492538, "grad_norm": 0.7914811230349772, "learning_rate": 2.4697838165432816e-06, "loss": 0.351, "step": 1738 }, { "epoch": 0.5191044776119403, "grad_norm": 0.9433458310818481, "learning_rate": 2.4673666540844105e-06, "loss": 0.3725, "step": 1739 }, { "epoch": 0.5194029850746269, "grad_norm": 0.8700776482081186, "learning_rate": 2.4649495221369218e-06, "loss": 0.3117, "step": 1740 }, { "epoch": 0.5197014925373135, "grad_norm": 0.7373330115096584, "learning_rate": 2.462532422960776e-06, "loss": 0.3273, "step": 1741 }, { "epoch": 0.52, "grad_norm": 0.7741460492182761, "learning_rate": 2.460115358815901e-06, "loss": 0.3185, "step": 1742 }, { "epoch": 0.5202985074626866, "grad_norm": 0.9184552422451486, "learning_rate": 2.457698331962193e-06, "loss": 0.3499, "step": 1743 }, { "epoch": 0.5205970149253731, "grad_norm": 0.7416538846201887, "learning_rate": 2.4552813446595148e-06, "loss": 0.3278, "step": 1744 }, { "epoch": 0.5208955223880597, "grad_norm": 0.8354557623430109, "learning_rate": 2.4528643991676875e-06, "loss": 0.3341, "step": 1745 }, { "epoch": 0.5211940298507463, "grad_norm": 0.8538898399358935, "learning_rate": 2.450447497746499e-06, "loss": 0.3757, "step": 1746 }, { "epoch": 0.5214925373134328, "grad_norm": 0.7395052286353708, "learning_rate": 2.4480306426556925e-06, "loss": 0.2997, "step": 1747 }, { "epoch": 0.5217910447761194, "grad_norm": 0.7583313377731395, "learning_rate": 2.4456138361549678e-06, "loss": 0.3343, "step": 1748 }, { "epoch": 0.5220895522388059, "grad_norm": 0.8107731144138431, "learning_rate": 2.4431970805039814e-06, "loss": 0.2842, "step": 1749 }, { "epoch": 0.5223880597014925, "grad_norm": 0.7658894882988232, "learning_rate": 2.4407803779623394e-06, "loss": 0.3124, "step": 1750 }, { "epoch": 0.522686567164179, "grad_norm": 0.8719707167891656, "learning_rate": 2.4383637307896017e-06, "loss": 0.2977, "step": 1751 }, { "epoch": 0.5229850746268657, "grad_norm": 0.8510394601889362, "learning_rate": 2.4359471412452724e-06, "loss": 0.3698, "step": 1752 }, { "epoch": 0.5232835820895523, "grad_norm": 0.9805015870419773, "learning_rate": 2.4335306115888046e-06, "loss": 0.3421, "step": 1753 }, { "epoch": 0.5235820895522388, "grad_norm": 0.884081775452208, "learning_rate": 2.4311141440795956e-06, "loss": 0.3362, "step": 1754 }, { "epoch": 0.5238805970149254, "grad_norm": 0.8500753318797264, "learning_rate": 2.4286977409769818e-06, "loss": 0.3223, "step": 1755 }, { "epoch": 0.524179104477612, "grad_norm": 0.7775675727762501, "learning_rate": 2.4262814045402425e-06, "loss": 0.3587, "step": 1756 }, { "epoch": 0.5244776119402985, "grad_norm": 0.8771202940506111, "learning_rate": 2.4238651370285936e-06, "loss": 0.3609, "step": 1757 }, { "epoch": 0.5247761194029851, "grad_norm": 0.9343708174797646, "learning_rate": 2.421448940701185e-06, "loss": 0.3788, "step": 1758 }, { "epoch": 0.5250746268656716, "grad_norm": 0.7830270720430653, "learning_rate": 2.4190328178171026e-06, "loss": 0.3327, "step": 1759 }, { "epoch": 0.5253731343283582, "grad_norm": 0.7893636098117658, "learning_rate": 2.4166167706353623e-06, "loss": 0.3195, "step": 1760 }, { "epoch": 0.5256716417910448, "grad_norm": 0.8111132911057031, "learning_rate": 2.414200801414908e-06, "loss": 0.3735, "step": 1761 }, { "epoch": 0.5259701492537313, "grad_norm": 0.9047377513665926, "learning_rate": 2.4117849124146136e-06, "loss": 0.3782, "step": 1762 }, { "epoch": 0.5262686567164179, "grad_norm": 0.8119207582887555, "learning_rate": 2.4093691058932765e-06, "loss": 0.3387, "step": 1763 }, { "epoch": 0.5265671641791044, "grad_norm": 0.7829803719766413, "learning_rate": 2.4069533841096153e-06, "loss": 0.3503, "step": 1764 }, { "epoch": 0.5268656716417911, "grad_norm": 0.8065607770759078, "learning_rate": 2.404537749322273e-06, "loss": 0.3407, "step": 1765 }, { "epoch": 0.5271641791044777, "grad_norm": 0.8371021912735477, "learning_rate": 2.4021222037898066e-06, "loss": 0.3698, "step": 1766 }, { "epoch": 0.5274626865671642, "grad_norm": 0.8296224322060857, "learning_rate": 2.399706749770695e-06, "loss": 0.2996, "step": 1767 }, { "epoch": 0.5277611940298508, "grad_norm": 0.7980329495819588, "learning_rate": 2.3972913895233278e-06, "loss": 0.3164, "step": 1768 }, { "epoch": 0.5280597014925373, "grad_norm": 0.84030721922861, "learning_rate": 2.394876125306007e-06, "loss": 0.3384, "step": 1769 }, { "epoch": 0.5283582089552239, "grad_norm": 0.8654667250374329, "learning_rate": 2.392460959376947e-06, "loss": 0.3168, "step": 1770 }, { "epoch": 0.5286567164179105, "grad_norm": 0.8688067439209197, "learning_rate": 2.3900458939942696e-06, "loss": 0.3503, "step": 1771 }, { "epoch": 0.528955223880597, "grad_norm": 0.7877176392427366, "learning_rate": 2.387630931416e-06, "loss": 0.3048, "step": 1772 }, { "epoch": 0.5292537313432836, "grad_norm": 0.8446237767654405, "learning_rate": 2.3852160739000706e-06, "loss": 0.3626, "step": 1773 }, { "epoch": 0.5295522388059701, "grad_norm": 0.8025405273097861, "learning_rate": 2.382801323704315e-06, "loss": 0.3284, "step": 1774 }, { "epoch": 0.5298507462686567, "grad_norm": 0.9052209154464622, "learning_rate": 2.380386683086465e-06, "loss": 0.3916, "step": 1775 }, { "epoch": 0.5301492537313433, "grad_norm": 0.83787822521044, "learning_rate": 2.3779721543041504e-06, "loss": 0.3503, "step": 1776 }, { "epoch": 0.5304477611940298, "grad_norm": 0.8029033496086618, "learning_rate": 2.3755577396148967e-06, "loss": 0.3198, "step": 1777 }, { "epoch": 0.5307462686567164, "grad_norm": 0.9561232832711954, "learning_rate": 2.3731434412761236e-06, "loss": 0.3653, "step": 1778 }, { "epoch": 0.531044776119403, "grad_norm": 0.8072801892942726, "learning_rate": 2.370729261545141e-06, "loss": 0.34, "step": 1779 }, { "epoch": 0.5313432835820896, "grad_norm": 0.7980807938842572, "learning_rate": 2.3683152026791473e-06, "loss": 0.2973, "step": 1780 }, { "epoch": 0.5316417910447762, "grad_norm": 0.8316738799919338, "learning_rate": 2.365901266935229e-06, "loss": 0.3422, "step": 1781 }, { "epoch": 0.5319402985074627, "grad_norm": 0.7980662515304342, "learning_rate": 2.363487456570359e-06, "loss": 0.3575, "step": 1782 }, { "epoch": 0.5322388059701493, "grad_norm": 0.833051789155456, "learning_rate": 2.3610737738413872e-06, "loss": 0.3391, "step": 1783 }, { "epoch": 0.5325373134328358, "grad_norm": 0.8007945992425828, "learning_rate": 2.3586602210050525e-06, "loss": 0.2989, "step": 1784 }, { "epoch": 0.5328358208955224, "grad_norm": 0.9029970072294182, "learning_rate": 2.356246800317965e-06, "loss": 0.3445, "step": 1785 }, { "epoch": 0.533134328358209, "grad_norm": 0.990141115180327, "learning_rate": 2.353833514036616e-06, "loss": 0.3393, "step": 1786 }, { "epoch": 0.5334328358208955, "grad_norm": 0.8954234000787314, "learning_rate": 2.351420364417368e-06, "loss": 0.3891, "step": 1787 }, { "epoch": 0.5337313432835821, "grad_norm": 0.8292622437441727, "learning_rate": 2.349007353716458e-06, "loss": 0.344, "step": 1788 }, { "epoch": 0.5340298507462686, "grad_norm": 0.8448034395608284, "learning_rate": 2.3465944841899916e-06, "loss": 0.3248, "step": 1789 }, { "epoch": 0.5343283582089552, "grad_norm": 0.9091150167264239, "learning_rate": 2.344181758093943e-06, "loss": 0.3164, "step": 1790 }, { "epoch": 0.5346268656716417, "grad_norm": 0.77933196910969, "learning_rate": 2.3417691776841532e-06, "loss": 0.3361, "step": 1791 }, { "epoch": 0.5349253731343283, "grad_norm": 0.8157989161197443, "learning_rate": 2.339356745216325e-06, "loss": 0.2778, "step": 1792 }, { "epoch": 0.535223880597015, "grad_norm": 0.7979239591724641, "learning_rate": 2.336944462946024e-06, "loss": 0.3471, "step": 1793 }, { "epoch": 0.5355223880597015, "grad_norm": 0.8373875356209881, "learning_rate": 2.3345323331286763e-06, "loss": 0.3221, "step": 1794 }, { "epoch": 0.5358208955223881, "grad_norm": 0.8199518664561318, "learning_rate": 2.3321203580195635e-06, "loss": 0.332, "step": 1795 }, { "epoch": 0.5361194029850747, "grad_norm": 0.8762948103305159, "learning_rate": 2.329708539873824e-06, "loss": 0.3909, "step": 1796 }, { "epoch": 0.5364179104477612, "grad_norm": 0.8353475092926272, "learning_rate": 2.3272968809464486e-06, "loss": 0.3553, "step": 1797 }, { "epoch": 0.5367164179104478, "grad_norm": 0.8759847723242117, "learning_rate": 2.3248853834922803e-06, "loss": 0.3686, "step": 1798 }, { "epoch": 0.5370149253731343, "grad_norm": 0.8615201742620705, "learning_rate": 2.3224740497660096e-06, "loss": 0.3754, "step": 1799 }, { "epoch": 0.5373134328358209, "grad_norm": 0.9099685739043476, "learning_rate": 2.3200628820221756e-06, "loss": 0.3581, "step": 1800 }, { "epoch": 0.5376119402985075, "grad_norm": 0.777596772205429, "learning_rate": 2.3176518825151608e-06, "loss": 0.3378, "step": 1801 }, { "epoch": 0.537910447761194, "grad_norm": 0.8450144255131784, "learning_rate": 2.315241053499191e-06, "loss": 0.3387, "step": 1802 }, { "epoch": 0.5382089552238806, "grad_norm": 0.9315730643786867, "learning_rate": 2.3128303972283327e-06, "loss": 0.3867, "step": 1803 }, { "epoch": 0.5385074626865671, "grad_norm": 0.8448720469427933, "learning_rate": 2.3104199159564902e-06, "loss": 0.3539, "step": 1804 }, { "epoch": 0.5388059701492537, "grad_norm": 0.8164611944170581, "learning_rate": 2.308009611937406e-06, "loss": 0.3366, "step": 1805 }, { "epoch": 0.5391044776119402, "grad_norm": 0.8705803506185734, "learning_rate": 2.3055994874246544e-06, "loss": 0.3427, "step": 1806 }, { "epoch": 0.5394029850746269, "grad_norm": 0.8111592075727269, "learning_rate": 2.3031895446716438e-06, "loss": 0.3498, "step": 1807 }, { "epoch": 0.5397014925373135, "grad_norm": 0.82443436935079, "learning_rate": 2.300779785931611e-06, "loss": 0.3402, "step": 1808 }, { "epoch": 0.54, "grad_norm": 0.7893728042942114, "learning_rate": 2.2983702134576237e-06, "loss": 0.3534, "step": 1809 }, { "epoch": 0.5402985074626866, "grad_norm": 0.7788581120840403, "learning_rate": 2.295960829502571e-06, "loss": 0.3632, "step": 1810 }, { "epoch": 0.5405970149253732, "grad_norm": 0.7144098288588688, "learning_rate": 2.2935516363191695e-06, "loss": 0.3292, "step": 1811 }, { "epoch": 0.5408955223880597, "grad_norm": 0.8718613502814399, "learning_rate": 2.2911426361599563e-06, "loss": 0.3914, "step": 1812 }, { "epoch": 0.5411940298507463, "grad_norm": 0.8140978897388683, "learning_rate": 2.288733831277287e-06, "loss": 0.3285, "step": 1813 }, { "epoch": 0.5414925373134328, "grad_norm": 0.9065681350482651, "learning_rate": 2.286325223923336e-06, "loss": 0.3734, "step": 1814 }, { "epoch": 0.5417910447761194, "grad_norm": 0.8578142995410984, "learning_rate": 2.283916816350092e-06, "loss": 0.3417, "step": 1815 }, { "epoch": 0.542089552238806, "grad_norm": 0.8951438939421517, "learning_rate": 2.2815086108093575e-06, "loss": 0.3678, "step": 1816 }, { "epoch": 0.5423880597014925, "grad_norm": 0.7710865449797061, "learning_rate": 2.279100609552747e-06, "loss": 0.3212, "step": 1817 }, { "epoch": 0.5426865671641791, "grad_norm": 0.8413189744305332, "learning_rate": 2.2766928148316815e-06, "loss": 0.3546, "step": 1818 }, { "epoch": 0.5429850746268656, "grad_norm": 0.7918974236582986, "learning_rate": 2.274285228897391e-06, "loss": 0.3157, "step": 1819 }, { "epoch": 0.5432835820895522, "grad_norm": 0.8039993848081551, "learning_rate": 2.271877854000909e-06, "loss": 0.3115, "step": 1820 }, { "epoch": 0.5435820895522389, "grad_norm": 0.8401858367747947, "learning_rate": 2.2694706923930737e-06, "loss": 0.3542, "step": 1821 }, { "epoch": 0.5438805970149254, "grad_norm": 0.7815175224613609, "learning_rate": 2.2670637463245223e-06, "loss": 0.3417, "step": 1822 }, { "epoch": 0.544179104477612, "grad_norm": 0.8583325595180437, "learning_rate": 2.2646570180456896e-06, "loss": 0.3806, "step": 1823 }, { "epoch": 0.5444776119402985, "grad_norm": 0.8765871329748808, "learning_rate": 2.2622505098068098e-06, "loss": 0.3702, "step": 1824 }, { "epoch": 0.5447761194029851, "grad_norm": 0.7542775615348616, "learning_rate": 2.2598442238579067e-06, "loss": 0.3352, "step": 1825 }, { "epoch": 0.5450746268656717, "grad_norm": 0.8460036442491983, "learning_rate": 2.2574381624488008e-06, "loss": 0.3481, "step": 1826 }, { "epoch": 0.5453731343283582, "grad_norm": 0.8043856469909816, "learning_rate": 2.2550323278291013e-06, "loss": 0.3685, "step": 1827 }, { "epoch": 0.5456716417910448, "grad_norm": 0.8739643120061339, "learning_rate": 2.2526267222482035e-06, "loss": 0.346, "step": 1828 }, { "epoch": 0.5459701492537313, "grad_norm": 0.9831654255410001, "learning_rate": 2.250221347955291e-06, "loss": 0.3252, "step": 1829 }, { "epoch": 0.5462686567164179, "grad_norm": 0.8015068502149022, "learning_rate": 2.2478162071993296e-06, "loss": 0.357, "step": 1830 }, { "epoch": 0.5465671641791044, "grad_norm": 0.7756446807152939, "learning_rate": 2.2454113022290676e-06, "loss": 0.3478, "step": 1831 }, { "epoch": 0.546865671641791, "grad_norm": 0.9555139693052305, "learning_rate": 2.2430066352930323e-06, "loss": 0.3294, "step": 1832 }, { "epoch": 0.5471641791044776, "grad_norm": 0.8021249504547362, "learning_rate": 2.240602208639529e-06, "loss": 0.3343, "step": 1833 }, { "epoch": 0.5474626865671641, "grad_norm": 0.7978682944455774, "learning_rate": 2.238198024516637e-06, "loss": 0.3285, "step": 1834 }, { "epoch": 0.5477611940298508, "grad_norm": 0.8124634613968112, "learning_rate": 2.2357940851722114e-06, "loss": 0.336, "step": 1835 }, { "epoch": 0.5480597014925374, "grad_norm": 0.8713005908230844, "learning_rate": 2.2333903928538765e-06, "loss": 0.3149, "step": 1836 }, { "epoch": 0.5483582089552239, "grad_norm": 0.9595459470624672, "learning_rate": 2.230986949809025e-06, "loss": 0.3468, "step": 1837 }, { "epoch": 0.5486567164179105, "grad_norm": 0.8161907559262533, "learning_rate": 2.2285837582848185e-06, "loss": 0.3801, "step": 1838 }, { "epoch": 0.548955223880597, "grad_norm": 0.8893397151532759, "learning_rate": 2.226180820528182e-06, "loss": 0.3721, "step": 1839 }, { "epoch": 0.5492537313432836, "grad_norm": 0.7700788867504089, "learning_rate": 2.223778138785804e-06, "loss": 0.3014, "step": 1840 }, { "epoch": 0.5495522388059702, "grad_norm": 0.871007482090454, "learning_rate": 2.2213757153041337e-06, "loss": 0.3615, "step": 1841 }, { "epoch": 0.5498507462686567, "grad_norm": 0.8633799398623063, "learning_rate": 2.2189735523293777e-06, "loss": 0.361, "step": 1842 }, { "epoch": 0.5501492537313433, "grad_norm": 0.8645468422430543, "learning_rate": 2.2165716521075e-06, "loss": 0.3533, "step": 1843 }, { "epoch": 0.5504477611940298, "grad_norm": 0.8540892598497185, "learning_rate": 2.21417001688422e-06, "loss": 0.3592, "step": 1844 }, { "epoch": 0.5507462686567164, "grad_norm": 0.8694634042599341, "learning_rate": 2.211768648905006e-06, "loss": 0.352, "step": 1845 }, { "epoch": 0.551044776119403, "grad_norm": 0.7911027777874611, "learning_rate": 2.20936755041508e-06, "loss": 0.3108, "step": 1846 }, { "epoch": 0.5513432835820895, "grad_norm": 0.8066430351634646, "learning_rate": 2.2069667236594104e-06, "loss": 0.2852, "step": 1847 }, { "epoch": 0.5516417910447761, "grad_norm": 0.8185535177696429, "learning_rate": 2.204566170882711e-06, "loss": 0.34, "step": 1848 }, { "epoch": 0.5519402985074627, "grad_norm": 0.7993975027083716, "learning_rate": 2.202165894329441e-06, "loss": 0.3781, "step": 1849 }, { "epoch": 0.5522388059701493, "grad_norm": 0.849262989720656, "learning_rate": 2.1997658962438003e-06, "loss": 0.3165, "step": 1850 }, { "epoch": 0.5525373134328359, "grad_norm": 0.7278759041881264, "learning_rate": 2.197366178869728e-06, "loss": 0.2826, "step": 1851 }, { "epoch": 0.5528358208955224, "grad_norm": 0.828521762307412, "learning_rate": 2.1949667444509025e-06, "loss": 0.3265, "step": 1852 }, { "epoch": 0.553134328358209, "grad_norm": 0.7855801046085621, "learning_rate": 2.192567595230735e-06, "loss": 0.319, "step": 1853 }, { "epoch": 0.5534328358208955, "grad_norm": 0.7381825111391765, "learning_rate": 2.190168733452372e-06, "loss": 0.3203, "step": 1854 }, { "epoch": 0.5537313432835821, "grad_norm": 0.8446345881014914, "learning_rate": 2.187770161358692e-06, "loss": 0.3271, "step": 1855 }, { "epoch": 0.5540298507462686, "grad_norm": 0.725953827886251, "learning_rate": 2.1853718811922996e-06, "loss": 0.3016, "step": 1856 }, { "epoch": 0.5543283582089552, "grad_norm": 0.8971268582384204, "learning_rate": 2.1829738951955295e-06, "loss": 0.3595, "step": 1857 }, { "epoch": 0.5546268656716418, "grad_norm": 0.8332215625700863, "learning_rate": 2.180576205610439e-06, "loss": 0.2938, "step": 1858 }, { "epoch": 0.5549253731343283, "grad_norm": 0.8275970156258079, "learning_rate": 2.178178814678812e-06, "loss": 0.3862, "step": 1859 }, { "epoch": 0.5552238805970149, "grad_norm": 0.86905690858234, "learning_rate": 2.1757817246421477e-06, "loss": 0.3728, "step": 1860 }, { "epoch": 0.5555223880597014, "grad_norm": 0.7889094120217989, "learning_rate": 2.173384937741668e-06, "loss": 0.307, "step": 1861 }, { "epoch": 0.5558208955223881, "grad_norm": 0.9468308573254213, "learning_rate": 2.1709884562183114e-06, "loss": 0.3373, "step": 1862 }, { "epoch": 0.5561194029850747, "grad_norm": 0.8358643562239164, "learning_rate": 2.1685922823127276e-06, "loss": 0.3595, "step": 1863 }, { "epoch": 0.5564179104477612, "grad_norm": 0.8195541395992418, "learning_rate": 2.166196418265282e-06, "loss": 0.3175, "step": 1864 }, { "epoch": 0.5567164179104478, "grad_norm": 0.8872518656245127, "learning_rate": 2.163800866316049e-06, "loss": 0.3329, "step": 1865 }, { "epoch": 0.5570149253731344, "grad_norm": 0.8974786463656703, "learning_rate": 2.161405628704811e-06, "loss": 0.3594, "step": 1866 }, { "epoch": 0.5573134328358209, "grad_norm": 0.9090184559871817, "learning_rate": 2.1590107076710567e-06, "loss": 0.341, "step": 1867 }, { "epoch": 0.5576119402985075, "grad_norm": 0.795923872852375, "learning_rate": 2.1566161054539797e-06, "loss": 0.3174, "step": 1868 }, { "epoch": 0.557910447761194, "grad_norm": 0.836212534397218, "learning_rate": 2.1542218242924737e-06, "loss": 0.3334, "step": 1869 }, { "epoch": 0.5582089552238806, "grad_norm": 0.9386342551389201, "learning_rate": 2.1518278664251334e-06, "loss": 0.3427, "step": 1870 }, { "epoch": 0.5585074626865671, "grad_norm": 0.8270322091053605, "learning_rate": 2.1494342340902518e-06, "loss": 0.3581, "step": 1871 }, { "epoch": 0.5588059701492537, "grad_norm": 0.8355544694865434, "learning_rate": 2.147040929525816e-06, "loss": 0.3277, "step": 1872 }, { "epoch": 0.5591044776119403, "grad_norm": 0.7760309647668585, "learning_rate": 2.1446479549695083e-06, "loss": 0.3003, "step": 1873 }, { "epoch": 0.5594029850746268, "grad_norm": 0.8285302940910255, "learning_rate": 2.1422553126587e-06, "loss": 0.3398, "step": 1874 }, { "epoch": 0.5597014925373134, "grad_norm": 0.8028111781327222, "learning_rate": 2.139863004830455e-06, "loss": 0.3068, "step": 1875 }, { "epoch": 0.56, "grad_norm": 0.9241018551228057, "learning_rate": 2.1374710337215223e-06, "loss": 0.3117, "step": 1876 }, { "epoch": 0.5602985074626866, "grad_norm": 0.8859437856608664, "learning_rate": 2.1350794015683356e-06, "loss": 0.3366, "step": 1877 }, { "epoch": 0.5605970149253732, "grad_norm": 0.8066823845490054, "learning_rate": 2.1326881106070137e-06, "loss": 0.3239, "step": 1878 }, { "epoch": 0.5608955223880597, "grad_norm": 0.8280625069457178, "learning_rate": 2.1302971630733553e-06, "loss": 0.3256, "step": 1879 }, { "epoch": 0.5611940298507463, "grad_norm": 0.7654383674574374, "learning_rate": 2.127906561202838e-06, "loss": 0.3152, "step": 1880 }, { "epoch": 0.5614925373134328, "grad_norm": 0.8019819178042276, "learning_rate": 2.125516307230616e-06, "loss": 0.3496, "step": 1881 }, { "epoch": 0.5617910447761194, "grad_norm": 0.8424062174944292, "learning_rate": 2.1231264033915188e-06, "loss": 0.3457, "step": 1882 }, { "epoch": 0.562089552238806, "grad_norm": 1.0405265753175954, "learning_rate": 2.1207368519200483e-06, "loss": 0.3606, "step": 1883 }, { "epoch": 0.5623880597014925, "grad_norm": 0.8012391807155793, "learning_rate": 2.1183476550503763e-06, "loss": 0.3336, "step": 1884 }, { "epoch": 0.5626865671641791, "grad_norm": 0.8303562700296768, "learning_rate": 2.1159588150163447e-06, "loss": 0.3525, "step": 1885 }, { "epoch": 0.5629850746268656, "grad_norm": 0.8141402829710194, "learning_rate": 2.113570334051459e-06, "loss": 0.3743, "step": 1886 }, { "epoch": 0.5632835820895522, "grad_norm": 0.7825708947108343, "learning_rate": 2.111182214388893e-06, "loss": 0.2933, "step": 1887 }, { "epoch": 0.5635820895522388, "grad_norm": 0.8309709447567482, "learning_rate": 2.108794458261478e-06, "loss": 0.3238, "step": 1888 }, { "epoch": 0.5638805970149253, "grad_norm": 0.8224296447050079, "learning_rate": 2.106407067901709e-06, "loss": 0.3258, "step": 1889 }, { "epoch": 0.564179104477612, "grad_norm": 0.9018945970876204, "learning_rate": 2.104020045541739e-06, "loss": 0.3827, "step": 1890 }, { "epoch": 0.5644776119402986, "grad_norm": 0.8513253106321182, "learning_rate": 2.1016333934133727e-06, "loss": 0.3399, "step": 1891 }, { "epoch": 0.5647761194029851, "grad_norm": 0.9164770773291085, "learning_rate": 2.099247113748074e-06, "loss": 0.3529, "step": 1892 }, { "epoch": 0.5650746268656717, "grad_norm": 0.8334813106827352, "learning_rate": 2.096861208776956e-06, "loss": 0.342, "step": 1893 }, { "epoch": 0.5653731343283582, "grad_norm": 0.79013138961232, "learning_rate": 2.0944756807307805e-06, "loss": 0.3404, "step": 1894 }, { "epoch": 0.5656716417910448, "grad_norm": 0.8495550812479902, "learning_rate": 2.092090531839959e-06, "loss": 0.3307, "step": 1895 }, { "epoch": 0.5659701492537313, "grad_norm": 0.8325862898480291, "learning_rate": 2.089705764334547e-06, "loss": 0.3557, "step": 1896 }, { "epoch": 0.5662686567164179, "grad_norm": 0.8222800051067589, "learning_rate": 2.0873213804442454e-06, "loss": 0.3185, "step": 1897 }, { "epoch": 0.5665671641791045, "grad_norm": 0.7723465046550894, "learning_rate": 2.0849373823983935e-06, "loss": 0.3464, "step": 1898 }, { "epoch": 0.566865671641791, "grad_norm": 0.8615414329369864, "learning_rate": 2.082553772425972e-06, "loss": 0.3505, "step": 1899 }, { "epoch": 0.5671641791044776, "grad_norm": 0.8521312443511204, "learning_rate": 2.080170552755598e-06, "loss": 0.338, "step": 1900 }, { "epoch": 0.5674626865671641, "grad_norm": 0.8669205691719605, "learning_rate": 2.077787725615524e-06, "loss": 0.3645, "step": 1901 }, { "epoch": 0.5677611940298507, "grad_norm": 0.750771605956206, "learning_rate": 2.075405293233635e-06, "loss": 0.2969, "step": 1902 }, { "epoch": 0.5680597014925373, "grad_norm": 0.770946338972137, "learning_rate": 2.073023257837448e-06, "loss": 0.3189, "step": 1903 }, { "epoch": 0.5683582089552239, "grad_norm": 0.8472428925223515, "learning_rate": 2.070641621654106e-06, "loss": 0.3146, "step": 1904 }, { "epoch": 0.5686567164179105, "grad_norm": 0.8076339095091497, "learning_rate": 2.068260386910383e-06, "loss": 0.3259, "step": 1905 }, { "epoch": 0.568955223880597, "grad_norm": 0.7546439694354958, "learning_rate": 2.0658795558326745e-06, "loss": 0.3046, "step": 1906 }, { "epoch": 0.5692537313432836, "grad_norm": 0.7863545601633702, "learning_rate": 2.063499130646999e-06, "loss": 0.3119, "step": 1907 }, { "epoch": 0.5695522388059702, "grad_norm": 0.9196917659773726, "learning_rate": 2.0611191135789972e-06, "loss": 0.3762, "step": 1908 }, { "epoch": 0.5698507462686567, "grad_norm": 0.8725237416065241, "learning_rate": 2.0587395068539268e-06, "loss": 0.3656, "step": 1909 }, { "epoch": 0.5701492537313433, "grad_norm": 0.8403813292122189, "learning_rate": 2.056360312696661e-06, "loss": 0.3555, "step": 1910 }, { "epoch": 0.5704477611940298, "grad_norm": 0.739162644070513, "learning_rate": 2.0539815333316896e-06, "loss": 0.3363, "step": 1911 }, { "epoch": 0.5707462686567164, "grad_norm": 0.8072527202747288, "learning_rate": 2.0516031709831124e-06, "loss": 0.3338, "step": 1912 }, { "epoch": 0.571044776119403, "grad_norm": 0.7551658882134142, "learning_rate": 2.0492252278746406e-06, "loss": 0.3133, "step": 1913 }, { "epoch": 0.5713432835820895, "grad_norm": 0.8610162991085163, "learning_rate": 2.046847706229594e-06, "loss": 0.3595, "step": 1914 }, { "epoch": 0.5716417910447761, "grad_norm": 0.8314973416295284, "learning_rate": 2.0444706082708957e-06, "loss": 0.3151, "step": 1915 }, { "epoch": 0.5719402985074626, "grad_norm": 0.7775346420782155, "learning_rate": 2.042093936221075e-06, "loss": 0.328, "step": 1916 }, { "epoch": 0.5722388059701492, "grad_norm": 1.4529917092934526, "learning_rate": 2.039717692302263e-06, "loss": 0.3158, "step": 1917 }, { "epoch": 0.5725373134328359, "grad_norm": 0.8832461020890808, "learning_rate": 2.0373418787361886e-06, "loss": 0.3249, "step": 1918 }, { "epoch": 0.5728358208955224, "grad_norm": 0.8529873184087406, "learning_rate": 2.0349664977441806e-06, "loss": 0.3627, "step": 1919 }, { "epoch": 0.573134328358209, "grad_norm": 0.8801116310966653, "learning_rate": 2.0325915515471627e-06, "loss": 0.3887, "step": 1920 }, { "epoch": 0.5734328358208955, "grad_norm": 0.8146237688887484, "learning_rate": 2.03021704236565e-06, "loss": 0.3567, "step": 1921 }, { "epoch": 0.5737313432835821, "grad_norm": 0.8299437970321636, "learning_rate": 2.0278429724197517e-06, "loss": 0.3358, "step": 1922 }, { "epoch": 0.5740298507462687, "grad_norm": 0.8343117163911652, "learning_rate": 2.0254693439291665e-06, "loss": 0.3466, "step": 1923 }, { "epoch": 0.5743283582089552, "grad_norm": 0.7664529851147533, "learning_rate": 2.023096159113177e-06, "loss": 0.3201, "step": 1924 }, { "epoch": 0.5746268656716418, "grad_norm": 0.8295280556984195, "learning_rate": 2.0207234201906546e-06, "loss": 0.3285, "step": 1925 }, { "epoch": 0.5749253731343283, "grad_norm": 0.9191354365414987, "learning_rate": 2.018351129380052e-06, "loss": 0.3826, "step": 1926 }, { "epoch": 0.5752238805970149, "grad_norm": 0.7452870603167776, "learning_rate": 2.0159792888994025e-06, "loss": 0.3166, "step": 1927 }, { "epoch": 0.5755223880597015, "grad_norm": 1.0403191478557996, "learning_rate": 2.0136079009663205e-06, "loss": 0.3165, "step": 1928 }, { "epoch": 0.575820895522388, "grad_norm": 0.8016626447497367, "learning_rate": 2.0112369677979955e-06, "loss": 0.3246, "step": 1929 }, { "epoch": 0.5761194029850746, "grad_norm": 0.8057426738001215, "learning_rate": 2.008866491611191e-06, "loss": 0.2706, "step": 1930 }, { "epoch": 0.5764179104477611, "grad_norm": 0.8464060075703335, "learning_rate": 2.0064964746222464e-06, "loss": 0.3619, "step": 1931 }, { "epoch": 0.5767164179104478, "grad_norm": 0.7572279430026017, "learning_rate": 2.0041269190470687e-06, "loss": 0.3071, "step": 1932 }, { "epoch": 0.5770149253731344, "grad_norm": 0.8070777794735167, "learning_rate": 2.001757827101134e-06, "loss": 0.3298, "step": 1933 }, { "epoch": 0.5773134328358209, "grad_norm": 0.7406332845203685, "learning_rate": 1.9993892009994862e-06, "loss": 0.31, "step": 1934 }, { "epoch": 0.5776119402985075, "grad_norm": 0.8057738674065215, "learning_rate": 1.9970210429567343e-06, "loss": 0.3689, "step": 1935 }, { "epoch": 0.577910447761194, "grad_norm": 0.8334636514302126, "learning_rate": 1.9946533551870465e-06, "loss": 0.3531, "step": 1936 }, { "epoch": 0.5782089552238806, "grad_norm": 0.7674289139447094, "learning_rate": 1.9922861399041537e-06, "loss": 0.357, "step": 1937 }, { "epoch": 0.5785074626865672, "grad_norm": 0.8048568434131651, "learning_rate": 1.989919399321345e-06, "loss": 0.346, "step": 1938 }, { "epoch": 0.5788059701492537, "grad_norm": 0.7475874633539165, "learning_rate": 1.9875531356514642e-06, "loss": 0.3308, "step": 1939 }, { "epoch": 0.5791044776119403, "grad_norm": 1.1995168376736667, "learning_rate": 1.9851873511069104e-06, "loss": 0.3112, "step": 1940 }, { "epoch": 0.5794029850746268, "grad_norm": 0.8706989388369915, "learning_rate": 1.9828220478996357e-06, "loss": 0.3703, "step": 1941 }, { "epoch": 0.5797014925373134, "grad_norm": 0.8260670136022402, "learning_rate": 1.980457228241139e-06, "loss": 0.317, "step": 1942 }, { "epoch": 0.58, "grad_norm": 0.7553465208304225, "learning_rate": 1.9780928943424703e-06, "loss": 0.3407, "step": 1943 }, { "epoch": 0.5802985074626865, "grad_norm": 0.9544720767792053, "learning_rate": 1.9757290484142244e-06, "loss": 0.3739, "step": 1944 }, { "epoch": 0.5805970149253732, "grad_norm": 0.8720188388239823, "learning_rate": 1.9733656926665388e-06, "loss": 0.3476, "step": 1945 }, { "epoch": 0.5808955223880597, "grad_norm": 0.8709469192102466, "learning_rate": 1.971002829309094e-06, "loss": 0.3701, "step": 1946 }, { "epoch": 0.5811940298507463, "grad_norm": 0.8604365263309919, "learning_rate": 1.968640460551109e-06, "loss": 0.3698, "step": 1947 }, { "epoch": 0.5814925373134329, "grad_norm": 0.7533527794831715, "learning_rate": 1.9662785886013417e-06, "loss": 0.3349, "step": 1948 }, { "epoch": 0.5817910447761194, "grad_norm": 0.8247273401204854, "learning_rate": 1.9639172156680848e-06, "loss": 0.3361, "step": 1949 }, { "epoch": 0.582089552238806, "grad_norm": 0.8120190282994632, "learning_rate": 1.961556343959164e-06, "loss": 0.2857, "step": 1950 }, { "epoch": 0.5823880597014925, "grad_norm": 0.862894378293909, "learning_rate": 1.9591959756819366e-06, "loss": 0.3927, "step": 1951 }, { "epoch": 0.5826865671641791, "grad_norm": 0.8218779267658843, "learning_rate": 1.9568361130432907e-06, "loss": 0.3287, "step": 1952 }, { "epoch": 0.5829850746268657, "grad_norm": 0.8143989499819814, "learning_rate": 1.954476758249639e-06, "loss": 0.3454, "step": 1953 }, { "epoch": 0.5832835820895522, "grad_norm": 0.8454839729330965, "learning_rate": 1.9521179135069213e-06, "loss": 0.3276, "step": 1954 }, { "epoch": 0.5835820895522388, "grad_norm": 0.750200670423591, "learning_rate": 1.9497595810206e-06, "loss": 0.3304, "step": 1955 }, { "epoch": 0.5838805970149253, "grad_norm": 0.8045091719653029, "learning_rate": 1.9474017629956576e-06, "loss": 0.3525, "step": 1956 }, { "epoch": 0.5841791044776119, "grad_norm": 0.904970348742468, "learning_rate": 1.9450444616365976e-06, "loss": 0.3555, "step": 1957 }, { "epoch": 0.5844776119402985, "grad_norm": 0.7892323857546758, "learning_rate": 1.9426876791474396e-06, "loss": 0.3307, "step": 1958 }, { "epoch": 0.5847761194029851, "grad_norm": 0.8188095376158899, "learning_rate": 1.940331417731716e-06, "loss": 0.3035, "step": 1959 }, { "epoch": 0.5850746268656717, "grad_norm": 0.7550954391161062, "learning_rate": 1.9379756795924757e-06, "loss": 0.309, "step": 1960 }, { "epoch": 0.5853731343283582, "grad_norm": 0.785376246418163, "learning_rate": 1.935620466932275e-06, "loss": 0.3116, "step": 1961 }, { "epoch": 0.5856716417910448, "grad_norm": 0.8351399359015248, "learning_rate": 1.933265781953181e-06, "loss": 0.3944, "step": 1962 }, { "epoch": 0.5859701492537314, "grad_norm": 0.8086095361360239, "learning_rate": 1.9309116268567675e-06, "loss": 0.3319, "step": 1963 }, { "epoch": 0.5862686567164179, "grad_norm": 0.953784526278762, "learning_rate": 1.9285580038441105e-06, "loss": 0.3373, "step": 1964 }, { "epoch": 0.5865671641791045, "grad_norm": 0.909438958477141, "learning_rate": 1.926204915115791e-06, "loss": 0.3498, "step": 1965 }, { "epoch": 0.586865671641791, "grad_norm": 0.8033996032006552, "learning_rate": 1.92385236287189e-06, "loss": 0.3342, "step": 1966 }, { "epoch": 0.5871641791044776, "grad_norm": 0.8135989857406488, "learning_rate": 1.921500349311986e-06, "loss": 0.3831, "step": 1967 }, { "epoch": 0.5874626865671642, "grad_norm": 0.850789249214276, "learning_rate": 1.919148876635154e-06, "loss": 0.2999, "step": 1968 }, { "epoch": 0.5877611940298507, "grad_norm": 0.7746152215096338, "learning_rate": 1.916797947039965e-06, "loss": 0.3327, "step": 1969 }, { "epoch": 0.5880597014925373, "grad_norm": 0.9288841282660558, "learning_rate": 1.914447562724479e-06, "loss": 0.3201, "step": 1970 }, { "epoch": 0.5883582089552238, "grad_norm": 0.7498990167094447, "learning_rate": 1.9120977258862493e-06, "loss": 0.3018, "step": 1971 }, { "epoch": 0.5886567164179104, "grad_norm": 0.7828793001916144, "learning_rate": 1.909748438722315e-06, "loss": 0.3376, "step": 1972 }, { "epoch": 0.5889552238805971, "grad_norm": 0.9745524576411368, "learning_rate": 1.9073997034292043e-06, "loss": 0.4012, "step": 1973 }, { "epoch": 0.5892537313432836, "grad_norm": 0.7597359048329623, "learning_rate": 1.905051522202926e-06, "loss": 0.2966, "step": 1974 }, { "epoch": 0.5895522388059702, "grad_norm": 0.7985139531152796, "learning_rate": 1.902703897238972e-06, "loss": 0.3545, "step": 1975 }, { "epoch": 0.5898507462686567, "grad_norm": 0.7828851032723473, "learning_rate": 1.9003568307323156e-06, "loss": 0.3402, "step": 1976 }, { "epoch": 0.5901492537313433, "grad_norm": 0.8844102544530947, "learning_rate": 1.898010324877406e-06, "loss": 0.3512, "step": 1977 }, { "epoch": 0.5904477611940299, "grad_norm": 1.2103129479742514, "learning_rate": 1.8956643818681685e-06, "loss": 0.3557, "step": 1978 }, { "epoch": 0.5907462686567164, "grad_norm": 0.8338907004205794, "learning_rate": 1.8933190038980044e-06, "loss": 0.2936, "step": 1979 }, { "epoch": 0.591044776119403, "grad_norm": 0.798234969689793, "learning_rate": 1.890974193159783e-06, "loss": 0.3208, "step": 1980 }, { "epoch": 0.5913432835820895, "grad_norm": 0.8265334530994278, "learning_rate": 1.888629951845846e-06, "loss": 0.3321, "step": 1981 }, { "epoch": 0.5916417910447761, "grad_norm": 0.8484611458052064, "learning_rate": 1.8862862821480023e-06, "loss": 0.3302, "step": 1982 }, { "epoch": 0.5919402985074627, "grad_norm": 0.7732292982110145, "learning_rate": 1.8839431862575252e-06, "loss": 0.3178, "step": 1983 }, { "epoch": 0.5922388059701492, "grad_norm": 0.7854227204822434, "learning_rate": 1.881600666365153e-06, "loss": 0.2743, "step": 1984 }, { "epoch": 0.5925373134328358, "grad_norm": 0.8822786181065627, "learning_rate": 1.8792587246610833e-06, "loss": 0.2931, "step": 1985 }, { "epoch": 0.5928358208955223, "grad_norm": 0.8413866496121205, "learning_rate": 1.8769173633349757e-06, "loss": 0.3055, "step": 1986 }, { "epoch": 0.593134328358209, "grad_norm": 0.8929662942782096, "learning_rate": 1.8745765845759466e-06, "loss": 0.3441, "step": 1987 }, { "epoch": 0.5934328358208956, "grad_norm": 0.8084666675397074, "learning_rate": 1.8722363905725654e-06, "loss": 0.3335, "step": 1988 }, { "epoch": 0.5937313432835821, "grad_norm": 0.8138764969858261, "learning_rate": 1.8698967835128572e-06, "loss": 0.3642, "step": 1989 }, { "epoch": 0.5940298507462687, "grad_norm": 0.7905280819462355, "learning_rate": 1.867557765584298e-06, "loss": 0.3279, "step": 1990 }, { "epoch": 0.5943283582089552, "grad_norm": 0.7863823629467317, "learning_rate": 1.8652193389738122e-06, "loss": 0.3364, "step": 1991 }, { "epoch": 0.5946268656716418, "grad_norm": 0.7267169725040129, "learning_rate": 1.8628815058677712e-06, "loss": 0.3171, "step": 1992 }, { "epoch": 0.5949253731343284, "grad_norm": 0.8524291293019064, "learning_rate": 1.8605442684519932e-06, "loss": 0.3593, "step": 1993 }, { "epoch": 0.5952238805970149, "grad_norm": 0.7776370242867892, "learning_rate": 1.8582076289117367e-06, "loss": 0.3241, "step": 1994 }, { "epoch": 0.5955223880597015, "grad_norm": 0.899435349098279, "learning_rate": 1.8558715894317033e-06, "loss": 0.3719, "step": 1995 }, { "epoch": 0.595820895522388, "grad_norm": 0.8848148638993476, "learning_rate": 1.8535361521960341e-06, "loss": 0.3868, "step": 1996 }, { "epoch": 0.5961194029850746, "grad_norm": 0.8154092596098508, "learning_rate": 1.8512013193883044e-06, "loss": 0.3443, "step": 1997 }, { "epoch": 0.5964179104477612, "grad_norm": 0.8146716902809428, "learning_rate": 1.8488670931915268e-06, "loss": 0.3366, "step": 1998 }, { "epoch": 0.5967164179104477, "grad_norm": 0.8576493081535792, "learning_rate": 1.8465334757881453e-06, "loss": 0.3671, "step": 1999 }, { "epoch": 0.5970149253731343, "grad_norm": 0.8128896259402698, "learning_rate": 1.8442004693600358e-06, "loss": 0.3748, "step": 2000 }, { "epoch": 0.597313432835821, "grad_norm": 0.7918103129525407, "learning_rate": 1.8418680760885028e-06, "loss": 0.3413, "step": 2001 }, { "epoch": 0.5976119402985075, "grad_norm": 0.8813005048512403, "learning_rate": 1.8395362981542757e-06, "loss": 0.3706, "step": 2002 }, { "epoch": 0.5979104477611941, "grad_norm": 0.823115743992545, "learning_rate": 1.8372051377375117e-06, "loss": 0.3721, "step": 2003 }, { "epoch": 0.5982089552238806, "grad_norm": 0.7894206010095408, "learning_rate": 1.8348745970177884e-06, "loss": 0.3229, "step": 2004 }, { "epoch": 0.5985074626865672, "grad_norm": 1.3615178436392212, "learning_rate": 1.8325446781741041e-06, "loss": 0.3459, "step": 2005 }, { "epoch": 0.5988059701492537, "grad_norm": 0.7843156247200797, "learning_rate": 1.8302153833848762e-06, "loss": 0.3221, "step": 2006 }, { "epoch": 0.5991044776119403, "grad_norm": 1.075019211892318, "learning_rate": 1.8278867148279395e-06, "loss": 0.3769, "step": 2007 }, { "epoch": 0.5994029850746269, "grad_norm": 0.8427937686071887, "learning_rate": 1.8255586746805412e-06, "loss": 0.3626, "step": 2008 }, { "epoch": 0.5997014925373134, "grad_norm": 1.0019280320812132, "learning_rate": 1.8232312651193418e-06, "loss": 0.319, "step": 2009 }, { "epoch": 0.6, "grad_norm": 0.752076030590557, "learning_rate": 1.8209044883204141e-06, "loss": 0.3165, "step": 2010 }, { "epoch": 0.6002985074626865, "grad_norm": 0.7911868281154993, "learning_rate": 1.8185783464592372e-06, "loss": 0.3463, "step": 2011 }, { "epoch": 0.6005970149253731, "grad_norm": 0.8613079162533939, "learning_rate": 1.8162528417106962e-06, "loss": 0.3539, "step": 2012 }, { "epoch": 0.6008955223880597, "grad_norm": 0.7566901309370632, "learning_rate": 1.8139279762490816e-06, "loss": 0.3156, "step": 2013 }, { "epoch": 0.6011940298507462, "grad_norm": 0.8761370837990474, "learning_rate": 1.8116037522480866e-06, "loss": 0.3873, "step": 2014 }, { "epoch": 0.6014925373134329, "grad_norm": 0.9028602770938112, "learning_rate": 1.8092801718808035e-06, "loss": 0.3325, "step": 2015 }, { "epoch": 0.6017910447761194, "grad_norm": 0.8144205633300299, "learning_rate": 1.8069572373197234e-06, "loss": 0.3361, "step": 2016 }, { "epoch": 0.602089552238806, "grad_norm": 0.8544221934437295, "learning_rate": 1.8046349507367336e-06, "loss": 0.3611, "step": 2017 }, { "epoch": 0.6023880597014926, "grad_norm": 0.808086145145757, "learning_rate": 1.802313314303115e-06, "loss": 0.3479, "step": 2018 }, { "epoch": 0.6026865671641791, "grad_norm": 0.9626316828813825, "learning_rate": 1.7999923301895419e-06, "loss": 0.357, "step": 2019 }, { "epoch": 0.6029850746268657, "grad_norm": 0.8065948651275312, "learning_rate": 1.797672000566077e-06, "loss": 0.3632, "step": 2020 }, { "epoch": 0.6032835820895522, "grad_norm": 0.8600310373365637, "learning_rate": 1.795352327602172e-06, "loss": 0.3467, "step": 2021 }, { "epoch": 0.6035820895522388, "grad_norm": 0.7840180123347607, "learning_rate": 1.7930333134666655e-06, "loss": 0.3145, "step": 2022 }, { "epoch": 0.6038805970149254, "grad_norm": 0.8760399951287269, "learning_rate": 1.790714960327778e-06, "loss": 0.3523, "step": 2023 }, { "epoch": 0.6041791044776119, "grad_norm": 0.9401970743207244, "learning_rate": 1.7883972703531138e-06, "loss": 0.3923, "step": 2024 }, { "epoch": 0.6044776119402985, "grad_norm": 0.7714575169408807, "learning_rate": 1.786080245709657e-06, "loss": 0.331, "step": 2025 }, { "epoch": 0.604776119402985, "grad_norm": 0.8115570578510135, "learning_rate": 1.7837638885637682e-06, "loss": 0.2821, "step": 2026 }, { "epoch": 0.6050746268656716, "grad_norm": 0.8360919692031952, "learning_rate": 1.7814482010811852e-06, "loss": 0.3322, "step": 2027 }, { "epoch": 0.6053731343283583, "grad_norm": 0.8034337429029215, "learning_rate": 1.77913318542702e-06, "loss": 0.3147, "step": 2028 }, { "epoch": 0.6056716417910448, "grad_norm": 0.7672266818780934, "learning_rate": 1.776818843765755e-06, "loss": 0.3489, "step": 2029 }, { "epoch": 0.6059701492537314, "grad_norm": 0.7896411247534145, "learning_rate": 1.7745051782612432e-06, "loss": 0.3341, "step": 2030 }, { "epoch": 0.6062686567164179, "grad_norm": 0.9298746237897938, "learning_rate": 1.7721921910767066e-06, "loss": 0.3759, "step": 2031 }, { "epoch": 0.6065671641791045, "grad_norm": 0.7832828837477588, "learning_rate": 1.76987988437473e-06, "loss": 0.3277, "step": 2032 }, { "epoch": 0.6068656716417911, "grad_norm": 0.8044375141975681, "learning_rate": 1.7675682603172656e-06, "loss": 0.3269, "step": 2033 }, { "epoch": 0.6071641791044776, "grad_norm": 0.8011482806477036, "learning_rate": 1.7652573210656242e-06, "loss": 0.3285, "step": 2034 }, { "epoch": 0.6074626865671642, "grad_norm": 0.8705610431211035, "learning_rate": 1.7629470687804783e-06, "loss": 0.3102, "step": 2035 }, { "epoch": 0.6077611940298507, "grad_norm": 0.8594206025341075, "learning_rate": 1.7606375056218578e-06, "loss": 0.3346, "step": 2036 }, { "epoch": 0.6080597014925373, "grad_norm": 0.8509811944236282, "learning_rate": 1.7583286337491472e-06, "loss": 0.327, "step": 2037 }, { "epoch": 0.6083582089552239, "grad_norm": 0.7545342074644574, "learning_rate": 1.7560204553210858e-06, "loss": 0.3468, "step": 2038 }, { "epoch": 0.6086567164179104, "grad_norm": 0.8580794849406649, "learning_rate": 1.7537129724957644e-06, "loss": 0.3539, "step": 2039 }, { "epoch": 0.608955223880597, "grad_norm": 0.9223321696220602, "learning_rate": 1.7514061874306226e-06, "loss": 0.3834, "step": 2040 }, { "epoch": 0.6092537313432835, "grad_norm": 0.7309630284623936, "learning_rate": 1.7491001022824483e-06, "loss": 0.3494, "step": 2041 }, { "epoch": 0.6095522388059702, "grad_norm": 0.9098414650575413, "learning_rate": 1.7467947192073759e-06, "loss": 0.3578, "step": 2042 }, { "epoch": 0.6098507462686568, "grad_norm": 0.8443943503187832, "learning_rate": 1.744490040360881e-06, "loss": 0.3264, "step": 2043 }, { "epoch": 0.6101492537313433, "grad_norm": 0.8020741133467957, "learning_rate": 1.7421860678977831e-06, "loss": 0.327, "step": 2044 }, { "epoch": 0.6104477611940299, "grad_norm": 0.8065034628311121, "learning_rate": 1.7398828039722403e-06, "loss": 0.333, "step": 2045 }, { "epoch": 0.6107462686567164, "grad_norm": 0.932836415200145, "learning_rate": 1.7375802507377475e-06, "loss": 0.3337, "step": 2046 }, { "epoch": 0.611044776119403, "grad_norm": 0.8386207975303591, "learning_rate": 1.7352784103471355e-06, "loss": 0.3384, "step": 2047 }, { "epoch": 0.6113432835820896, "grad_norm": 0.9979807581300122, "learning_rate": 1.7329772849525707e-06, "loss": 0.3463, "step": 2048 }, { "epoch": 0.6116417910447761, "grad_norm": 0.9144481465630234, "learning_rate": 1.7306768767055481e-06, "loss": 0.3171, "step": 2049 }, { "epoch": 0.6119402985074627, "grad_norm": 0.9179258800006835, "learning_rate": 1.7283771877568934e-06, "loss": 0.2897, "step": 2050 }, { "epoch": 0.6122388059701492, "grad_norm": 0.9088510671380466, "learning_rate": 1.7260782202567595e-06, "loss": 0.337, "step": 2051 }, { "epoch": 0.6125373134328358, "grad_norm": 0.7587527894434154, "learning_rate": 1.723779976354626e-06, "loss": 0.3155, "step": 2052 }, { "epoch": 0.6128358208955224, "grad_norm": 0.8273268196048457, "learning_rate": 1.721482458199294e-06, "loss": 0.3487, "step": 2053 }, { "epoch": 0.6131343283582089, "grad_norm": 0.875544314571328, "learning_rate": 1.7191856679388869e-06, "loss": 0.3419, "step": 2054 }, { "epoch": 0.6134328358208955, "grad_norm": 0.8395751763266083, "learning_rate": 1.7168896077208482e-06, "loss": 0.3419, "step": 2055 }, { "epoch": 0.6137313432835821, "grad_norm": 0.8125721253715453, "learning_rate": 1.714594279691938e-06, "loss": 0.3201, "step": 2056 }, { "epoch": 0.6140298507462687, "grad_norm": 0.91793798749558, "learning_rate": 1.7122996859982318e-06, "loss": 0.324, "step": 2057 }, { "epoch": 0.6143283582089553, "grad_norm": 0.8806848474353035, "learning_rate": 1.710005828785119e-06, "loss": 0.3682, "step": 2058 }, { "epoch": 0.6146268656716418, "grad_norm": 0.7958603606959694, "learning_rate": 1.7077127101973e-06, "loss": 0.3475, "step": 2059 }, { "epoch": 0.6149253731343284, "grad_norm": 0.8477880797334701, "learning_rate": 1.7054203323787854e-06, "loss": 0.3667, "step": 2060 }, { "epoch": 0.6152238805970149, "grad_norm": 0.8049693453434607, "learning_rate": 1.7031286974728916e-06, "loss": 0.3672, "step": 2061 }, { "epoch": 0.6155223880597015, "grad_norm": 0.8445392848976708, "learning_rate": 1.7008378076222417e-06, "loss": 0.3454, "step": 2062 }, { "epoch": 0.6158208955223881, "grad_norm": 0.8355246174084053, "learning_rate": 1.698547664968763e-06, "loss": 0.3214, "step": 2063 }, { "epoch": 0.6161194029850746, "grad_norm": 0.8464507961501435, "learning_rate": 1.6962582716536813e-06, "loss": 0.3299, "step": 2064 }, { "epoch": 0.6164179104477612, "grad_norm": 0.9279680601344305, "learning_rate": 1.6939696298175245e-06, "loss": 0.3676, "step": 2065 }, { "epoch": 0.6167164179104477, "grad_norm": 0.9394401389859991, "learning_rate": 1.6916817416001175e-06, "loss": 0.3252, "step": 2066 }, { "epoch": 0.6170149253731343, "grad_norm": 0.8255999859088444, "learning_rate": 1.6893946091405784e-06, "loss": 0.3492, "step": 2067 }, { "epoch": 0.6173134328358209, "grad_norm": 0.8393914123113455, "learning_rate": 1.6871082345773215e-06, "loss": 0.326, "step": 2068 }, { "epoch": 0.6176119402985074, "grad_norm": 0.7540269358482177, "learning_rate": 1.6848226200480514e-06, "loss": 0.3164, "step": 2069 }, { "epoch": 0.6179104477611941, "grad_norm": 0.8407057610257092, "learning_rate": 1.6825377676897608e-06, "loss": 0.3426, "step": 2070 }, { "epoch": 0.6182089552238806, "grad_norm": 0.9601578918299282, "learning_rate": 1.6802536796387328e-06, "loss": 0.4112, "step": 2071 }, { "epoch": 0.6185074626865672, "grad_norm": 0.8027639919807739, "learning_rate": 1.6779703580305323e-06, "loss": 0.3493, "step": 2072 }, { "epoch": 0.6188059701492538, "grad_norm": 0.8391959617782264, "learning_rate": 1.6756878050000098e-06, "loss": 0.3591, "step": 2073 }, { "epoch": 0.6191044776119403, "grad_norm": 0.7871628742456087, "learning_rate": 1.6734060226812976e-06, "loss": 0.3414, "step": 2074 }, { "epoch": 0.6194029850746269, "grad_norm": 0.8096322967159341, "learning_rate": 1.6711250132078055e-06, "loss": 0.3215, "step": 2075 }, { "epoch": 0.6197014925373134, "grad_norm": 0.8138284215248026, "learning_rate": 1.6688447787122226e-06, "loss": 0.3173, "step": 2076 }, { "epoch": 0.62, "grad_norm": 0.8329557552536543, "learning_rate": 1.666565321326512e-06, "loss": 0.301, "step": 2077 }, { "epoch": 0.6202985074626866, "grad_norm": 0.8459261392156018, "learning_rate": 1.6642866431819107e-06, "loss": 0.3869, "step": 2078 }, { "epoch": 0.6205970149253731, "grad_norm": 0.7415893886114104, "learning_rate": 1.6620087464089275e-06, "loss": 0.3512, "step": 2079 }, { "epoch": 0.6208955223880597, "grad_norm": 0.8509533039291013, "learning_rate": 1.659731633137341e-06, "loss": 0.356, "step": 2080 }, { "epoch": 0.6211940298507462, "grad_norm": 0.7465210952477122, "learning_rate": 1.657455305496195e-06, "loss": 0.3329, "step": 2081 }, { "epoch": 0.6214925373134328, "grad_norm": 0.863374891709103, "learning_rate": 1.6551797656138018e-06, "loss": 0.3296, "step": 2082 }, { "epoch": 0.6217910447761194, "grad_norm": 0.8457477256461274, "learning_rate": 1.6529050156177356e-06, "loss": 0.3386, "step": 2083 }, { "epoch": 0.622089552238806, "grad_norm": 0.8417932226084862, "learning_rate": 1.650631057634831e-06, "loss": 0.3294, "step": 2084 }, { "epoch": 0.6223880597014926, "grad_norm": 0.7940353343813964, "learning_rate": 1.6483578937911836e-06, "loss": 0.3573, "step": 2085 }, { "epoch": 0.6226865671641791, "grad_norm": 0.8984545759070848, "learning_rate": 1.6460855262121479e-06, "loss": 0.335, "step": 2086 }, { "epoch": 0.6229850746268657, "grad_norm": 0.9689021624736345, "learning_rate": 1.6438139570223311e-06, "loss": 0.3637, "step": 2087 }, { "epoch": 0.6232835820895523, "grad_norm": 0.7948610205026269, "learning_rate": 1.641543188345594e-06, "loss": 0.318, "step": 2088 }, { "epoch": 0.6235820895522388, "grad_norm": 0.8388231085500125, "learning_rate": 1.6392732223050515e-06, "loss": 0.3366, "step": 2089 }, { "epoch": 0.6238805970149254, "grad_norm": 0.9034038641733735, "learning_rate": 1.6370040610230662e-06, "loss": 0.3811, "step": 2090 }, { "epoch": 0.6241791044776119, "grad_norm": 0.8056850552409819, "learning_rate": 1.6347357066212478e-06, "loss": 0.3246, "step": 2091 }, { "epoch": 0.6244776119402985, "grad_norm": 0.8924171728248095, "learning_rate": 1.6324681612204527e-06, "loss": 0.3099, "step": 2092 }, { "epoch": 0.624776119402985, "grad_norm": 0.8889195100531483, "learning_rate": 1.6302014269407812e-06, "loss": 0.3916, "step": 2093 }, { "epoch": 0.6250746268656716, "grad_norm": 0.802744288589964, "learning_rate": 1.6279355059015739e-06, "loss": 0.3216, "step": 2094 }, { "epoch": 0.6253731343283582, "grad_norm": 0.8244538019031434, "learning_rate": 1.6256704002214124e-06, "loss": 0.3436, "step": 2095 }, { "epoch": 0.6256716417910447, "grad_norm": 0.8806302935844983, "learning_rate": 1.6234061120181144e-06, "loss": 0.3234, "step": 2096 }, { "epoch": 0.6259701492537313, "grad_norm": 0.7542539098364519, "learning_rate": 1.6211426434087347e-06, "loss": 0.3112, "step": 2097 }, { "epoch": 0.626268656716418, "grad_norm": 0.8779997865446432, "learning_rate": 1.6188799965095614e-06, "loss": 0.3213, "step": 2098 }, { "epoch": 0.6265671641791045, "grad_norm": 0.8512972841056398, "learning_rate": 1.6166181734361136e-06, "loss": 0.3806, "step": 2099 }, { "epoch": 0.6268656716417911, "grad_norm": 0.780603070362957, "learning_rate": 1.6143571763031413e-06, "loss": 0.3381, "step": 2100 }, { "epoch": 0.6271641791044776, "grad_norm": 0.8792815515680216, "learning_rate": 1.6120970072246222e-06, "loss": 0.3795, "step": 2101 }, { "epoch": 0.6274626865671642, "grad_norm": 0.8439949848861296, "learning_rate": 1.6098376683137582e-06, "loss": 0.2953, "step": 2102 }, { "epoch": 0.6277611940298508, "grad_norm": 0.9313525702028004, "learning_rate": 1.6075791616829764e-06, "loss": 0.3475, "step": 2103 }, { "epoch": 0.6280597014925373, "grad_norm": 0.7518808217696689, "learning_rate": 1.6053214894439262e-06, "loss": 0.3147, "step": 2104 }, { "epoch": 0.6283582089552239, "grad_norm": 0.7506483709151667, "learning_rate": 1.6030646537074751e-06, "loss": 0.3167, "step": 2105 }, { "epoch": 0.6286567164179104, "grad_norm": 0.8052425362278406, "learning_rate": 1.6008086565837106e-06, "loss": 0.2981, "step": 2106 }, { "epoch": 0.628955223880597, "grad_norm": 0.8210002191055628, "learning_rate": 1.5985535001819335e-06, "loss": 0.3501, "step": 2107 }, { "epoch": 0.6292537313432836, "grad_norm": 0.8941400826776075, "learning_rate": 1.596299186610661e-06, "loss": 0.3456, "step": 2108 }, { "epoch": 0.6295522388059701, "grad_norm": 1.0083217537161828, "learning_rate": 1.594045717977622e-06, "loss": 0.3841, "step": 2109 }, { "epoch": 0.6298507462686567, "grad_norm": 1.078514382968689, "learning_rate": 1.5917930963897534e-06, "loss": 0.3614, "step": 2110 }, { "epoch": 0.6301492537313432, "grad_norm": 0.7917547916905323, "learning_rate": 1.5895413239532022e-06, "loss": 0.3412, "step": 2111 }, { "epoch": 0.6304477611940299, "grad_norm": 0.7691216432708953, "learning_rate": 1.5872904027733211e-06, "loss": 0.3178, "step": 2112 }, { "epoch": 0.6307462686567165, "grad_norm": 0.77802800862977, "learning_rate": 1.5850403349546655e-06, "loss": 0.309, "step": 2113 }, { "epoch": 0.631044776119403, "grad_norm": 0.8255197315967899, "learning_rate": 1.5827911226009945e-06, "loss": 0.3138, "step": 2114 }, { "epoch": 0.6313432835820896, "grad_norm": 0.8386811603427105, "learning_rate": 1.5805427678152677e-06, "loss": 0.2891, "step": 2115 }, { "epoch": 0.6316417910447761, "grad_norm": 0.8153875110996863, "learning_rate": 1.5782952726996403e-06, "loss": 0.2995, "step": 2116 }, { "epoch": 0.6319402985074627, "grad_norm": 0.8409552206683609, "learning_rate": 1.5760486393554667e-06, "loss": 0.3386, "step": 2117 }, { "epoch": 0.6322388059701493, "grad_norm": 0.8250066653051528, "learning_rate": 1.5738028698832942e-06, "loss": 0.3827, "step": 2118 }, { "epoch": 0.6325373134328358, "grad_norm": 0.8610197982443525, "learning_rate": 1.5715579663828618e-06, "loss": 0.3365, "step": 2119 }, { "epoch": 0.6328358208955224, "grad_norm": 0.9413453392741797, "learning_rate": 1.5693139309531006e-06, "loss": 0.3692, "step": 2120 }, { "epoch": 0.6331343283582089, "grad_norm": 0.818537165590243, "learning_rate": 1.567070765692128e-06, "loss": 0.3237, "step": 2121 }, { "epoch": 0.6334328358208955, "grad_norm": 0.8210467581037414, "learning_rate": 1.5648284726972491e-06, "loss": 0.3422, "step": 2122 }, { "epoch": 0.633731343283582, "grad_norm": 0.8300643168242267, "learning_rate": 1.562587054064953e-06, "loss": 0.3112, "step": 2123 }, { "epoch": 0.6340298507462686, "grad_norm": 0.8063304103666424, "learning_rate": 1.5603465118909122e-06, "loss": 0.3277, "step": 2124 }, { "epoch": 0.6343283582089553, "grad_norm": 0.7841383009639938, "learning_rate": 1.5581068482699797e-06, "loss": 0.3132, "step": 2125 }, { "epoch": 0.6346268656716418, "grad_norm": 0.7663654715100157, "learning_rate": 1.5558680652961844e-06, "loss": 0.3549, "step": 2126 }, { "epoch": 0.6349253731343284, "grad_norm": 0.716788049651756, "learning_rate": 1.553630165062735e-06, "loss": 0.3293, "step": 2127 }, { "epoch": 0.635223880597015, "grad_norm": 0.8376097154978661, "learning_rate": 1.5513931496620138e-06, "loss": 0.348, "step": 2128 }, { "epoch": 0.6355223880597015, "grad_norm": 0.7844875450357711, "learning_rate": 1.549157021185575e-06, "loss": 0.3238, "step": 2129 }, { "epoch": 0.6358208955223881, "grad_norm": 0.7845014380845483, "learning_rate": 1.546921781724145e-06, "loss": 0.3272, "step": 2130 }, { "epoch": 0.6361194029850746, "grad_norm": 0.8084890497779735, "learning_rate": 1.544687433367617e-06, "loss": 0.3503, "step": 2131 }, { "epoch": 0.6364179104477612, "grad_norm": 1.155872428085641, "learning_rate": 1.5424539782050535e-06, "loss": 0.3463, "step": 2132 }, { "epoch": 0.6367164179104478, "grad_norm": 0.8237151552290364, "learning_rate": 1.5402214183246805e-06, "loss": 0.3751, "step": 2133 }, { "epoch": 0.6370149253731343, "grad_norm": 0.8146345074095411, "learning_rate": 1.5379897558138861e-06, "loss": 0.3444, "step": 2134 }, { "epoch": 0.6373134328358209, "grad_norm": 0.7508094621607114, "learning_rate": 1.5357589927592211e-06, "loss": 0.2862, "step": 2135 }, { "epoch": 0.6376119402985074, "grad_norm": 0.7803467497000177, "learning_rate": 1.5335291312463952e-06, "loss": 0.2941, "step": 2136 }, { "epoch": 0.637910447761194, "grad_norm": 0.7746140356066684, "learning_rate": 1.531300173360273e-06, "loss": 0.338, "step": 2137 }, { "epoch": 0.6382089552238805, "grad_norm": 0.7466760142466666, "learning_rate": 1.5290721211848767e-06, "loss": 0.2746, "step": 2138 }, { "epoch": 0.6385074626865672, "grad_norm": 0.8289320396750041, "learning_rate": 1.5268449768033811e-06, "loss": 0.3639, "step": 2139 }, { "epoch": 0.6388059701492538, "grad_norm": 0.7786271838202594, "learning_rate": 1.5246187422981113e-06, "loss": 0.3195, "step": 2140 }, { "epoch": 0.6391044776119403, "grad_norm": 0.7961531887157887, "learning_rate": 1.522393419750542e-06, "loss": 0.3472, "step": 2141 }, { "epoch": 0.6394029850746269, "grad_norm": 0.7553655687048026, "learning_rate": 1.520169011241297e-06, "loss": 0.3181, "step": 2142 }, { "epoch": 0.6397014925373135, "grad_norm": 0.8791350108013017, "learning_rate": 1.5179455188501425e-06, "loss": 0.3573, "step": 2143 }, { "epoch": 0.64, "grad_norm": 0.791573138903334, "learning_rate": 1.5157229446559903e-06, "loss": 0.3146, "step": 2144 }, { "epoch": 0.6402985074626866, "grad_norm": 0.7982451287466451, "learning_rate": 1.5135012907368928e-06, "loss": 0.3238, "step": 2145 }, { "epoch": 0.6405970149253731, "grad_norm": 0.9547736379566139, "learning_rate": 1.5112805591700426e-06, "loss": 0.3464, "step": 2146 }, { "epoch": 0.6408955223880597, "grad_norm": 0.9389426478984015, "learning_rate": 1.50906075203177e-06, "loss": 0.3831, "step": 2147 }, { "epoch": 0.6411940298507462, "grad_norm": 0.7559187426695012, "learning_rate": 1.5068418713975397e-06, "loss": 0.2961, "step": 2148 }, { "epoch": 0.6414925373134328, "grad_norm": 0.8009593103452896, "learning_rate": 1.5046239193419514e-06, "loss": 0.3345, "step": 2149 }, { "epoch": 0.6417910447761194, "grad_norm": 0.8808029291340134, "learning_rate": 1.5024068979387365e-06, "loss": 0.3622, "step": 2150 }, { "epoch": 0.6420895522388059, "grad_norm": 0.8048602418552092, "learning_rate": 1.5001908092607553e-06, "loss": 0.3272, "step": 2151 }, { "epoch": 0.6423880597014925, "grad_norm": 0.886841416239724, "learning_rate": 1.497975655379997e-06, "loss": 0.3637, "step": 2152 }, { "epoch": 0.6426865671641792, "grad_norm": 0.8501760322071722, "learning_rate": 1.495761438367577e-06, "loss": 0.3315, "step": 2153 }, { "epoch": 0.6429850746268657, "grad_norm": 0.8499429690154281, "learning_rate": 1.4935481602937334e-06, "loss": 0.3519, "step": 2154 }, { "epoch": 0.6432835820895523, "grad_norm": 0.8164800240232427, "learning_rate": 1.4913358232278274e-06, "loss": 0.327, "step": 2155 }, { "epoch": 0.6435820895522388, "grad_norm": 0.8053874302696963, "learning_rate": 1.4891244292383412e-06, "loss": 0.3362, "step": 2156 }, { "epoch": 0.6438805970149254, "grad_norm": 0.7798998762739349, "learning_rate": 1.4869139803928727e-06, "loss": 0.3538, "step": 2157 }, { "epoch": 0.644179104477612, "grad_norm": 0.925044008666226, "learning_rate": 1.4847044787581395e-06, "loss": 0.3266, "step": 2158 }, { "epoch": 0.6444776119402985, "grad_norm": 0.8652309852841852, "learning_rate": 1.48249592639997e-06, "loss": 0.3687, "step": 2159 }, { "epoch": 0.6447761194029851, "grad_norm": 0.7856735870745902, "learning_rate": 1.4802883253833073e-06, "loss": 0.2956, "step": 2160 }, { "epoch": 0.6450746268656716, "grad_norm": 0.7855897694933878, "learning_rate": 1.4780816777722052e-06, "loss": 0.3162, "step": 2161 }, { "epoch": 0.6453731343283582, "grad_norm": 0.808546264261996, "learning_rate": 1.4758759856298248e-06, "loss": 0.3095, "step": 2162 }, { "epoch": 0.6456716417910447, "grad_norm": 0.8076290541474285, "learning_rate": 1.4736712510184359e-06, "loss": 0.3276, "step": 2163 }, { "epoch": 0.6459701492537313, "grad_norm": 0.7968186141075612, "learning_rate": 1.4714674759994096e-06, "loss": 0.2801, "step": 2164 }, { "epoch": 0.6462686567164179, "grad_norm": 0.7847934498100252, "learning_rate": 1.4692646626332228e-06, "loss": 0.3368, "step": 2165 }, { "epoch": 0.6465671641791044, "grad_norm": 0.9749102135271982, "learning_rate": 1.4670628129794527e-06, "loss": 0.3638, "step": 2166 }, { "epoch": 0.6468656716417911, "grad_norm": 0.7325101572568912, "learning_rate": 1.4648619290967742e-06, "loss": 0.3243, "step": 2167 }, { "epoch": 0.6471641791044777, "grad_norm": 0.9104372675901333, "learning_rate": 1.4626620130429608e-06, "loss": 0.3549, "step": 2168 }, { "epoch": 0.6474626865671642, "grad_norm": 0.7969438524329776, "learning_rate": 1.4604630668748795e-06, "loss": 0.3465, "step": 2169 }, { "epoch": 0.6477611940298508, "grad_norm": 0.8828900184898577, "learning_rate": 1.4582650926484921e-06, "loss": 0.3556, "step": 2170 }, { "epoch": 0.6480597014925373, "grad_norm": 0.798311888333371, "learning_rate": 1.4560680924188508e-06, "loss": 0.3184, "step": 2171 }, { "epoch": 0.6483582089552239, "grad_norm": 0.860792245744944, "learning_rate": 1.453872068240097e-06, "loss": 0.3363, "step": 2172 }, { "epoch": 0.6486567164179105, "grad_norm": 0.7649776213408986, "learning_rate": 1.4516770221654614e-06, "loss": 0.3355, "step": 2173 }, { "epoch": 0.648955223880597, "grad_norm": 0.7942691044713359, "learning_rate": 1.4494829562472558e-06, "loss": 0.3341, "step": 2174 }, { "epoch": 0.6492537313432836, "grad_norm": 0.8621884015676841, "learning_rate": 1.4472898725368795e-06, "loss": 0.3343, "step": 2175 }, { "epoch": 0.6495522388059701, "grad_norm": 0.7924705469899432, "learning_rate": 1.4450977730848132e-06, "loss": 0.3275, "step": 2176 }, { "epoch": 0.6498507462686567, "grad_norm": 1.2686960712118278, "learning_rate": 1.4429066599406152e-06, "loss": 0.3576, "step": 2177 }, { "epoch": 0.6501492537313432, "grad_norm": 0.7074769773056945, "learning_rate": 1.4407165351529246e-06, "loss": 0.2896, "step": 2178 }, { "epoch": 0.6504477611940298, "grad_norm": 0.8333612223546273, "learning_rate": 1.4385274007694527e-06, "loss": 0.3181, "step": 2179 }, { "epoch": 0.6507462686567164, "grad_norm": 0.8657347511763104, "learning_rate": 1.4363392588369876e-06, "loss": 0.3312, "step": 2180 }, { "epoch": 0.651044776119403, "grad_norm": 0.7696448268786413, "learning_rate": 1.4341521114013888e-06, "loss": 0.3079, "step": 2181 }, { "epoch": 0.6513432835820896, "grad_norm": 0.8733955547423778, "learning_rate": 1.4319659605075855e-06, "loss": 0.3489, "step": 2182 }, { "epoch": 0.6516417910447762, "grad_norm": 1.079444796750812, "learning_rate": 1.429780808199576e-06, "loss": 0.3833, "step": 2183 }, { "epoch": 0.6519402985074627, "grad_norm": 0.7722744529314012, "learning_rate": 1.4275966565204251e-06, "loss": 0.3301, "step": 2184 }, { "epoch": 0.6522388059701493, "grad_norm": 0.8366589681087087, "learning_rate": 1.4254135075122596e-06, "loss": 0.2948, "step": 2185 }, { "epoch": 0.6525373134328358, "grad_norm": 0.8263300107210038, "learning_rate": 1.4232313632162714e-06, "loss": 0.3113, "step": 2186 }, { "epoch": 0.6528358208955224, "grad_norm": 0.7782108665859974, "learning_rate": 1.421050225672712e-06, "loss": 0.3098, "step": 2187 }, { "epoch": 0.653134328358209, "grad_norm": 0.9032704619470634, "learning_rate": 1.418870096920893e-06, "loss": 0.3312, "step": 2188 }, { "epoch": 0.6534328358208955, "grad_norm": 0.7838544001315462, "learning_rate": 1.4166909789991813e-06, "loss": 0.3091, "step": 2189 }, { "epoch": 0.6537313432835821, "grad_norm": 0.8249256923116727, "learning_rate": 1.4145128739449974e-06, "loss": 0.3657, "step": 2190 }, { "epoch": 0.6540298507462686, "grad_norm": 0.7937594433877714, "learning_rate": 1.4123357837948177e-06, "loss": 0.3459, "step": 2191 }, { "epoch": 0.6543283582089552, "grad_norm": 1.0209366045959114, "learning_rate": 1.410159710584168e-06, "loss": 0.3305, "step": 2192 }, { "epoch": 0.6546268656716417, "grad_norm": 0.8416577328440887, "learning_rate": 1.4079846563476246e-06, "loss": 0.3428, "step": 2193 }, { "epoch": 0.6549253731343283, "grad_norm": 0.8381158795554763, "learning_rate": 1.4058106231188093e-06, "loss": 0.3278, "step": 2194 }, { "epoch": 0.655223880597015, "grad_norm": 0.7994100548783899, "learning_rate": 1.4036376129303914e-06, "loss": 0.3187, "step": 2195 }, { "epoch": 0.6555223880597015, "grad_norm": 0.8441055293209245, "learning_rate": 1.4014656278140806e-06, "loss": 0.3585, "step": 2196 }, { "epoch": 0.6558208955223881, "grad_norm": 0.8224053402613939, "learning_rate": 1.399294669800631e-06, "loss": 0.3106, "step": 2197 }, { "epoch": 0.6561194029850747, "grad_norm": 0.893366800253749, "learning_rate": 1.3971247409198347e-06, "loss": 0.3897, "step": 2198 }, { "epoch": 0.6564179104477612, "grad_norm": 0.8805627796048343, "learning_rate": 1.3949558432005245e-06, "loss": 0.347, "step": 2199 }, { "epoch": 0.6567164179104478, "grad_norm": 0.764024546070756, "learning_rate": 1.3927879786705645e-06, "loss": 0.3213, "step": 2200 }, { "epoch": 0.6570149253731343, "grad_norm": 0.722603696748916, "learning_rate": 1.3906211493568563e-06, "loss": 0.3137, "step": 2201 }, { "epoch": 0.6573134328358209, "grad_norm": 0.9269224842693098, "learning_rate": 1.3884553572853324e-06, "loss": 0.3678, "step": 2202 }, { "epoch": 0.6576119402985074, "grad_norm": 0.7631786113345267, "learning_rate": 1.3862906044809554e-06, "loss": 0.314, "step": 2203 }, { "epoch": 0.657910447761194, "grad_norm": 0.7773514605842012, "learning_rate": 1.3841268929677165e-06, "loss": 0.3133, "step": 2204 }, { "epoch": 0.6582089552238806, "grad_norm": 0.7946457192152244, "learning_rate": 1.381964224768634e-06, "loss": 0.3181, "step": 2205 }, { "epoch": 0.6585074626865671, "grad_norm": 0.8529990432133921, "learning_rate": 1.3798026019057483e-06, "loss": 0.3426, "step": 2206 }, { "epoch": 0.6588059701492537, "grad_norm": 0.9216515652907142, "learning_rate": 1.377642026400125e-06, "loss": 0.3643, "step": 2207 }, { "epoch": 0.6591044776119404, "grad_norm": 0.832800795535474, "learning_rate": 1.3754825002718498e-06, "loss": 0.3629, "step": 2208 }, { "epoch": 0.6594029850746269, "grad_norm": 0.990654044032596, "learning_rate": 1.3733240255400263e-06, "loss": 0.3506, "step": 2209 }, { "epoch": 0.6597014925373135, "grad_norm": 0.6854970183511663, "learning_rate": 1.3711666042227772e-06, "loss": 0.3319, "step": 2210 }, { "epoch": 0.66, "grad_norm": 0.8428108856258283, "learning_rate": 1.3690102383372369e-06, "loss": 0.2742, "step": 2211 }, { "epoch": 0.6602985074626866, "grad_norm": 0.8741591762317693, "learning_rate": 1.3668549298995558e-06, "loss": 0.3687, "step": 2212 }, { "epoch": 0.6605970149253731, "grad_norm": 0.7955836464506277, "learning_rate": 1.3647006809248947e-06, "loss": 0.3032, "step": 2213 }, { "epoch": 0.6608955223880597, "grad_norm": 0.8711695616206405, "learning_rate": 1.3625474934274241e-06, "loss": 0.381, "step": 2214 }, { "epoch": 0.6611940298507463, "grad_norm": 0.8401790125361631, "learning_rate": 1.360395369420322e-06, "loss": 0.3229, "step": 2215 }, { "epoch": 0.6614925373134328, "grad_norm": 0.8676846365068158, "learning_rate": 1.3582443109157722e-06, "loss": 0.3615, "step": 2216 }, { "epoch": 0.6617910447761194, "grad_norm": 0.7929648763121615, "learning_rate": 1.3560943199249605e-06, "loss": 0.3588, "step": 2217 }, { "epoch": 0.662089552238806, "grad_norm": 0.9511947801173409, "learning_rate": 1.3539453984580767e-06, "loss": 0.3105, "step": 2218 }, { "epoch": 0.6623880597014925, "grad_norm": 0.7404356758215146, "learning_rate": 1.3517975485243103e-06, "loss": 0.2941, "step": 2219 }, { "epoch": 0.6626865671641791, "grad_norm": 1.0227800450584272, "learning_rate": 1.3496507721318486e-06, "loss": 0.3296, "step": 2220 }, { "epoch": 0.6629850746268656, "grad_norm": 0.7690958302654547, "learning_rate": 1.3475050712878755e-06, "loss": 0.321, "step": 2221 }, { "epoch": 0.6632835820895523, "grad_norm": 0.8740712288587946, "learning_rate": 1.345360447998569e-06, "loss": 0.3596, "step": 2222 }, { "epoch": 0.6635820895522389, "grad_norm": 0.8474627076368212, "learning_rate": 1.3432169042690988e-06, "loss": 0.3634, "step": 2223 }, { "epoch": 0.6638805970149254, "grad_norm": 0.9253686814199646, "learning_rate": 1.3410744421036262e-06, "loss": 0.2892, "step": 2224 }, { "epoch": 0.664179104477612, "grad_norm": 0.8348049414436733, "learning_rate": 1.3389330635053013e-06, "loss": 0.3515, "step": 2225 }, { "epoch": 0.6644776119402985, "grad_norm": 0.822846204285828, "learning_rate": 1.3367927704762613e-06, "loss": 0.3425, "step": 2226 }, { "epoch": 0.6647761194029851, "grad_norm": 0.8678200777615288, "learning_rate": 1.3346535650176284e-06, "loss": 0.3978, "step": 2227 }, { "epoch": 0.6650746268656716, "grad_norm": 0.7358960864836035, "learning_rate": 1.3325154491295062e-06, "loss": 0.2916, "step": 2228 }, { "epoch": 0.6653731343283582, "grad_norm": 0.7423135388554165, "learning_rate": 1.330378424810981e-06, "loss": 0.3068, "step": 2229 }, { "epoch": 0.6656716417910448, "grad_norm": 0.8230255273207777, "learning_rate": 1.3282424940601197e-06, "loss": 0.355, "step": 2230 }, { "epoch": 0.6659701492537313, "grad_norm": 0.8023837250650214, "learning_rate": 1.326107658873964e-06, "loss": 0.2787, "step": 2231 }, { "epoch": 0.6662686567164179, "grad_norm": 0.7899498370106252, "learning_rate": 1.3239739212485342e-06, "loss": 0.2977, "step": 2232 }, { "epoch": 0.6665671641791044, "grad_norm": 0.9616260736960074, "learning_rate": 1.3218412831788232e-06, "loss": 0.3403, "step": 2233 }, { "epoch": 0.666865671641791, "grad_norm": 0.9674895085633961, "learning_rate": 1.3197097466587939e-06, "loss": 0.3681, "step": 2234 }, { "epoch": 0.6671641791044776, "grad_norm": 0.808185104408685, "learning_rate": 1.317579313681382e-06, "loss": 0.3109, "step": 2235 }, { "epoch": 0.6674626865671642, "grad_norm": 0.8737274124495528, "learning_rate": 1.315449986238489e-06, "loss": 0.3471, "step": 2236 }, { "epoch": 0.6677611940298508, "grad_norm": 0.8084788833507394, "learning_rate": 1.3133217663209873e-06, "loss": 0.3272, "step": 2237 }, { "epoch": 0.6680597014925374, "grad_norm": 0.8888189135315439, "learning_rate": 1.3111946559187078e-06, "loss": 0.359, "step": 2238 }, { "epoch": 0.6683582089552239, "grad_norm": 0.8035833032123232, "learning_rate": 1.309068657020448e-06, "loss": 0.3285, "step": 2239 }, { "epoch": 0.6686567164179105, "grad_norm": 0.9254549420427741, "learning_rate": 1.3069437716139648e-06, "loss": 0.377, "step": 2240 }, { "epoch": 0.668955223880597, "grad_norm": 0.768533993055729, "learning_rate": 1.3048200016859743e-06, "loss": 0.3516, "step": 2241 }, { "epoch": 0.6692537313432836, "grad_norm": 0.7917607581116863, "learning_rate": 1.3026973492221501e-06, "loss": 0.3074, "step": 2242 }, { "epoch": 0.6695522388059701, "grad_norm": 0.8087854690730707, "learning_rate": 1.3005758162071206e-06, "loss": 0.3164, "step": 2243 }, { "epoch": 0.6698507462686567, "grad_norm": 0.8650182535397217, "learning_rate": 1.298455404624466e-06, "loss": 0.3296, "step": 2244 }, { "epoch": 0.6701492537313433, "grad_norm": 0.7997154190905688, "learning_rate": 1.2963361164567207e-06, "loss": 0.3546, "step": 2245 }, { "epoch": 0.6704477611940298, "grad_norm": 0.9966416873938866, "learning_rate": 1.2942179536853666e-06, "loss": 0.3685, "step": 2246 }, { "epoch": 0.6707462686567164, "grad_norm": 0.8148532817098773, "learning_rate": 1.2921009182908351e-06, "loss": 0.3242, "step": 2247 }, { "epoch": 0.6710447761194029, "grad_norm": 0.75826379276025, "learning_rate": 1.2899850122525037e-06, "loss": 0.2655, "step": 2248 }, { "epoch": 0.6713432835820895, "grad_norm": 0.9350771859645716, "learning_rate": 1.2878702375486905e-06, "loss": 0.3477, "step": 2249 }, { "epoch": 0.6716417910447762, "grad_norm": 0.7895404077047398, "learning_rate": 1.2857565961566593e-06, "loss": 0.3619, "step": 2250 }, { "epoch": 0.6719402985074627, "grad_norm": 0.956687016013122, "learning_rate": 1.2836440900526136e-06, "loss": 0.3528, "step": 2251 }, { "epoch": 0.6722388059701493, "grad_norm": 0.8027731716484646, "learning_rate": 1.281532721211695e-06, "loss": 0.3426, "step": 2252 }, { "epoch": 0.6725373134328358, "grad_norm": 0.8683386280279184, "learning_rate": 1.2794224916079818e-06, "loss": 0.3624, "step": 2253 }, { "epoch": 0.6728358208955224, "grad_norm": 0.8021378966828665, "learning_rate": 1.2773134032144885e-06, "loss": 0.3344, "step": 2254 }, { "epoch": 0.673134328358209, "grad_norm": 0.822448080875513, "learning_rate": 1.2752054580031592e-06, "loss": 0.3012, "step": 2255 }, { "epoch": 0.6734328358208955, "grad_norm": 0.8066506836401879, "learning_rate": 1.2730986579448719e-06, "loss": 0.3137, "step": 2256 }, { "epoch": 0.6737313432835821, "grad_norm": 0.7452823368578445, "learning_rate": 1.2709930050094337e-06, "loss": 0.3389, "step": 2257 }, { "epoch": 0.6740298507462686, "grad_norm": 0.7510246741662416, "learning_rate": 1.268888501165579e-06, "loss": 0.2977, "step": 2258 }, { "epoch": 0.6743283582089552, "grad_norm": 0.8248092475825504, "learning_rate": 1.2667851483809673e-06, "loss": 0.3085, "step": 2259 }, { "epoch": 0.6746268656716418, "grad_norm": 0.8232298732269487, "learning_rate": 1.264682948622183e-06, "loss": 0.3586, "step": 2260 }, { "epoch": 0.6749253731343283, "grad_norm": 0.87600455902864, "learning_rate": 1.2625819038547302e-06, "loss": 0.3549, "step": 2261 }, { "epoch": 0.6752238805970149, "grad_norm": 0.8569153532990559, "learning_rate": 1.2604820160430348e-06, "loss": 0.3223, "step": 2262 }, { "epoch": 0.6755223880597014, "grad_norm": 0.8313060298524098, "learning_rate": 1.2583832871504415e-06, "loss": 0.3437, "step": 2263 }, { "epoch": 0.6758208955223881, "grad_norm": 0.8731999146655798, "learning_rate": 1.25628571913921e-06, "loss": 0.3928, "step": 2264 }, { "epoch": 0.6761194029850747, "grad_norm": 0.830291452853221, "learning_rate": 1.2541893139705162e-06, "loss": 0.3391, "step": 2265 }, { "epoch": 0.6764179104477612, "grad_norm": 0.7748207330310743, "learning_rate": 1.2520940736044468e-06, "loss": 0.3191, "step": 2266 }, { "epoch": 0.6767164179104478, "grad_norm": 0.7873823715976541, "learning_rate": 1.2500000000000007e-06, "loss": 0.3354, "step": 2267 }, { "epoch": 0.6770149253731343, "grad_norm": 0.8238073008584107, "learning_rate": 1.2479070951150857e-06, "loss": 0.3722, "step": 2268 }, { "epoch": 0.6773134328358209, "grad_norm": 0.7684716974580467, "learning_rate": 1.245815360906517e-06, "loss": 0.3427, "step": 2269 }, { "epoch": 0.6776119402985075, "grad_norm": 0.9061590212050163, "learning_rate": 1.2437247993300147e-06, "loss": 0.3099, "step": 2270 }, { "epoch": 0.677910447761194, "grad_norm": 0.8775458433192322, "learning_rate": 1.2416354123402047e-06, "loss": 0.3393, "step": 2271 }, { "epoch": 0.6782089552238806, "grad_norm": 0.8109025654110851, "learning_rate": 1.2395472018906102e-06, "loss": 0.3608, "step": 2272 }, { "epoch": 0.6785074626865671, "grad_norm": 0.8511617196960201, "learning_rate": 1.2374601699336586e-06, "loss": 0.3599, "step": 2273 }, { "epoch": 0.6788059701492537, "grad_norm": 1.2567441350562456, "learning_rate": 1.2353743184206724e-06, "loss": 0.3313, "step": 2274 }, { "epoch": 0.6791044776119403, "grad_norm": 0.7862840705237462, "learning_rate": 1.2332896493018753e-06, "loss": 0.323, "step": 2275 }, { "epoch": 0.6794029850746268, "grad_norm": 0.7520760436868094, "learning_rate": 1.231206164526379e-06, "loss": 0.3234, "step": 2276 }, { "epoch": 0.6797014925373134, "grad_norm": 0.7862063406522266, "learning_rate": 1.229123866042192e-06, "loss": 0.3108, "step": 2277 }, { "epoch": 0.68, "grad_norm": 0.7931130898996858, "learning_rate": 1.2270427557962125e-06, "loss": 0.2967, "step": 2278 }, { "epoch": 0.6802985074626866, "grad_norm": 0.8831726582888332, "learning_rate": 1.2249628357342284e-06, "loss": 0.3366, "step": 2279 }, { "epoch": 0.6805970149253732, "grad_norm": 0.8362589161672496, "learning_rate": 1.2228841078009133e-06, "loss": 0.32, "step": 2280 }, { "epoch": 0.6808955223880597, "grad_norm": 0.7825482021832821, "learning_rate": 1.2208065739398286e-06, "loss": 0.3236, "step": 2281 }, { "epoch": 0.6811940298507463, "grad_norm": 0.7726512675817263, "learning_rate": 1.2187302360934162e-06, "loss": 0.3659, "step": 2282 }, { "epoch": 0.6814925373134328, "grad_norm": 0.8087551514263144, "learning_rate": 1.216655096203002e-06, "loss": 0.3497, "step": 2283 }, { "epoch": 0.6817910447761194, "grad_norm": 0.8321435088465757, "learning_rate": 1.214581156208791e-06, "loss": 0.3367, "step": 2284 }, { "epoch": 0.682089552238806, "grad_norm": 0.8973773653830277, "learning_rate": 1.2125084180498672e-06, "loss": 0.3681, "step": 2285 }, { "epoch": 0.6823880597014925, "grad_norm": 0.7645793362757674, "learning_rate": 1.2104368836641909e-06, "loss": 0.3385, "step": 2286 }, { "epoch": 0.6826865671641791, "grad_norm": 0.7860470543681153, "learning_rate": 1.208366554988594e-06, "loss": 0.3102, "step": 2287 }, { "epoch": 0.6829850746268656, "grad_norm": 0.7711760963359957, "learning_rate": 1.2062974339587844e-06, "loss": 0.3456, "step": 2288 }, { "epoch": 0.6832835820895522, "grad_norm": 0.8535820450749667, "learning_rate": 1.2042295225093404e-06, "loss": 0.3325, "step": 2289 }, { "epoch": 0.6835820895522388, "grad_norm": 0.849860239170701, "learning_rate": 1.202162822573708e-06, "loss": 0.3324, "step": 2290 }, { "epoch": 0.6838805970149253, "grad_norm": 1.3404957810983344, "learning_rate": 1.2000973360842019e-06, "loss": 0.3833, "step": 2291 }, { "epoch": 0.684179104477612, "grad_norm": 0.819656752872545, "learning_rate": 1.1980330649720024e-06, "loss": 0.3346, "step": 2292 }, { "epoch": 0.6844776119402985, "grad_norm": 0.9829794779330354, "learning_rate": 1.1959700111671508e-06, "loss": 0.3462, "step": 2293 }, { "epoch": 0.6847761194029851, "grad_norm": 0.7859290591207186, "learning_rate": 1.1939081765985528e-06, "loss": 0.291, "step": 2294 }, { "epoch": 0.6850746268656717, "grad_norm": 0.810647132569844, "learning_rate": 1.1918475631939735e-06, "loss": 0.3525, "step": 2295 }, { "epoch": 0.6853731343283582, "grad_norm": 0.8879767152876183, "learning_rate": 1.1897881728800364e-06, "loss": 0.3398, "step": 2296 }, { "epoch": 0.6856716417910448, "grad_norm": 0.8081639689735962, "learning_rate": 1.1877300075822223e-06, "loss": 0.3221, "step": 2297 }, { "epoch": 0.6859701492537313, "grad_norm": 0.8175265918913039, "learning_rate": 1.1856730692248635e-06, "loss": 0.3699, "step": 2298 }, { "epoch": 0.6862686567164179, "grad_norm": 0.7821789581867407, "learning_rate": 1.1836173597311484e-06, "loss": 0.316, "step": 2299 }, { "epoch": 0.6865671641791045, "grad_norm": 0.7996767148379933, "learning_rate": 1.1815628810231147e-06, "loss": 0.3795, "step": 2300 }, { "epoch": 0.686865671641791, "grad_norm": 0.8382268679790396, "learning_rate": 1.1795096350216506e-06, "loss": 0.3339, "step": 2301 }, { "epoch": 0.6871641791044776, "grad_norm": 0.8667351459692307, "learning_rate": 1.1774576236464906e-06, "loss": 0.3781, "step": 2302 }, { "epoch": 0.6874626865671641, "grad_norm": 0.7891693977056329, "learning_rate": 1.1754068488162165e-06, "loss": 0.2936, "step": 2303 }, { "epoch": 0.6877611940298507, "grad_norm": 0.7802694773636687, "learning_rate": 1.1733573124482505e-06, "loss": 0.3155, "step": 2304 }, { "epoch": 0.6880597014925374, "grad_norm": 0.7825736393246602, "learning_rate": 1.1713090164588608e-06, "loss": 0.3262, "step": 2305 }, { "epoch": 0.6883582089552239, "grad_norm": 0.7824674762625962, "learning_rate": 1.1692619627631539e-06, "loss": 0.3152, "step": 2306 }, { "epoch": 0.6886567164179105, "grad_norm": 0.7859681853056969, "learning_rate": 1.1672161532750749e-06, "loss": 0.2974, "step": 2307 }, { "epoch": 0.688955223880597, "grad_norm": 0.7718704768344236, "learning_rate": 1.1651715899074057e-06, "loss": 0.3303, "step": 2308 }, { "epoch": 0.6892537313432836, "grad_norm": 1.0009242400338725, "learning_rate": 1.1631282745717646e-06, "loss": 0.3184, "step": 2309 }, { "epoch": 0.6895522388059702, "grad_norm": 0.7836521432415025, "learning_rate": 1.1610862091785993e-06, "loss": 0.3612, "step": 2310 }, { "epoch": 0.6898507462686567, "grad_norm": 0.8471632552947357, "learning_rate": 1.1590453956371925e-06, "loss": 0.2925, "step": 2311 }, { "epoch": 0.6901492537313433, "grad_norm": 0.7698745533567938, "learning_rate": 1.157005835855654e-06, "loss": 0.3735, "step": 2312 }, { "epoch": 0.6904477611940298, "grad_norm": 0.7805344892196837, "learning_rate": 1.1549675317409254e-06, "loss": 0.3117, "step": 2313 }, { "epoch": 0.6907462686567164, "grad_norm": 0.8027470646073804, "learning_rate": 1.1529304851987685e-06, "loss": 0.3219, "step": 2314 }, { "epoch": 0.691044776119403, "grad_norm": 0.8673613890056963, "learning_rate": 1.1508946981337729e-06, "loss": 0.3615, "step": 2315 }, { "epoch": 0.6913432835820895, "grad_norm": 0.7571748507097132, "learning_rate": 1.1488601724493504e-06, "loss": 0.3159, "step": 2316 }, { "epoch": 0.6916417910447761, "grad_norm": 0.8645844258196447, "learning_rate": 1.1468269100477322e-06, "loss": 0.3551, "step": 2317 }, { "epoch": 0.6919402985074626, "grad_norm": 0.7659538316934659, "learning_rate": 1.1447949128299695e-06, "loss": 0.34, "step": 2318 }, { "epoch": 0.6922388059701493, "grad_norm": 0.9851936264003477, "learning_rate": 1.1427641826959313e-06, "loss": 0.3599, "step": 2319 }, { "epoch": 0.6925373134328359, "grad_norm": 0.8517192723805389, "learning_rate": 1.1407347215442985e-06, "loss": 0.3335, "step": 2320 }, { "epoch": 0.6928358208955224, "grad_norm": 0.8252636410104004, "learning_rate": 1.138706531272569e-06, "loss": 0.3648, "step": 2321 }, { "epoch": 0.693134328358209, "grad_norm": 0.7905456222424458, "learning_rate": 1.1366796137770512e-06, "loss": 0.3108, "step": 2322 }, { "epoch": 0.6934328358208955, "grad_norm": 0.8262859024812593, "learning_rate": 1.1346539709528634e-06, "loss": 0.3211, "step": 2323 }, { "epoch": 0.6937313432835821, "grad_norm": 0.7940913246786866, "learning_rate": 1.1326296046939334e-06, "loss": 0.3616, "step": 2324 }, { "epoch": 0.6940298507462687, "grad_norm": 0.9195285544655638, "learning_rate": 1.1306065168929925e-06, "loss": 0.3563, "step": 2325 }, { "epoch": 0.6943283582089552, "grad_norm": 0.7768973786157898, "learning_rate": 1.1285847094415792e-06, "loss": 0.3046, "step": 2326 }, { "epoch": 0.6946268656716418, "grad_norm": 0.8076162534794478, "learning_rate": 1.126564184230034e-06, "loss": 0.3552, "step": 2327 }, { "epoch": 0.6949253731343283, "grad_norm": 0.7569456200327547, "learning_rate": 1.1245449431474994e-06, "loss": 0.3296, "step": 2328 }, { "epoch": 0.6952238805970149, "grad_norm": 0.8337061109406402, "learning_rate": 1.1225269880819158e-06, "loss": 0.2923, "step": 2329 }, { "epoch": 0.6955223880597015, "grad_norm": 0.8392372442467751, "learning_rate": 1.1205103209200238e-06, "loss": 0.3642, "step": 2330 }, { "epoch": 0.695820895522388, "grad_norm": 0.8358260600131869, "learning_rate": 1.1184949435473556e-06, "loss": 0.328, "step": 2331 }, { "epoch": 0.6961194029850746, "grad_norm": 0.8295407000320718, "learning_rate": 1.1164808578482405e-06, "loss": 0.3583, "step": 2332 }, { "epoch": 0.6964179104477612, "grad_norm": 0.8276689046645763, "learning_rate": 1.1144680657058005e-06, "loss": 0.3463, "step": 2333 }, { "epoch": 0.6967164179104478, "grad_norm": 0.8590053159382256, "learning_rate": 1.1124565690019465e-06, "loss": 0.3184, "step": 2334 }, { "epoch": 0.6970149253731344, "grad_norm": 0.8071370342595704, "learning_rate": 1.1104463696173798e-06, "loss": 0.3232, "step": 2335 }, { "epoch": 0.6973134328358209, "grad_norm": 0.8642648814478965, "learning_rate": 1.1084374694315863e-06, "loss": 0.3465, "step": 2336 }, { "epoch": 0.6976119402985075, "grad_norm": 0.7645757881216615, "learning_rate": 1.1064298703228397e-06, "loss": 0.329, "step": 2337 }, { "epoch": 0.697910447761194, "grad_norm": 0.8218872469842694, "learning_rate": 1.1044235741681957e-06, "loss": 0.3571, "step": 2338 }, { "epoch": 0.6982089552238806, "grad_norm": 0.8082592854417281, "learning_rate": 1.102418582843493e-06, "loss": 0.3386, "step": 2339 }, { "epoch": 0.6985074626865672, "grad_norm": 0.8551551134548198, "learning_rate": 1.100414898223349e-06, "loss": 0.3187, "step": 2340 }, { "epoch": 0.6988059701492537, "grad_norm": 0.8454776918886312, "learning_rate": 1.0984125221811611e-06, "loss": 0.3226, "step": 2341 }, { "epoch": 0.6991044776119403, "grad_norm": 0.8274576186330233, "learning_rate": 1.0964114565891005e-06, "loss": 0.339, "step": 2342 }, { "epoch": 0.6994029850746268, "grad_norm": 0.8865291981715888, "learning_rate": 1.0944117033181151e-06, "loss": 0.2815, "step": 2343 }, { "epoch": 0.6997014925373134, "grad_norm": 0.7691276300346361, "learning_rate": 1.0924132642379262e-06, "loss": 0.2907, "step": 2344 }, { "epoch": 0.7, "grad_norm": 0.8838758440348157, "learning_rate": 1.090416141217025e-06, "loss": 0.3687, "step": 2345 }, { "epoch": 0.7002985074626865, "grad_norm": 0.834499070283479, "learning_rate": 1.0884203361226733e-06, "loss": 0.3357, "step": 2346 }, { "epoch": 0.7005970149253732, "grad_norm": 0.7837295011852984, "learning_rate": 1.0864258508209008e-06, "loss": 0.3141, "step": 2347 }, { "epoch": 0.7008955223880597, "grad_norm": 0.9621383837777848, "learning_rate": 1.0844326871765012e-06, "loss": 0.3344, "step": 2348 }, { "epoch": 0.7011940298507463, "grad_norm": 0.836293896828257, "learning_rate": 1.0824408470530334e-06, "loss": 0.3519, "step": 2349 }, { "epoch": 0.7014925373134329, "grad_norm": 0.8713650513051289, "learning_rate": 1.0804503323128214e-06, "loss": 0.3365, "step": 2350 }, { "epoch": 0.7017910447761194, "grad_norm": 0.7793599987932461, "learning_rate": 1.0784611448169482e-06, "loss": 0.3324, "step": 2351 }, { "epoch": 0.702089552238806, "grad_norm": 0.747029931462924, "learning_rate": 1.0764732864252538e-06, "loss": 0.3049, "step": 2352 }, { "epoch": 0.7023880597014925, "grad_norm": 0.7301826389896193, "learning_rate": 1.0744867589963378e-06, "loss": 0.2968, "step": 2353 }, { "epoch": 0.7026865671641791, "grad_norm": 0.80685723875685, "learning_rate": 1.0725015643875553e-06, "loss": 0.3443, "step": 2354 }, { "epoch": 0.7029850746268657, "grad_norm": 0.8130732259035608, "learning_rate": 1.0705177044550147e-06, "loss": 0.3303, "step": 2355 }, { "epoch": 0.7032835820895522, "grad_norm": 1.0494784933389165, "learning_rate": 1.0685351810535779e-06, "loss": 0.3631, "step": 2356 }, { "epoch": 0.7035820895522388, "grad_norm": 0.771095705431318, "learning_rate": 1.0665539960368536e-06, "loss": 0.3097, "step": 2357 }, { "epoch": 0.7038805970149253, "grad_norm": 0.8622991762796726, "learning_rate": 1.0645741512572031e-06, "loss": 0.3797, "step": 2358 }, { "epoch": 0.7041791044776119, "grad_norm": 0.8468783834550917, "learning_rate": 1.0625956485657321e-06, "loss": 0.313, "step": 2359 }, { "epoch": 0.7044776119402985, "grad_norm": 0.7455704245680197, "learning_rate": 1.060618489812293e-06, "loss": 0.3154, "step": 2360 }, { "epoch": 0.7047761194029851, "grad_norm": 0.8264996079174645, "learning_rate": 1.0586426768454813e-06, "loss": 0.3475, "step": 2361 }, { "epoch": 0.7050746268656717, "grad_norm": 0.8225831760639863, "learning_rate": 1.0566682115126345e-06, "loss": 0.3354, "step": 2362 }, { "epoch": 0.7053731343283582, "grad_norm": 0.7582974133085499, "learning_rate": 1.0546950956598276e-06, "loss": 0.3143, "step": 2363 }, { "epoch": 0.7056716417910448, "grad_norm": 0.837445402977908, "learning_rate": 1.0527233311318768e-06, "loss": 0.3432, "step": 2364 }, { "epoch": 0.7059701492537314, "grad_norm": 0.8064450995316124, "learning_rate": 1.050752919772334e-06, "loss": 0.3427, "step": 2365 }, { "epoch": 0.7062686567164179, "grad_norm": 0.7708046546693463, "learning_rate": 1.048783863423486e-06, "loss": 0.2782, "step": 2366 }, { "epoch": 0.7065671641791045, "grad_norm": 0.7890086390145694, "learning_rate": 1.0468161639263518e-06, "loss": 0.3246, "step": 2367 }, { "epoch": 0.706865671641791, "grad_norm": 0.9613407733550523, "learning_rate": 1.0448498231206843e-06, "loss": 0.3788, "step": 2368 }, { "epoch": 0.7071641791044776, "grad_norm": 0.8377614962573282, "learning_rate": 1.0428848428449618e-06, "loss": 0.2917, "step": 2369 }, { "epoch": 0.7074626865671642, "grad_norm": 0.7644477209892271, "learning_rate": 1.040921224936394e-06, "loss": 0.2539, "step": 2370 }, { "epoch": 0.7077611940298507, "grad_norm": 0.8073027138319966, "learning_rate": 1.0389589712309156e-06, "loss": 0.3853, "step": 2371 }, { "epoch": 0.7080597014925373, "grad_norm": 0.773541394858571, "learning_rate": 1.0369980835631862e-06, "loss": 0.3109, "step": 2372 }, { "epoch": 0.7083582089552238, "grad_norm": 0.8939609435492758, "learning_rate": 1.035038563766589e-06, "loss": 0.3331, "step": 2373 }, { "epoch": 0.7086567164179104, "grad_norm": 0.8792285135356904, "learning_rate": 1.0330804136732253e-06, "loss": 0.3031, "step": 2374 }, { "epoch": 0.7089552238805971, "grad_norm": 0.8249719781900057, "learning_rate": 1.0311236351139186e-06, "loss": 0.3463, "step": 2375 }, { "epoch": 0.7092537313432836, "grad_norm": 0.9446425866318262, "learning_rate": 1.0291682299182092e-06, "loss": 0.3444, "step": 2376 }, { "epoch": 0.7095522388059702, "grad_norm": 0.8352690148905147, "learning_rate": 1.027214199914353e-06, "loss": 0.3358, "step": 2377 }, { "epoch": 0.7098507462686567, "grad_norm": 0.7396486359215371, "learning_rate": 1.025261546929321e-06, "loss": 0.3504, "step": 2378 }, { "epoch": 0.7101492537313433, "grad_norm": 0.8423507780215546, "learning_rate": 1.0233102727887967e-06, "loss": 0.3328, "step": 2379 }, { "epoch": 0.7104477611940299, "grad_norm": 0.8857400481832675, "learning_rate": 1.0213603793171717e-06, "loss": 0.3581, "step": 2380 }, { "epoch": 0.7107462686567164, "grad_norm": 0.8154084588304341, "learning_rate": 1.0194118683375502e-06, "loss": 0.3725, "step": 2381 }, { "epoch": 0.711044776119403, "grad_norm": 0.9479114104669211, "learning_rate": 1.0174647416717428e-06, "loss": 0.3963, "step": 2382 }, { "epoch": 0.7113432835820895, "grad_norm": 0.8293843508242847, "learning_rate": 1.015519001140265e-06, "loss": 0.3394, "step": 2383 }, { "epoch": 0.7116417910447761, "grad_norm": 0.8038453888278364, "learning_rate": 1.0135746485623376e-06, "loss": 0.2989, "step": 2384 }, { "epoch": 0.7119402985074627, "grad_norm": 0.8752892342124312, "learning_rate": 1.0116316857558814e-06, "loss": 0.3621, "step": 2385 }, { "epoch": 0.7122388059701492, "grad_norm": 0.7967390929681123, "learning_rate": 1.0096901145375197e-06, "loss": 0.3461, "step": 2386 }, { "epoch": 0.7125373134328358, "grad_norm": 0.8551632337211533, "learning_rate": 1.0077499367225737e-06, "loss": 0.3295, "step": 2387 }, { "epoch": 0.7128358208955224, "grad_norm": 0.8773095549519137, "learning_rate": 1.0058111541250642e-06, "loss": 0.3562, "step": 2388 }, { "epoch": 0.713134328358209, "grad_norm": 0.8061003756386974, "learning_rate": 1.0038737685577057e-06, "loss": 0.3063, "step": 2389 }, { "epoch": 0.7134328358208956, "grad_norm": 0.842089773836329, "learning_rate": 1.0019377818319045e-06, "loss": 0.3467, "step": 2390 }, { "epoch": 0.7137313432835821, "grad_norm": 0.7906352842135439, "learning_rate": 1.0000031957577618e-06, "loss": 0.3419, "step": 2391 }, { "epoch": 0.7140298507462687, "grad_norm": 0.9218014121141632, "learning_rate": 9.98070012144069e-07, "loss": 0.3658, "step": 2392 }, { "epoch": 0.7143283582089552, "grad_norm": 0.7939940692921382, "learning_rate": 9.96138232798305e-07, "loss": 0.3208, "step": 2393 }, { "epoch": 0.7146268656716418, "grad_norm": 0.8770021483327558, "learning_rate": 9.942078595266374e-07, "loss": 0.3714, "step": 2394 }, { "epoch": 0.7149253731343284, "grad_norm": 0.8900060492450977, "learning_rate": 9.922788941339169e-07, "loss": 0.3235, "step": 2395 }, { "epoch": 0.7152238805970149, "grad_norm": 0.7812682445205359, "learning_rate": 9.90351338423679e-07, "loss": 0.3309, "step": 2396 }, { "epoch": 0.7155223880597015, "grad_norm": 1.0022155072876207, "learning_rate": 9.884251941981424e-07, "loss": 0.3809, "step": 2397 }, { "epoch": 0.715820895522388, "grad_norm": 0.917525828896109, "learning_rate": 9.865004632582045e-07, "loss": 0.361, "step": 2398 }, { "epoch": 0.7161194029850746, "grad_norm": 0.7859374963278128, "learning_rate": 9.845771474034419e-07, "loss": 0.355, "step": 2399 }, { "epoch": 0.7164179104477612, "grad_norm": 0.8984576640616055, "learning_rate": 9.826552484321086e-07, "loss": 0.3332, "step": 2400 }, { "epoch": 0.7167164179104477, "grad_norm": 0.8103441506377737, "learning_rate": 9.80734768141132e-07, "loss": 0.3105, "step": 2401 }, { "epoch": 0.7170149253731344, "grad_norm": 0.7674707580678842, "learning_rate": 9.788157083261152e-07, "loss": 0.2509, "step": 2402 }, { "epoch": 0.7173134328358209, "grad_norm": 0.86655253489264, "learning_rate": 9.768980707813319e-07, "loss": 0.4078, "step": 2403 }, { "epoch": 0.7176119402985075, "grad_norm": 0.8359834932221715, "learning_rate": 9.74981857299727e-07, "loss": 0.3915, "step": 2404 }, { "epoch": 0.7179104477611941, "grad_norm": 0.8302643419290284, "learning_rate": 9.730670696729128e-07, "loss": 0.3383, "step": 2405 }, { "epoch": 0.7182089552238806, "grad_norm": 0.8047361286111895, "learning_rate": 9.711537096911704e-07, "loss": 0.3249, "step": 2406 }, { "epoch": 0.7185074626865672, "grad_norm": 0.7966244710402332, "learning_rate": 9.692417791434431e-07, "loss": 0.284, "step": 2407 }, { "epoch": 0.7188059701492537, "grad_norm": 0.8341828877431555, "learning_rate": 9.673312798173399e-07, "loss": 0.321, "step": 2408 }, { "epoch": 0.7191044776119403, "grad_norm": 0.8810165039262209, "learning_rate": 9.654222134991312e-07, "loss": 0.3862, "step": 2409 }, { "epoch": 0.7194029850746269, "grad_norm": 0.861373072096721, "learning_rate": 9.635145819737476e-07, "loss": 0.336, "step": 2410 }, { "epoch": 0.7197014925373134, "grad_norm": 0.8448013124798284, "learning_rate": 9.616083870247785e-07, "loss": 0.3962, "step": 2411 }, { "epoch": 0.72, "grad_norm": 0.8451750195129675, "learning_rate": 9.597036304344688e-07, "loss": 0.3554, "step": 2412 }, { "epoch": 0.7202985074626865, "grad_norm": 0.7751019468536666, "learning_rate": 9.578003139837196e-07, "loss": 0.305, "step": 2413 }, { "epoch": 0.7205970149253731, "grad_norm": 0.8478856215524544, "learning_rate": 9.558984394520856e-07, "loss": 0.3419, "step": 2414 }, { "epoch": 0.7208955223880597, "grad_norm": 0.7578568314911519, "learning_rate": 9.539980086177734e-07, "loss": 0.2998, "step": 2415 }, { "epoch": 0.7211940298507463, "grad_norm": 0.8308842159873571, "learning_rate": 9.520990232576391e-07, "loss": 0.3567, "step": 2416 }, { "epoch": 0.7214925373134329, "grad_norm": 0.9597039646097656, "learning_rate": 9.502014851471888e-07, "loss": 0.3741, "step": 2417 }, { "epoch": 0.7217910447761194, "grad_norm": 0.8391053251773948, "learning_rate": 9.483053960605726e-07, "loss": 0.3388, "step": 2418 }, { "epoch": 0.722089552238806, "grad_norm": 0.7561556408738367, "learning_rate": 9.464107577705887e-07, "loss": 0.3408, "step": 2419 }, { "epoch": 0.7223880597014926, "grad_norm": 0.9032217114431608, "learning_rate": 9.445175720486771e-07, "loss": 0.3679, "step": 2420 }, { "epoch": 0.7226865671641791, "grad_norm": 0.909162048526229, "learning_rate": 9.426258406649211e-07, "loss": 0.3379, "step": 2421 }, { "epoch": 0.7229850746268657, "grad_norm": 0.7673893074765138, "learning_rate": 9.407355653880437e-07, "loss": 0.2601, "step": 2422 }, { "epoch": 0.7232835820895522, "grad_norm": 0.8015859918855246, "learning_rate": 9.388467479854046e-07, "loss": 0.3201, "step": 2423 }, { "epoch": 0.7235820895522388, "grad_norm": 0.7805401715822302, "learning_rate": 9.369593902230032e-07, "loss": 0.3044, "step": 2424 }, { "epoch": 0.7238805970149254, "grad_norm": 0.7760863233512528, "learning_rate": 9.350734938654715e-07, "loss": 0.3316, "step": 2425 }, { "epoch": 0.7241791044776119, "grad_norm": 0.8337237951101096, "learning_rate": 9.331890606760791e-07, "loss": 0.3232, "step": 2426 }, { "epoch": 0.7244776119402985, "grad_norm": 0.7698626803368143, "learning_rate": 9.313060924167247e-07, "loss": 0.3024, "step": 2427 }, { "epoch": 0.724776119402985, "grad_norm": 0.7772472006066783, "learning_rate": 9.29424590847936e-07, "loss": 0.3098, "step": 2428 }, { "epoch": 0.7250746268656716, "grad_norm": 0.8379061596545783, "learning_rate": 9.275445577288722e-07, "loss": 0.3738, "step": 2429 }, { "epoch": 0.7253731343283583, "grad_norm": 0.89865650654176, "learning_rate": 9.256659948173181e-07, "loss": 0.3338, "step": 2430 }, { "epoch": 0.7256716417910448, "grad_norm": 0.8370452187741632, "learning_rate": 9.237889038696843e-07, "loss": 0.3382, "step": 2431 }, { "epoch": 0.7259701492537314, "grad_norm": 0.8134596631444544, "learning_rate": 9.219132866410063e-07, "loss": 0.3064, "step": 2432 }, { "epoch": 0.7262686567164179, "grad_norm": 0.8264430928633055, "learning_rate": 9.200391448849383e-07, "loss": 0.3432, "step": 2433 }, { "epoch": 0.7265671641791045, "grad_norm": 0.7728324764571474, "learning_rate": 9.181664803537585e-07, "loss": 0.3278, "step": 2434 }, { "epoch": 0.7268656716417911, "grad_norm": 0.8542443384866948, "learning_rate": 9.162952947983619e-07, "loss": 0.311, "step": 2435 }, { "epoch": 0.7271641791044776, "grad_norm": 0.7451019337472867, "learning_rate": 9.144255899682622e-07, "loss": 0.3326, "step": 2436 }, { "epoch": 0.7274626865671642, "grad_norm": 0.7771194432519887, "learning_rate": 9.125573676115873e-07, "loss": 0.3467, "step": 2437 }, { "epoch": 0.7277611940298507, "grad_norm": 0.7687912387800152, "learning_rate": 9.106906294750806e-07, "loss": 0.3147, "step": 2438 }, { "epoch": 0.7280597014925373, "grad_norm": 0.9034996997981363, "learning_rate": 9.088253773040947e-07, "loss": 0.3687, "step": 2439 }, { "epoch": 0.7283582089552239, "grad_norm": 0.8836325866150969, "learning_rate": 9.069616128425964e-07, "loss": 0.4087, "step": 2440 }, { "epoch": 0.7286567164179104, "grad_norm": 1.0041210920794168, "learning_rate": 9.050993378331599e-07, "loss": 0.3454, "step": 2441 }, { "epoch": 0.728955223880597, "grad_norm": 0.9216219623020782, "learning_rate": 9.032385540169664e-07, "loss": 0.3622, "step": 2442 }, { "epoch": 0.7292537313432835, "grad_norm": 0.8079424743719126, "learning_rate": 9.013792631338048e-07, "loss": 0.3418, "step": 2443 }, { "epoch": 0.7295522388059702, "grad_norm": 0.9118676627334383, "learning_rate": 8.995214669220648e-07, "loss": 0.3449, "step": 2444 }, { "epoch": 0.7298507462686568, "grad_norm": 0.8611393356418761, "learning_rate": 8.976651671187417e-07, "loss": 0.3299, "step": 2445 }, { "epoch": 0.7301492537313433, "grad_norm": 0.7639011408029112, "learning_rate": 8.958103654594302e-07, "loss": 0.3003, "step": 2446 }, { "epoch": 0.7304477611940299, "grad_norm": 0.8195346005096708, "learning_rate": 8.939570636783249e-07, "loss": 0.3394, "step": 2447 }, { "epoch": 0.7307462686567164, "grad_norm": 0.8782576394827525, "learning_rate": 8.921052635082175e-07, "loss": 0.361, "step": 2448 }, { "epoch": 0.731044776119403, "grad_norm": 0.8671344977010738, "learning_rate": 8.902549666804971e-07, "loss": 0.3828, "step": 2449 }, { "epoch": 0.7313432835820896, "grad_norm": 0.809160024689468, "learning_rate": 8.884061749251446e-07, "loss": 0.3113, "step": 2450 }, { "epoch": 0.7316417910447761, "grad_norm": 0.8158507141596787, "learning_rate": 8.865588899707358e-07, "loss": 0.3481, "step": 2451 }, { "epoch": 0.7319402985074627, "grad_norm": 0.8308589930715008, "learning_rate": 8.847131135444373e-07, "loss": 0.3448, "step": 2452 }, { "epoch": 0.7322388059701492, "grad_norm": 0.8737351072965946, "learning_rate": 8.828688473720051e-07, "loss": 0.3881, "step": 2453 }, { "epoch": 0.7325373134328358, "grad_norm": 0.8195810108023454, "learning_rate": 8.810260931777828e-07, "loss": 0.3467, "step": 2454 }, { "epoch": 0.7328358208955223, "grad_norm": 0.8343708212051258, "learning_rate": 8.791848526847016e-07, "loss": 0.3586, "step": 2455 }, { "epoch": 0.7331343283582089, "grad_norm": 0.8073884493919178, "learning_rate": 8.77345127614275e-07, "loss": 0.3176, "step": 2456 }, { "epoch": 0.7334328358208955, "grad_norm": 0.9085802883997077, "learning_rate": 8.755069196866014e-07, "loss": 0.3577, "step": 2457 }, { "epoch": 0.7337313432835821, "grad_norm": 0.7503848280448058, "learning_rate": 8.736702306203612e-07, "loss": 0.2852, "step": 2458 }, { "epoch": 0.7340298507462687, "grad_norm": 1.180773273009247, "learning_rate": 8.718350621328137e-07, "loss": 0.353, "step": 2459 }, { "epoch": 0.7343283582089553, "grad_norm": 0.8460574776608701, "learning_rate": 8.700014159397971e-07, "loss": 0.3534, "step": 2460 }, { "epoch": 0.7346268656716418, "grad_norm": 0.8636506104198776, "learning_rate": 8.681692937557246e-07, "loss": 0.2733, "step": 2461 }, { "epoch": 0.7349253731343284, "grad_norm": 0.7554477293718002, "learning_rate": 8.663386972935864e-07, "loss": 0.3375, "step": 2462 }, { "epoch": 0.7352238805970149, "grad_norm": 0.8170180017573404, "learning_rate": 8.645096282649448e-07, "loss": 0.3501, "step": 2463 }, { "epoch": 0.7355223880597015, "grad_norm": 0.785002214591176, "learning_rate": 8.626820883799364e-07, "loss": 0.322, "step": 2464 }, { "epoch": 0.735820895522388, "grad_norm": 0.8124518915482682, "learning_rate": 8.608560793472667e-07, "loss": 0.3488, "step": 2465 }, { "epoch": 0.7361194029850746, "grad_norm": 0.8819651422697621, "learning_rate": 8.590316028742079e-07, "loss": 0.3871, "step": 2466 }, { "epoch": 0.7364179104477612, "grad_norm": 0.8051048323090729, "learning_rate": 8.572086606666016e-07, "loss": 0.3391, "step": 2467 }, { "epoch": 0.7367164179104477, "grad_norm": 0.8716400545844462, "learning_rate": 8.553872544288544e-07, "loss": 0.3561, "step": 2468 }, { "epoch": 0.7370149253731343, "grad_norm": 1.0077273262938058, "learning_rate": 8.535673858639368e-07, "loss": 0.3269, "step": 2469 }, { "epoch": 0.7373134328358208, "grad_norm": 0.7495287643901639, "learning_rate": 8.517490566733827e-07, "loss": 0.3547, "step": 2470 }, { "epoch": 0.7376119402985075, "grad_norm": 0.94770061816034, "learning_rate": 8.499322685572834e-07, "loss": 0.3732, "step": 2471 }, { "epoch": 0.7379104477611941, "grad_norm": 0.8036303894234751, "learning_rate": 8.481170232142923e-07, "loss": 0.3387, "step": 2472 }, { "epoch": 0.7382089552238806, "grad_norm": 0.770395344765945, "learning_rate": 8.463033223416201e-07, "loss": 0.2754, "step": 2473 }, { "epoch": 0.7385074626865672, "grad_norm": 0.7821239157671365, "learning_rate": 8.444911676350326e-07, "loss": 0.3406, "step": 2474 }, { "epoch": 0.7388059701492538, "grad_norm": 0.8604005758402877, "learning_rate": 8.426805607888502e-07, "loss": 0.3882, "step": 2475 }, { "epoch": 0.7391044776119403, "grad_norm": 0.7436455606846663, "learning_rate": 8.408715034959469e-07, "loss": 0.3026, "step": 2476 }, { "epoch": 0.7394029850746269, "grad_norm": 0.8531423147657465, "learning_rate": 8.390639974477461e-07, "loss": 0.3147, "step": 2477 }, { "epoch": 0.7397014925373134, "grad_norm": 0.897354179300323, "learning_rate": 8.372580443342218e-07, "loss": 0.3369, "step": 2478 }, { "epoch": 0.74, "grad_norm": 0.7937336535275226, "learning_rate": 8.354536458438969e-07, "loss": 0.3417, "step": 2479 }, { "epoch": 0.7402985074626866, "grad_norm": 0.7948747004341821, "learning_rate": 8.3365080366384e-07, "loss": 0.3138, "step": 2480 }, { "epoch": 0.7405970149253731, "grad_norm": 0.8198556735193836, "learning_rate": 8.31849519479665e-07, "loss": 0.3394, "step": 2481 }, { "epoch": 0.7408955223880597, "grad_norm": 0.8619919834482459, "learning_rate": 8.300497949755271e-07, "loss": 0.312, "step": 2482 }, { "epoch": 0.7411940298507462, "grad_norm": 0.8257206030147372, "learning_rate": 8.282516318341258e-07, "loss": 0.3253, "step": 2483 }, { "epoch": 0.7414925373134328, "grad_norm": 0.8361638399301501, "learning_rate": 8.264550317366998e-07, "loss": 0.3585, "step": 2484 }, { "epoch": 0.7417910447761195, "grad_norm": 0.9277677786456653, "learning_rate": 8.246599963630266e-07, "loss": 0.3364, "step": 2485 }, { "epoch": 0.742089552238806, "grad_norm": 0.8008796354592284, "learning_rate": 8.228665273914202e-07, "loss": 0.305, "step": 2486 }, { "epoch": 0.7423880597014926, "grad_norm": 0.7154547857109149, "learning_rate": 8.210746264987315e-07, "loss": 0.3003, "step": 2487 }, { "epoch": 0.7426865671641791, "grad_norm": 0.8086001172238579, "learning_rate": 8.192842953603422e-07, "loss": 0.3445, "step": 2488 }, { "epoch": 0.7429850746268657, "grad_norm": 0.809703550422652, "learning_rate": 8.174955356501693e-07, "loss": 0.3528, "step": 2489 }, { "epoch": 0.7432835820895523, "grad_norm": 0.7940020900981949, "learning_rate": 8.157083490406593e-07, "loss": 0.3609, "step": 2490 }, { "epoch": 0.7435820895522388, "grad_norm": 0.8667872980953691, "learning_rate": 8.139227372027883e-07, "loss": 0.3538, "step": 2491 }, { "epoch": 0.7438805970149254, "grad_norm": 0.8022738790006476, "learning_rate": 8.121387018060601e-07, "loss": 0.3551, "step": 2492 }, { "epoch": 0.7441791044776119, "grad_norm": 0.7944094322718988, "learning_rate": 8.103562445185045e-07, "loss": 0.2928, "step": 2493 }, { "epoch": 0.7444776119402985, "grad_norm": 0.9134473219974198, "learning_rate": 8.085753670066746e-07, "loss": 0.3283, "step": 2494 }, { "epoch": 0.744776119402985, "grad_norm": 0.9171291476302468, "learning_rate": 8.067960709356479e-07, "loss": 0.3432, "step": 2495 }, { "epoch": 0.7450746268656716, "grad_norm": 0.7630031740731616, "learning_rate": 8.05018357969023e-07, "loss": 0.2707, "step": 2496 }, { "epoch": 0.7453731343283582, "grad_norm": 0.8408211927826157, "learning_rate": 8.03242229768918e-07, "loss": 0.3425, "step": 2497 }, { "epoch": 0.7456716417910447, "grad_norm": 0.8479202019499819, "learning_rate": 8.014676879959704e-07, "loss": 0.3288, "step": 2498 }, { "epoch": 0.7459701492537314, "grad_norm": 0.8959963249910727, "learning_rate": 7.996947343093323e-07, "loss": 0.3825, "step": 2499 }, { "epoch": 0.746268656716418, "grad_norm": 0.8469480512025105, "learning_rate": 7.979233703666714e-07, "loss": 0.3364, "step": 2500 }, { "epoch": 0.7465671641791045, "grad_norm": 0.8363118813056599, "learning_rate": 7.961535978241722e-07, "loss": 0.3655, "step": 2501 }, { "epoch": 0.7468656716417911, "grad_norm": 0.7993313849201408, "learning_rate": 7.943854183365277e-07, "loss": 0.3081, "step": 2502 }, { "epoch": 0.7471641791044776, "grad_norm": 0.8028836757795254, "learning_rate": 7.926188335569432e-07, "loss": 0.3151, "step": 2503 }, { "epoch": 0.7474626865671642, "grad_norm": 0.8193299901791269, "learning_rate": 7.908538451371311e-07, "loss": 0.3327, "step": 2504 }, { "epoch": 0.7477611940298508, "grad_norm": 0.812821295525394, "learning_rate": 7.890904547273134e-07, "loss": 0.3028, "step": 2505 }, { "epoch": 0.7480597014925373, "grad_norm": 0.7946631103592113, "learning_rate": 7.873286639762171e-07, "loss": 0.3305, "step": 2506 }, { "epoch": 0.7483582089552239, "grad_norm": 0.8168449751348733, "learning_rate": 7.855684745310732e-07, "loss": 0.311, "step": 2507 }, { "epoch": 0.7486567164179104, "grad_norm": 0.7577415278692218, "learning_rate": 7.838098880376175e-07, "loss": 0.3005, "step": 2508 }, { "epoch": 0.748955223880597, "grad_norm": 1.007066007674342, "learning_rate": 7.820529061400833e-07, "loss": 0.3422, "step": 2509 }, { "epoch": 0.7492537313432835, "grad_norm": 0.8285843650225135, "learning_rate": 7.802975304812068e-07, "loss": 0.3649, "step": 2510 }, { "epoch": 0.7495522388059701, "grad_norm": 0.8802781806958206, "learning_rate": 7.785437627022216e-07, "loss": 0.3442, "step": 2511 }, { "epoch": 0.7498507462686567, "grad_norm": 0.8872697995230727, "learning_rate": 7.767916044428578e-07, "loss": 0.3535, "step": 2512 }, { "epoch": 0.7501492537313433, "grad_norm": 0.7493499394945767, "learning_rate": 7.750410573413403e-07, "loss": 0.3, "step": 2513 }, { "epoch": 0.7504477611940299, "grad_norm": 0.8420235208681436, "learning_rate": 7.732921230343893e-07, "loss": 0.3361, "step": 2514 }, { "epoch": 0.7507462686567165, "grad_norm": 0.9016435397252184, "learning_rate": 7.715448031572137e-07, "loss": 0.3665, "step": 2515 }, { "epoch": 0.751044776119403, "grad_norm": 0.8254349682410594, "learning_rate": 7.697990993435162e-07, "loss": 0.3658, "step": 2516 }, { "epoch": 0.7513432835820896, "grad_norm": 0.8283799880314672, "learning_rate": 7.680550132254869e-07, "loss": 0.2997, "step": 2517 }, { "epoch": 0.7516417910447761, "grad_norm": 0.7889635309394748, "learning_rate": 7.663125464338042e-07, "loss": 0.3662, "step": 2518 }, { "epoch": 0.7519402985074627, "grad_norm": 0.7589268133008595, "learning_rate": 7.645717005976328e-07, "loss": 0.2914, "step": 2519 }, { "epoch": 0.7522388059701492, "grad_norm": 0.8796818140938326, "learning_rate": 7.628324773446192e-07, "loss": 0.3653, "step": 2520 }, { "epoch": 0.7525373134328358, "grad_norm": 0.8567321353644352, "learning_rate": 7.610948783008956e-07, "loss": 0.3185, "step": 2521 }, { "epoch": 0.7528358208955224, "grad_norm": 0.8340526715199753, "learning_rate": 7.593589050910752e-07, "loss": 0.3609, "step": 2522 }, { "epoch": 0.7531343283582089, "grad_norm": 0.8400627991409522, "learning_rate": 7.576245593382503e-07, "loss": 0.3362, "step": 2523 }, { "epoch": 0.7534328358208955, "grad_norm": 0.8335760592291928, "learning_rate": 7.558918426639919e-07, "loss": 0.3326, "step": 2524 }, { "epoch": 0.753731343283582, "grad_norm": 0.7330595579589215, "learning_rate": 7.541607566883486e-07, "loss": 0.316, "step": 2525 }, { "epoch": 0.7540298507462686, "grad_norm": 0.8319155733303641, "learning_rate": 7.524313030298422e-07, "loss": 0.3825, "step": 2526 }, { "epoch": 0.7543283582089553, "grad_norm": 0.8537564302430024, "learning_rate": 7.507034833054705e-07, "loss": 0.3675, "step": 2527 }, { "epoch": 0.7546268656716418, "grad_norm": 0.8541046358406873, "learning_rate": 7.489772991307026e-07, "loss": 0.3463, "step": 2528 }, { "epoch": 0.7549253731343284, "grad_norm": 0.8333446760301982, "learning_rate": 7.472527521194789e-07, "loss": 0.3248, "step": 2529 }, { "epoch": 0.755223880597015, "grad_norm": 0.918746792475965, "learning_rate": 7.455298438842096e-07, "loss": 0.3618, "step": 2530 }, { "epoch": 0.7555223880597015, "grad_norm": 0.7709947075005573, "learning_rate": 7.4380857603577e-07, "loss": 0.323, "step": 2531 }, { "epoch": 0.7558208955223881, "grad_norm": 0.8113705340822994, "learning_rate": 7.420889501835046e-07, "loss": 0.3353, "step": 2532 }, { "epoch": 0.7561194029850746, "grad_norm": 0.8316485419308024, "learning_rate": 7.403709679352216e-07, "loss": 0.3197, "step": 2533 }, { "epoch": 0.7564179104477612, "grad_norm": 0.9244834653645814, "learning_rate": 7.386546308971926e-07, "loss": 0.3625, "step": 2534 }, { "epoch": 0.7567164179104477, "grad_norm": 0.7698158757397285, "learning_rate": 7.369399406741509e-07, "loss": 0.3401, "step": 2535 }, { "epoch": 0.7570149253731343, "grad_norm": 0.7936072499011064, "learning_rate": 7.35226898869291e-07, "loss": 0.3294, "step": 2536 }, { "epoch": 0.7573134328358209, "grad_norm": 0.8096055523595336, "learning_rate": 7.335155070842631e-07, "loss": 0.3354, "step": 2537 }, { "epoch": 0.7576119402985074, "grad_norm": 0.8150395641292778, "learning_rate": 7.318057669191775e-07, "loss": 0.3378, "step": 2538 }, { "epoch": 0.757910447761194, "grad_norm": 0.8749495077069125, "learning_rate": 7.300976799726007e-07, "loss": 0.3548, "step": 2539 }, { "epoch": 0.7582089552238805, "grad_norm": 0.7864759817642071, "learning_rate": 7.283912478415522e-07, "loss": 0.298, "step": 2540 }, { "epoch": 0.7585074626865672, "grad_norm": 0.85761429358801, "learning_rate": 7.266864721215033e-07, "loss": 0.3667, "step": 2541 }, { "epoch": 0.7588059701492538, "grad_norm": 0.7614755089756268, "learning_rate": 7.249833544063786e-07, "loss": 0.3075, "step": 2542 }, { "epoch": 0.7591044776119403, "grad_norm": 0.7998879965445289, "learning_rate": 7.232818962885513e-07, "loss": 0.3155, "step": 2543 }, { "epoch": 0.7594029850746269, "grad_norm": 1.1882550211054863, "learning_rate": 7.215820993588435e-07, "loss": 0.3549, "step": 2544 }, { "epoch": 0.7597014925373134, "grad_norm": 0.6960475242707338, "learning_rate": 7.198839652065234e-07, "loss": 0.2709, "step": 2545 }, { "epoch": 0.76, "grad_norm": 0.8929595250452405, "learning_rate": 7.181874954193066e-07, "loss": 0.4011, "step": 2546 }, { "epoch": 0.7602985074626866, "grad_norm": 0.8085099466146569, "learning_rate": 7.164926915833489e-07, "loss": 0.3258, "step": 2547 }, { "epoch": 0.7605970149253731, "grad_norm": 0.7526251937377817, "learning_rate": 7.147995552832518e-07, "loss": 0.3357, "step": 2548 }, { "epoch": 0.7608955223880597, "grad_norm": 0.7452474358495229, "learning_rate": 7.13108088102056e-07, "loss": 0.2711, "step": 2549 }, { "epoch": 0.7611940298507462, "grad_norm": 0.8572099572681284, "learning_rate": 7.114182916212423e-07, "loss": 0.3695, "step": 2550 }, { "epoch": 0.7614925373134328, "grad_norm": 0.9598211721736167, "learning_rate": 7.097301674207291e-07, "loss": 0.3611, "step": 2551 }, { "epoch": 0.7617910447761194, "grad_norm": 0.9085391291886787, "learning_rate": 7.080437170788723e-07, "loss": 0.3824, "step": 2552 }, { "epoch": 0.7620895522388059, "grad_norm": 0.8177813221411165, "learning_rate": 7.063589421724601e-07, "loss": 0.2772, "step": 2553 }, { "epoch": 0.7623880597014925, "grad_norm": 0.8628641410267316, "learning_rate": 7.046758442767171e-07, "loss": 0.334, "step": 2554 }, { "epoch": 0.7626865671641792, "grad_norm": 0.928648183310679, "learning_rate": 7.029944249652987e-07, "loss": 0.3221, "step": 2555 }, { "epoch": 0.7629850746268657, "grad_norm": 0.9255552592094868, "learning_rate": 7.013146858102906e-07, "loss": 0.3226, "step": 2556 }, { "epoch": 0.7632835820895523, "grad_norm": 0.7920749204965727, "learning_rate": 6.996366283822093e-07, "loss": 0.2868, "step": 2557 }, { "epoch": 0.7635820895522388, "grad_norm": 0.9935006013461068, "learning_rate": 6.979602542499955e-07, "loss": 0.313, "step": 2558 }, { "epoch": 0.7638805970149254, "grad_norm": 0.790352931784802, "learning_rate": 6.962855649810193e-07, "loss": 0.3091, "step": 2559 }, { "epoch": 0.764179104477612, "grad_norm": 0.784302622452047, "learning_rate": 6.946125621410746e-07, "loss": 0.3021, "step": 2560 }, { "epoch": 0.7644776119402985, "grad_norm": 0.8521244035570953, "learning_rate": 6.929412472943775e-07, "loss": 0.3065, "step": 2561 }, { "epoch": 0.7647761194029851, "grad_norm": 0.7990696640196155, "learning_rate": 6.91271622003567e-07, "loss": 0.317, "step": 2562 }, { "epoch": 0.7650746268656716, "grad_norm": 0.7301484428928479, "learning_rate": 6.896036878297033e-07, "loss": 0.3071, "step": 2563 }, { "epoch": 0.7653731343283582, "grad_norm": 0.7370476408839566, "learning_rate": 6.879374463322619e-07, "loss": 0.2726, "step": 2564 }, { "epoch": 0.7656716417910447, "grad_norm": 0.9000167299852614, "learning_rate": 6.862728990691392e-07, "loss": 0.3357, "step": 2565 }, { "epoch": 0.7659701492537313, "grad_norm": 0.7870895798188804, "learning_rate": 6.846100475966461e-07, "loss": 0.2879, "step": 2566 }, { "epoch": 0.7662686567164179, "grad_norm": 0.8360237218908565, "learning_rate": 6.829488934695083e-07, "loss": 0.3215, "step": 2567 }, { "epoch": 0.7665671641791045, "grad_norm": 0.9359634021391391, "learning_rate": 6.812894382408652e-07, "loss": 0.3455, "step": 2568 }, { "epoch": 0.7668656716417911, "grad_norm": 0.930052847096152, "learning_rate": 6.796316834622654e-07, "loss": 0.3582, "step": 2569 }, { "epoch": 0.7671641791044777, "grad_norm": 0.9038130785381302, "learning_rate": 6.779756306836702e-07, "loss": 0.2985, "step": 2570 }, { "epoch": 0.7674626865671642, "grad_norm": 0.7826417580282887, "learning_rate": 6.763212814534484e-07, "loss": 0.3482, "step": 2571 }, { "epoch": 0.7677611940298508, "grad_norm": 0.8486630906094814, "learning_rate": 6.746686373183761e-07, "loss": 0.3688, "step": 2572 }, { "epoch": 0.7680597014925373, "grad_norm": 0.8735312621938363, "learning_rate": 6.730176998236355e-07, "loss": 0.3815, "step": 2573 }, { "epoch": 0.7683582089552239, "grad_norm": 0.8940128219367052, "learning_rate": 6.713684705128135e-07, "loss": 0.3194, "step": 2574 }, { "epoch": 0.7686567164179104, "grad_norm": 0.9408242887386759, "learning_rate": 6.697209509278979e-07, "loss": 0.3658, "step": 2575 }, { "epoch": 0.768955223880597, "grad_norm": 0.8959030487388594, "learning_rate": 6.680751426092791e-07, "loss": 0.3391, "step": 2576 }, { "epoch": 0.7692537313432836, "grad_norm": 0.7803592674313101, "learning_rate": 6.664310470957497e-07, "loss": 0.2969, "step": 2577 }, { "epoch": 0.7695522388059701, "grad_norm": 0.7505213649281478, "learning_rate": 6.647886659244987e-07, "loss": 0.2958, "step": 2578 }, { "epoch": 0.7698507462686567, "grad_norm": 0.8011860941500494, "learning_rate": 6.631480006311108e-07, "loss": 0.311, "step": 2579 }, { "epoch": 0.7701492537313432, "grad_norm": 0.7936319294693487, "learning_rate": 6.61509052749569e-07, "loss": 0.3458, "step": 2580 }, { "epoch": 0.7704477611940298, "grad_norm": 0.7316957671250423, "learning_rate": 6.598718238122497e-07, "loss": 0.2991, "step": 2581 }, { "epoch": 0.7707462686567165, "grad_norm": 0.8104928093316218, "learning_rate": 6.582363153499221e-07, "loss": 0.3437, "step": 2582 }, { "epoch": 0.771044776119403, "grad_norm": 0.9010645711228973, "learning_rate": 6.566025288917463e-07, "loss": 0.3652, "step": 2583 }, { "epoch": 0.7713432835820896, "grad_norm": 0.8089700577841448, "learning_rate": 6.549704659652742e-07, "loss": 0.371, "step": 2584 }, { "epoch": 0.7716417910447761, "grad_norm": 0.8261177060625268, "learning_rate": 6.533401280964427e-07, "loss": 0.3337, "step": 2585 }, { "epoch": 0.7719402985074627, "grad_norm": 0.8072522990077107, "learning_rate": 6.517115168095792e-07, "loss": 0.3177, "step": 2586 }, { "epoch": 0.7722388059701493, "grad_norm": 0.8196927890724248, "learning_rate": 6.500846336273953e-07, "loss": 0.3239, "step": 2587 }, { "epoch": 0.7725373134328358, "grad_norm": 0.8447623565955257, "learning_rate": 6.484594800709873e-07, "loss": 0.3301, "step": 2588 }, { "epoch": 0.7728358208955224, "grad_norm": 0.8469818517147698, "learning_rate": 6.468360576598339e-07, "loss": 0.3407, "step": 2589 }, { "epoch": 0.7731343283582089, "grad_norm": 0.8200398390703326, "learning_rate": 6.452143679117965e-07, "loss": 0.3304, "step": 2590 }, { "epoch": 0.7734328358208955, "grad_norm": 0.8316560060664724, "learning_rate": 6.435944123431137e-07, "loss": 0.3667, "step": 2591 }, { "epoch": 0.7737313432835821, "grad_norm": 0.8479185212991167, "learning_rate": 6.419761924684056e-07, "loss": 0.3306, "step": 2592 }, { "epoch": 0.7740298507462686, "grad_norm": 0.7788855711723567, "learning_rate": 6.403597098006684e-07, "loss": 0.3336, "step": 2593 }, { "epoch": 0.7743283582089552, "grad_norm": 0.8580372589130707, "learning_rate": 6.387449658512735e-07, "loss": 0.3446, "step": 2594 }, { "epoch": 0.7746268656716417, "grad_norm": 0.8545734419513868, "learning_rate": 6.371319621299679e-07, "loss": 0.338, "step": 2595 }, { "epoch": 0.7749253731343284, "grad_norm": 1.3812292290362789, "learning_rate": 6.355207001448696e-07, "loss": 0.3346, "step": 2596 }, { "epoch": 0.775223880597015, "grad_norm": 0.853363224946842, "learning_rate": 6.339111814024701e-07, "loss": 0.391, "step": 2597 }, { "epoch": 0.7755223880597015, "grad_norm": 0.8265934162348483, "learning_rate": 6.323034074076298e-07, "loss": 0.3339, "step": 2598 }, { "epoch": 0.7758208955223881, "grad_norm": 0.9064944014108518, "learning_rate": 6.306973796635785e-07, "loss": 0.3589, "step": 2599 }, { "epoch": 0.7761194029850746, "grad_norm": 0.9677195208881169, "learning_rate": 6.29093099671913e-07, "loss": 0.317, "step": 2600 }, { "epoch": 0.7764179104477612, "grad_norm": 0.8311565173033396, "learning_rate": 6.274905689325966e-07, "loss": 0.3472, "step": 2601 }, { "epoch": 0.7767164179104478, "grad_norm": 0.7950764731124217, "learning_rate": 6.258897889439549e-07, "loss": 0.3109, "step": 2602 }, { "epoch": 0.7770149253731343, "grad_norm": 0.9141762608429268, "learning_rate": 6.242907612026792e-07, "loss": 0.3974, "step": 2603 }, { "epoch": 0.7773134328358209, "grad_norm": 0.8803598215750571, "learning_rate": 6.22693487203821e-07, "loss": 0.3331, "step": 2604 }, { "epoch": 0.7776119402985074, "grad_norm": 0.7668038317932448, "learning_rate": 6.210979684407931e-07, "loss": 0.3298, "step": 2605 }, { "epoch": 0.777910447761194, "grad_norm": 0.8448858692096748, "learning_rate": 6.195042064053672e-07, "loss": 0.3259, "step": 2606 }, { "epoch": 0.7782089552238806, "grad_norm": 0.8115377621130089, "learning_rate": 6.179122025876702e-07, "loss": 0.3155, "step": 2607 }, { "epoch": 0.7785074626865671, "grad_norm": 0.7350727849438435, "learning_rate": 6.163219584761879e-07, "loss": 0.3448, "step": 2608 }, { "epoch": 0.7788059701492537, "grad_norm": 0.803225857620502, "learning_rate": 6.147334755577597e-07, "loss": 0.3125, "step": 2609 }, { "epoch": 0.7791044776119403, "grad_norm": 0.7835583970542247, "learning_rate": 6.131467553175785e-07, "loss": 0.295, "step": 2610 }, { "epoch": 0.7794029850746269, "grad_norm": 0.802514614490111, "learning_rate": 6.11561799239189e-07, "loss": 0.3624, "step": 2611 }, { "epoch": 0.7797014925373135, "grad_norm": 0.8264140923298133, "learning_rate": 6.099786088044871e-07, "loss": 0.3453, "step": 2612 }, { "epoch": 0.78, "grad_norm": 0.7730123427638769, "learning_rate": 6.083971854937157e-07, "loss": 0.3126, "step": 2613 }, { "epoch": 0.7802985074626866, "grad_norm": 0.876817858400939, "learning_rate": 6.068175307854676e-07, "loss": 0.3374, "step": 2614 }, { "epoch": 0.7805970149253731, "grad_norm": 0.7780503068811441, "learning_rate": 6.052396461566823e-07, "loss": 0.402, "step": 2615 }, { "epoch": 0.7808955223880597, "grad_norm": 0.769230896426925, "learning_rate": 6.036635330826437e-07, "loss": 0.3172, "step": 2616 }, { "epoch": 0.7811940298507463, "grad_norm": 0.9137700677139416, "learning_rate": 6.02089193036978e-07, "loss": 0.3312, "step": 2617 }, { "epoch": 0.7814925373134328, "grad_norm": 0.7641819962198311, "learning_rate": 6.00516627491655e-07, "loss": 0.3646, "step": 2618 }, { "epoch": 0.7817910447761194, "grad_norm": 0.7985862077567278, "learning_rate": 5.989458379169852e-07, "loss": 0.3615, "step": 2619 }, { "epoch": 0.7820895522388059, "grad_norm": 0.8430048020986005, "learning_rate": 5.973768257816187e-07, "loss": 0.3587, "step": 2620 }, { "epoch": 0.7823880597014925, "grad_norm": 0.8377553518393402, "learning_rate": 5.958095925525437e-07, "loss": 0.2958, "step": 2621 }, { "epoch": 0.7826865671641791, "grad_norm": 0.822488397832201, "learning_rate": 5.942441396950857e-07, "loss": 0.3351, "step": 2622 }, { "epoch": 0.7829850746268656, "grad_norm": 0.8772777506321962, "learning_rate": 5.926804686729034e-07, "loss": 0.3915, "step": 2623 }, { "epoch": 0.7832835820895523, "grad_norm": 0.8598726121904382, "learning_rate": 5.911185809479919e-07, "loss": 0.2941, "step": 2624 }, { "epoch": 0.7835820895522388, "grad_norm": 0.767393624496139, "learning_rate": 5.895584779806782e-07, "loss": 0.3313, "step": 2625 }, { "epoch": 0.7838805970149254, "grad_norm": 0.8365941078767859, "learning_rate": 5.880001612296208e-07, "loss": 0.3248, "step": 2626 }, { "epoch": 0.784179104477612, "grad_norm": 0.8689097485635112, "learning_rate": 5.864436321518077e-07, "loss": 0.3254, "step": 2627 }, { "epoch": 0.7844776119402985, "grad_norm": 0.8501156388344782, "learning_rate": 5.848888922025553e-07, "loss": 0.356, "step": 2628 }, { "epoch": 0.7847761194029851, "grad_norm": 0.871278978235916, "learning_rate": 5.833359428355076e-07, "loss": 0.3179, "step": 2629 }, { "epoch": 0.7850746268656716, "grad_norm": 0.8117067102631867, "learning_rate": 5.817847855026346e-07, "loss": 0.3056, "step": 2630 }, { "epoch": 0.7853731343283582, "grad_norm": 0.7831623164451796, "learning_rate": 5.802354216542305e-07, "loss": 0.2978, "step": 2631 }, { "epoch": 0.7856716417910448, "grad_norm": 0.8648788615413797, "learning_rate": 5.78687852738913e-07, "loss": 0.3269, "step": 2632 }, { "epoch": 0.7859701492537313, "grad_norm": 1.2129702699211065, "learning_rate": 5.771420802036218e-07, "loss": 0.3086, "step": 2633 }, { "epoch": 0.7862686567164179, "grad_norm": 0.8425751147312559, "learning_rate": 5.755981054936152e-07, "loss": 0.3467, "step": 2634 }, { "epoch": 0.7865671641791044, "grad_norm": 0.8226182315548135, "learning_rate": 5.740559300524726e-07, "loss": 0.3224, "step": 2635 }, { "epoch": 0.786865671641791, "grad_norm": 0.7695238455194613, "learning_rate": 5.725155553220904e-07, "loss": 0.282, "step": 2636 }, { "epoch": 0.7871641791044776, "grad_norm": 0.8454114923108801, "learning_rate": 5.709769827426814e-07, "loss": 0.3547, "step": 2637 }, { "epoch": 0.7874626865671642, "grad_norm": 0.9164178244850171, "learning_rate": 5.694402137527735e-07, "loss": 0.3695, "step": 2638 }, { "epoch": 0.7877611940298508, "grad_norm": 0.9339118467727153, "learning_rate": 5.679052497892093e-07, "loss": 0.3797, "step": 2639 }, { "epoch": 0.7880597014925373, "grad_norm": 0.8732595479379647, "learning_rate": 5.663720922871408e-07, "loss": 0.3355, "step": 2640 }, { "epoch": 0.7883582089552239, "grad_norm": 0.7644896912195902, "learning_rate": 5.648407426800337e-07, "loss": 0.3427, "step": 2641 }, { "epoch": 0.7886567164179105, "grad_norm": 0.9237943784299637, "learning_rate": 5.633112023996626e-07, "loss": 0.3387, "step": 2642 }, { "epoch": 0.788955223880597, "grad_norm": 0.8362466946619596, "learning_rate": 5.617834728761104e-07, "loss": 0.329, "step": 2643 }, { "epoch": 0.7892537313432836, "grad_norm": 0.819307805738088, "learning_rate": 5.602575555377676e-07, "loss": 0.3064, "step": 2644 }, { "epoch": 0.7895522388059701, "grad_norm": 0.7447822449758292, "learning_rate": 5.587334518113285e-07, "loss": 0.3393, "step": 2645 }, { "epoch": 0.7898507462686567, "grad_norm": 0.8444167330983842, "learning_rate": 5.572111631217936e-07, "loss": 0.339, "step": 2646 }, { "epoch": 0.7901492537313433, "grad_norm": 0.8006438974002873, "learning_rate": 5.556906908924656e-07, "loss": 0.3163, "step": 2647 }, { "epoch": 0.7904477611940298, "grad_norm": 0.9751532556450391, "learning_rate": 5.541720365449488e-07, "loss": 0.3634, "step": 2648 }, { "epoch": 0.7907462686567164, "grad_norm": 0.8268096722848733, "learning_rate": 5.526552014991485e-07, "loss": 0.3187, "step": 2649 }, { "epoch": 0.7910447761194029, "grad_norm": 0.7212108804420516, "learning_rate": 5.511401871732685e-07, "loss": 0.3009, "step": 2650 }, { "epoch": 0.7913432835820896, "grad_norm": 0.8746502710892506, "learning_rate": 5.496269949838096e-07, "loss": 0.3687, "step": 2651 }, { "epoch": 0.7916417910447762, "grad_norm": 0.8435667329953185, "learning_rate": 5.481156263455692e-07, "loss": 0.3187, "step": 2652 }, { "epoch": 0.7919402985074627, "grad_norm": 0.7718206010241203, "learning_rate": 5.466060826716416e-07, "loss": 0.2981, "step": 2653 }, { "epoch": 0.7922388059701493, "grad_norm": 0.8420876178922836, "learning_rate": 5.450983653734135e-07, "loss": 0.3497, "step": 2654 }, { "epoch": 0.7925373134328358, "grad_norm": 0.7794372062492068, "learning_rate": 5.435924758605621e-07, "loss": 0.3162, "step": 2655 }, { "epoch": 0.7928358208955224, "grad_norm": 0.7218324952169056, "learning_rate": 5.420884155410583e-07, "loss": 0.2859, "step": 2656 }, { "epoch": 0.793134328358209, "grad_norm": 0.8935853893312067, "learning_rate": 5.405861858211617e-07, "loss": 0.3275, "step": 2657 }, { "epoch": 0.7934328358208955, "grad_norm": 0.8885132950616169, "learning_rate": 5.390857881054206e-07, "loss": 0.335, "step": 2658 }, { "epoch": 0.7937313432835821, "grad_norm": 0.7811013082337122, "learning_rate": 5.375872237966706e-07, "loss": 0.3193, "step": 2659 }, { "epoch": 0.7940298507462686, "grad_norm": 0.761806698796138, "learning_rate": 5.360904942960327e-07, "loss": 0.3307, "step": 2660 }, { "epoch": 0.7943283582089552, "grad_norm": 0.8600929269448834, "learning_rate": 5.345956010029118e-07, "loss": 0.3242, "step": 2661 }, { "epoch": 0.7946268656716418, "grad_norm": 0.8628730775985842, "learning_rate": 5.331025453149971e-07, "loss": 0.2922, "step": 2662 }, { "epoch": 0.7949253731343283, "grad_norm": 0.8558985245441267, "learning_rate": 5.316113286282595e-07, "loss": 0.3258, "step": 2663 }, { "epoch": 0.7952238805970149, "grad_norm": 0.8797671914656098, "learning_rate": 5.301219523369499e-07, "loss": 0.3694, "step": 2664 }, { "epoch": 0.7955223880597015, "grad_norm": 0.7806683157905394, "learning_rate": 5.286344178336e-07, "loss": 0.3144, "step": 2665 }, { "epoch": 0.7958208955223881, "grad_norm": 0.81264272258197, "learning_rate": 5.271487265090163e-07, "loss": 0.3667, "step": 2666 }, { "epoch": 0.7961194029850747, "grad_norm": 0.8601858762639831, "learning_rate": 5.256648797522851e-07, "loss": 0.337, "step": 2667 }, { "epoch": 0.7964179104477612, "grad_norm": 0.8271856093309297, "learning_rate": 5.241828789507669e-07, "loss": 0.2935, "step": 2668 }, { "epoch": 0.7967164179104478, "grad_norm": 0.859387934817203, "learning_rate": 5.227027254900963e-07, "loss": 0.3188, "step": 2669 }, { "epoch": 0.7970149253731343, "grad_norm": 0.9900633077884397, "learning_rate": 5.212244207541806e-07, "loss": 0.4112, "step": 2670 }, { "epoch": 0.7973134328358209, "grad_norm": 0.7598238168460656, "learning_rate": 5.197479661251994e-07, "loss": 0.2827, "step": 2671 }, { "epoch": 0.7976119402985075, "grad_norm": 0.8105463195253384, "learning_rate": 5.182733629836006e-07, "loss": 0.3, "step": 2672 }, { "epoch": 0.797910447761194, "grad_norm": 0.8470648410134773, "learning_rate": 5.168006127081027e-07, "loss": 0.348, "step": 2673 }, { "epoch": 0.7982089552238806, "grad_norm": 0.8543097630359251, "learning_rate": 5.153297166756913e-07, "loss": 0.3301, "step": 2674 }, { "epoch": 0.7985074626865671, "grad_norm": 0.9187030166832789, "learning_rate": 5.138606762616186e-07, "loss": 0.3212, "step": 2675 }, { "epoch": 0.7988059701492537, "grad_norm": 0.746950665181267, "learning_rate": 5.123934928394011e-07, "loss": 0.2924, "step": 2676 }, { "epoch": 0.7991044776119403, "grad_norm": 0.8206743880698498, "learning_rate": 5.109281677808207e-07, "loss": 0.3299, "step": 2677 }, { "epoch": 0.7994029850746268, "grad_norm": 0.8977645162297897, "learning_rate": 5.094647024559188e-07, "loss": 0.363, "step": 2678 }, { "epoch": 0.7997014925373135, "grad_norm": 0.7603505513127645, "learning_rate": 5.080030982330008e-07, "loss": 0.2685, "step": 2679 }, { "epoch": 0.8, "grad_norm": 0.7885960133919522, "learning_rate": 5.065433564786312e-07, "loss": 0.3079, "step": 2680 }, { "epoch": 0.8002985074626866, "grad_norm": 0.801692605365031, "learning_rate": 5.050854785576326e-07, "loss": 0.3456, "step": 2681 }, { "epoch": 0.8005970149253732, "grad_norm": 0.7935079560838534, "learning_rate": 5.036294658330862e-07, "loss": 0.359, "step": 2682 }, { "epoch": 0.8008955223880597, "grad_norm": 0.7511048696552357, "learning_rate": 5.021753196663268e-07, "loss": 0.3289, "step": 2683 }, { "epoch": 0.8011940298507463, "grad_norm": 0.9217091503137018, "learning_rate": 5.007230414169464e-07, "loss": 0.3606, "step": 2684 }, { "epoch": 0.8014925373134328, "grad_norm": 0.8424874150110574, "learning_rate": 4.992726324427901e-07, "loss": 0.3526, "step": 2685 }, { "epoch": 0.8017910447761194, "grad_norm": 0.7801243835807536, "learning_rate": 4.978240940999546e-07, "loss": 0.2924, "step": 2686 }, { "epoch": 0.802089552238806, "grad_norm": 0.7866205363691048, "learning_rate": 4.96377427742788e-07, "loss": 0.3286, "step": 2687 }, { "epoch": 0.8023880597014925, "grad_norm": 0.8260723183869202, "learning_rate": 4.949326347238887e-07, "loss": 0.3512, "step": 2688 }, { "epoch": 0.8026865671641791, "grad_norm": 0.9243904923211944, "learning_rate": 4.934897163941008e-07, "loss": 0.3652, "step": 2689 }, { "epoch": 0.8029850746268656, "grad_norm": 0.7325605543620152, "learning_rate": 4.920486741025202e-07, "loss": 0.3073, "step": 2690 }, { "epoch": 0.8032835820895522, "grad_norm": 0.825997047722358, "learning_rate": 4.906095091964855e-07, "loss": 0.3904, "step": 2691 }, { "epoch": 0.8035820895522388, "grad_norm": 0.9371944525084498, "learning_rate": 4.891722230215812e-07, "loss": 0.3535, "step": 2692 }, { "epoch": 0.8038805970149254, "grad_norm": 0.8203179911092189, "learning_rate": 4.87736816921634e-07, "loss": 0.3135, "step": 2693 }, { "epoch": 0.804179104477612, "grad_norm": 0.9856622184168011, "learning_rate": 4.863032922387137e-07, "loss": 0.3265, "step": 2694 }, { "epoch": 0.8044776119402985, "grad_norm": 0.8181896117890224, "learning_rate": 4.848716503131312e-07, "loss": 0.3039, "step": 2695 }, { "epoch": 0.8047761194029851, "grad_norm": 0.8035708164782546, "learning_rate": 4.834418924834372e-07, "loss": 0.3458, "step": 2696 }, { "epoch": 0.8050746268656717, "grad_norm": 0.8265207452128133, "learning_rate": 4.820140200864198e-07, "loss": 0.3108, "step": 2697 }, { "epoch": 0.8053731343283582, "grad_norm": 0.8263073899283685, "learning_rate": 4.805880344571057e-07, "loss": 0.336, "step": 2698 }, { "epoch": 0.8056716417910448, "grad_norm": 0.7679933299366023, "learning_rate": 4.791639369287557e-07, "loss": 0.3183, "step": 2699 }, { "epoch": 0.8059701492537313, "grad_norm": 0.8067035384863501, "learning_rate": 4.777417288328667e-07, "loss": 0.3481, "step": 2700 }, { "epoch": 0.8062686567164179, "grad_norm": 0.8326924635445278, "learning_rate": 4.763214114991685e-07, "loss": 0.3581, "step": 2701 }, { "epoch": 0.8065671641791045, "grad_norm": 0.8792635453313903, "learning_rate": 4.749029862556234e-07, "loss": 0.3514, "step": 2702 }, { "epoch": 0.806865671641791, "grad_norm": 0.8231309020118249, "learning_rate": 4.7348645442842486e-07, "loss": 0.3245, "step": 2703 }, { "epoch": 0.8071641791044776, "grad_norm": 0.8220357789415259, "learning_rate": 4.720718173419947e-07, "loss": 0.3068, "step": 2704 }, { "epoch": 0.8074626865671641, "grad_norm": 0.8172111167713195, "learning_rate": 4.706590763189842e-07, "loss": 0.311, "step": 2705 }, { "epoch": 0.8077611940298507, "grad_norm": 0.8197511984015036, "learning_rate": 4.692482326802722e-07, "loss": 0.3214, "step": 2706 }, { "epoch": 0.8080597014925374, "grad_norm": 0.795191617688341, "learning_rate": 4.678392877449631e-07, "loss": 0.343, "step": 2707 }, { "epoch": 0.8083582089552239, "grad_norm": 0.9261906113677095, "learning_rate": 4.6643224283038606e-07, "loss": 0.3497, "step": 2708 }, { "epoch": 0.8086567164179105, "grad_norm": 0.8073388835958137, "learning_rate": 4.6502709925209415e-07, "loss": 0.3566, "step": 2709 }, { "epoch": 0.808955223880597, "grad_norm": 0.8727196491973467, "learning_rate": 4.636238583238611e-07, "loss": 0.3442, "step": 2710 }, { "epoch": 0.8092537313432836, "grad_norm": 0.7474594969428882, "learning_rate": 4.622225213576842e-07, "loss": 0.3188, "step": 2711 }, { "epoch": 0.8095522388059702, "grad_norm": 0.8418571780348719, "learning_rate": 4.608230896637786e-07, "loss": 0.3451, "step": 2712 }, { "epoch": 0.8098507462686567, "grad_norm": 0.8706546788357333, "learning_rate": 4.5942556455057907e-07, "loss": 0.328, "step": 2713 }, { "epoch": 0.8101492537313433, "grad_norm": 0.82036774081707, "learning_rate": 4.580299473247385e-07, "loss": 0.3743, "step": 2714 }, { "epoch": 0.8104477611940298, "grad_norm": 0.8382687141631925, "learning_rate": 4.566362392911233e-07, "loss": 0.3347, "step": 2715 }, { "epoch": 0.8107462686567164, "grad_norm": 0.8501600750136287, "learning_rate": 4.5524444175281764e-07, "loss": 0.3122, "step": 2716 }, { "epoch": 0.811044776119403, "grad_norm": 0.7513505089871687, "learning_rate": 4.5385455601111764e-07, "loss": 0.2863, "step": 2717 }, { "epoch": 0.8113432835820895, "grad_norm": 0.8351608395297877, "learning_rate": 4.5246658336553285e-07, "loss": 0.3418, "step": 2718 }, { "epoch": 0.8116417910447761, "grad_norm": 0.8672168533410403, "learning_rate": 4.51080525113784e-07, "loss": 0.3373, "step": 2719 }, { "epoch": 0.8119402985074626, "grad_norm": 0.891251070972832, "learning_rate": 4.496963825518025e-07, "loss": 0.3924, "step": 2720 }, { "epoch": 0.8122388059701493, "grad_norm": 1.0096715094848157, "learning_rate": 4.483141569737262e-07, "loss": 0.3393, "step": 2721 }, { "epoch": 0.8125373134328359, "grad_norm": 0.7978505191567731, "learning_rate": 4.4693384967190336e-07, "loss": 0.3533, "step": 2722 }, { "epoch": 0.8128358208955224, "grad_norm": 0.8736364444396616, "learning_rate": 4.455554619368874e-07, "loss": 0.3452, "step": 2723 }, { "epoch": 0.813134328358209, "grad_norm": 0.868351671110689, "learning_rate": 4.441789950574374e-07, "loss": 0.3279, "step": 2724 }, { "epoch": 0.8134328358208955, "grad_norm": 0.8426500515372894, "learning_rate": 4.4280445032051644e-07, "loss": 0.3321, "step": 2725 }, { "epoch": 0.8137313432835821, "grad_norm": 0.921670497533751, "learning_rate": 4.4143182901129065e-07, "loss": 0.3849, "step": 2726 }, { "epoch": 0.8140298507462687, "grad_norm": 0.8427699059645096, "learning_rate": 4.40061132413126e-07, "loss": 0.3491, "step": 2727 }, { "epoch": 0.8143283582089552, "grad_norm": 1.0244827972367638, "learning_rate": 4.386923618075919e-07, "loss": 0.3091, "step": 2728 }, { "epoch": 0.8146268656716418, "grad_norm": 0.8797755605970998, "learning_rate": 4.3732551847445514e-07, "loss": 0.3726, "step": 2729 }, { "epoch": 0.8149253731343283, "grad_norm": 0.8300697363391679, "learning_rate": 4.3596060369168166e-07, "loss": 0.339, "step": 2730 }, { "epoch": 0.8152238805970149, "grad_norm": 0.8232225916495611, "learning_rate": 4.3459761873543173e-07, "loss": 0.3237, "step": 2731 }, { "epoch": 0.8155223880597015, "grad_norm": 0.7786052553153007, "learning_rate": 4.3323656488006433e-07, "loss": 0.3131, "step": 2732 }, { "epoch": 0.815820895522388, "grad_norm": 0.8470582237532246, "learning_rate": 4.3187744339813096e-07, "loss": 0.3337, "step": 2733 }, { "epoch": 0.8161194029850747, "grad_norm": 0.8893530166853234, "learning_rate": 4.30520255560378e-07, "loss": 0.3582, "step": 2734 }, { "epoch": 0.8164179104477612, "grad_norm": 0.8553834397028571, "learning_rate": 4.291650026357419e-07, "loss": 0.3852, "step": 2735 }, { "epoch": 0.8167164179104478, "grad_norm": 0.8952589728174519, "learning_rate": 4.278116858913525e-07, "loss": 0.401, "step": 2736 }, { "epoch": 0.8170149253731344, "grad_norm": 0.7976255563281726, "learning_rate": 4.2646030659252657e-07, "loss": 0.326, "step": 2737 }, { "epoch": 0.8173134328358209, "grad_norm": 0.9629301647838667, "learning_rate": 4.2511086600277143e-07, "loss": 0.3878, "step": 2738 }, { "epoch": 0.8176119402985075, "grad_norm": 0.8360663192438232, "learning_rate": 4.2376336538378084e-07, "loss": 0.3345, "step": 2739 }, { "epoch": 0.817910447761194, "grad_norm": 0.8380674045458814, "learning_rate": 4.2241780599543563e-07, "loss": 0.3498, "step": 2740 }, { "epoch": 0.8182089552238806, "grad_norm": 0.8738374475513894, "learning_rate": 4.210741890958009e-07, "loss": 0.3488, "step": 2741 }, { "epoch": 0.8185074626865672, "grad_norm": 1.27003433660867, "learning_rate": 4.19732515941125e-07, "loss": 0.3179, "step": 2742 }, { "epoch": 0.8188059701492537, "grad_norm": 0.744478080565413, "learning_rate": 4.1839278778584014e-07, "loss": 0.3175, "step": 2743 }, { "epoch": 0.8191044776119403, "grad_norm": 0.7803913692575267, "learning_rate": 4.1705500588255956e-07, "loss": 0.2942, "step": 2744 }, { "epoch": 0.8194029850746268, "grad_norm": 0.7539919885280165, "learning_rate": 4.157191714820766e-07, "loss": 0.2942, "step": 2745 }, { "epoch": 0.8197014925373134, "grad_norm": 0.8191488598187618, "learning_rate": 4.1438528583336384e-07, "loss": 0.3379, "step": 2746 }, { "epoch": 0.82, "grad_norm": 0.8663924389620965, "learning_rate": 4.1305335018357235e-07, "loss": 0.3193, "step": 2747 }, { "epoch": 0.8202985074626866, "grad_norm": 0.8843844251532348, "learning_rate": 4.117233657780287e-07, "loss": 0.3684, "step": 2748 }, { "epoch": 0.8205970149253732, "grad_norm": 0.7260699768191158, "learning_rate": 4.1039533386023595e-07, "loss": 0.2681, "step": 2749 }, { "epoch": 0.8208955223880597, "grad_norm": 0.8255147907973385, "learning_rate": 4.09069255671872e-07, "loss": 0.3105, "step": 2750 }, { "epoch": 0.8211940298507463, "grad_norm": 0.7640188918178487, "learning_rate": 4.077451324527873e-07, "loss": 0.2879, "step": 2751 }, { "epoch": 0.8214925373134329, "grad_norm": 0.8228944124806514, "learning_rate": 4.06422965441006e-07, "loss": 0.3215, "step": 2752 }, { "epoch": 0.8217910447761194, "grad_norm": 0.8372918580266345, "learning_rate": 4.0510275587271995e-07, "loss": 0.3465, "step": 2753 }, { "epoch": 0.822089552238806, "grad_norm": 0.7687323322051585, "learning_rate": 4.0378450498229417e-07, "loss": 0.3526, "step": 2754 }, { "epoch": 0.8223880597014925, "grad_norm": 0.7942261241475315, "learning_rate": 4.0246821400226095e-07, "loss": 0.3391, "step": 2755 }, { "epoch": 0.8226865671641791, "grad_norm": 0.9083289569523623, "learning_rate": 4.0115388416332e-07, "loss": 0.3058, "step": 2756 }, { "epoch": 0.8229850746268657, "grad_norm": 0.7389692614308948, "learning_rate": 3.998415166943381e-07, "loss": 0.2869, "step": 2757 }, { "epoch": 0.8232835820895522, "grad_norm": 0.7891964084273392, "learning_rate": 3.9853111282234736e-07, "loss": 0.3293, "step": 2758 }, { "epoch": 0.8235820895522388, "grad_norm": 0.8794771704418654, "learning_rate": 3.972226737725421e-07, "loss": 0.3271, "step": 2759 }, { "epoch": 0.8238805970149253, "grad_norm": 0.7902448566807511, "learning_rate": 3.9591620076828127e-07, "loss": 0.2814, "step": 2760 }, { "epoch": 0.8241791044776119, "grad_norm": 0.7750307089786643, "learning_rate": 3.94611695031086e-07, "loss": 0.3438, "step": 2761 }, { "epoch": 0.8244776119402986, "grad_norm": 0.9123133939275277, "learning_rate": 3.9330915778063666e-07, "loss": 0.3011, "step": 2762 }, { "epoch": 0.8247761194029851, "grad_norm": 0.8857984121410764, "learning_rate": 3.920085902347745e-07, "loss": 0.3484, "step": 2763 }, { "epoch": 0.8250746268656717, "grad_norm": 0.849801189339493, "learning_rate": 3.9070999360949824e-07, "loss": 0.3515, "step": 2764 }, { "epoch": 0.8253731343283582, "grad_norm": 0.8945130952730969, "learning_rate": 3.8941336911896306e-07, "loss": 0.372, "step": 2765 }, { "epoch": 0.8256716417910448, "grad_norm": 0.8619546550134426, "learning_rate": 3.881187179754828e-07, "loss": 0.3446, "step": 2766 }, { "epoch": 0.8259701492537314, "grad_norm": 0.910387966103673, "learning_rate": 3.868260413895239e-07, "loss": 0.3719, "step": 2767 }, { "epoch": 0.8262686567164179, "grad_norm": 0.7564349469839857, "learning_rate": 3.855353405697082e-07, "loss": 0.337, "step": 2768 }, { "epoch": 0.8265671641791045, "grad_norm": 0.8389296921319255, "learning_rate": 3.842466167228082e-07, "loss": 0.3324, "step": 2769 }, { "epoch": 0.826865671641791, "grad_norm": 0.7638208844437337, "learning_rate": 3.829598710537502e-07, "loss": 0.2851, "step": 2770 }, { "epoch": 0.8271641791044776, "grad_norm": 0.7857817130843364, "learning_rate": 3.816751047656098e-07, "loss": 0.33, "step": 2771 }, { "epoch": 0.8274626865671642, "grad_norm": 0.8418912981277041, "learning_rate": 3.8039231905961204e-07, "loss": 0.2974, "step": 2772 }, { "epoch": 0.8277611940298507, "grad_norm": 0.9404182488278726, "learning_rate": 3.791115151351313e-07, "loss": 0.3785, "step": 2773 }, { "epoch": 0.8280597014925373, "grad_norm": 0.8318189034227368, "learning_rate": 3.778326941896862e-07, "loss": 0.353, "step": 2774 }, { "epoch": 0.8283582089552238, "grad_norm": 0.8300336051160621, "learning_rate": 3.7655585741894454e-07, "loss": 0.3494, "step": 2775 }, { "epoch": 0.8286567164179105, "grad_norm": 0.7380131940769754, "learning_rate": 3.7528100601671694e-07, "loss": 0.2907, "step": 2776 }, { "epoch": 0.8289552238805971, "grad_norm": 0.792304457008959, "learning_rate": 3.740081411749588e-07, "loss": 0.3718, "step": 2777 }, { "epoch": 0.8292537313432836, "grad_norm": 0.8319630537903212, "learning_rate": 3.7273726408376734e-07, "loss": 0.3514, "step": 2778 }, { "epoch": 0.8295522388059702, "grad_norm": 0.8174896078993995, "learning_rate": 3.714683759313825e-07, "loss": 0.3168, "step": 2779 }, { "epoch": 0.8298507462686567, "grad_norm": 0.8243230824612728, "learning_rate": 3.7020147790418266e-07, "loss": 0.3156, "step": 2780 }, { "epoch": 0.8301492537313433, "grad_norm": 0.8235097405737742, "learning_rate": 3.689365711866869e-07, "loss": 0.3215, "step": 2781 }, { "epoch": 0.8304477611940299, "grad_norm": 0.972966084001807, "learning_rate": 3.676736569615524e-07, "loss": 0.3932, "step": 2782 }, { "epoch": 0.8307462686567164, "grad_norm": 0.8685667990613507, "learning_rate": 3.664127364095732e-07, "loss": 0.3553, "step": 2783 }, { "epoch": 0.831044776119403, "grad_norm": 0.9074240672477265, "learning_rate": 3.6515381070967916e-07, "loss": 0.3711, "step": 2784 }, { "epoch": 0.8313432835820895, "grad_norm": 0.9317748916806734, "learning_rate": 3.6389688103893565e-07, "loss": 0.3269, "step": 2785 }, { "epoch": 0.8316417910447761, "grad_norm": 0.7730908407067022, "learning_rate": 3.626419485725402e-07, "loss": 0.291, "step": 2786 }, { "epoch": 0.8319402985074626, "grad_norm": 0.8220545588467743, "learning_rate": 3.6138901448382475e-07, "loss": 0.2818, "step": 2787 }, { "epoch": 0.8322388059701492, "grad_norm": 0.7753288586198012, "learning_rate": 3.601380799442519e-07, "loss": 0.3117, "step": 2788 }, { "epoch": 0.8325373134328358, "grad_norm": 0.7319571164319162, "learning_rate": 3.5888914612341506e-07, "loss": 0.2653, "step": 2789 }, { "epoch": 0.8328358208955224, "grad_norm": 0.9086402808294651, "learning_rate": 3.576422141890376e-07, "loss": 0.3587, "step": 2790 }, { "epoch": 0.833134328358209, "grad_norm": 0.9224261714666895, "learning_rate": 3.5639728530696944e-07, "loss": 0.3608, "step": 2791 }, { "epoch": 0.8334328358208956, "grad_norm": 0.8510496483807699, "learning_rate": 3.551543606411889e-07, "loss": 0.3436, "step": 2792 }, { "epoch": 0.8337313432835821, "grad_norm": 0.8921671526472281, "learning_rate": 3.5391344135380065e-07, "loss": 0.332, "step": 2793 }, { "epoch": 0.8340298507462687, "grad_norm": 0.8000190887650788, "learning_rate": 3.526745286050334e-07, "loss": 0.3302, "step": 2794 }, { "epoch": 0.8343283582089552, "grad_norm": 0.7752036186758762, "learning_rate": 3.514376235532413e-07, "loss": 0.3355, "step": 2795 }, { "epoch": 0.8346268656716418, "grad_norm": 0.789049365859591, "learning_rate": 3.5020272735490023e-07, "loss": 0.3275, "step": 2796 }, { "epoch": 0.8349253731343284, "grad_norm": 0.827050594275851, "learning_rate": 3.4896984116460697e-07, "loss": 0.3277, "step": 2797 }, { "epoch": 0.8352238805970149, "grad_norm": 0.9597439299150683, "learning_rate": 3.477389661350811e-07, "loss": 0.3553, "step": 2798 }, { "epoch": 0.8355223880597015, "grad_norm": 0.8061366601518952, "learning_rate": 3.465101034171603e-07, "loss": 0.3236, "step": 2799 }, { "epoch": 0.835820895522388, "grad_norm": 0.8294064364634925, "learning_rate": 3.452832541598014e-07, "loss": 0.3105, "step": 2800 }, { "epoch": 0.8361194029850746, "grad_norm": 0.7827890707493881, "learning_rate": 3.4405841951007907e-07, "loss": 0.3295, "step": 2801 }, { "epoch": 0.8364179104477611, "grad_norm": 0.8274777597695597, "learning_rate": 3.4283560061318276e-07, "loss": 0.3279, "step": 2802 }, { "epoch": 0.8367164179104477, "grad_norm": 0.8368484624668843, "learning_rate": 3.416147986124185e-07, "loss": 0.3126, "step": 2803 }, { "epoch": 0.8370149253731344, "grad_norm": 0.8310718332591835, "learning_rate": 3.403960146492072e-07, "loss": 0.307, "step": 2804 }, { "epoch": 0.8373134328358209, "grad_norm": 1.1731444253779697, "learning_rate": 3.391792498630819e-07, "loss": 0.3181, "step": 2805 }, { "epoch": 0.8376119402985075, "grad_norm": 0.8303714814188278, "learning_rate": 3.3796450539168855e-07, "loss": 0.3548, "step": 2806 }, { "epoch": 0.837910447761194, "grad_norm": 0.8551149029659069, "learning_rate": 3.367517823707822e-07, "loss": 0.3245, "step": 2807 }, { "epoch": 0.8382089552238806, "grad_norm": 0.8090639602963838, "learning_rate": 3.355410819342303e-07, "loss": 0.3548, "step": 2808 }, { "epoch": 0.8385074626865672, "grad_norm": 0.8944815883645018, "learning_rate": 3.343324052140079e-07, "loss": 0.3413, "step": 2809 }, { "epoch": 0.8388059701492537, "grad_norm": 0.832569540017415, "learning_rate": 3.331257533401985e-07, "loss": 0.3283, "step": 2810 }, { "epoch": 0.8391044776119403, "grad_norm": 0.9058511577320896, "learning_rate": 3.3192112744099255e-07, "loss": 0.3237, "step": 2811 }, { "epoch": 0.8394029850746269, "grad_norm": 0.8466505427268364, "learning_rate": 3.3071852864268476e-07, "loss": 0.3318, "step": 2812 }, { "epoch": 0.8397014925373134, "grad_norm": 0.8482105495452263, "learning_rate": 3.2951795806967667e-07, "loss": 0.3832, "step": 2813 }, { "epoch": 0.84, "grad_norm": 0.760237333698982, "learning_rate": 3.28319416844472e-07, "loss": 0.3267, "step": 2814 }, { "epoch": 0.8402985074626865, "grad_norm": 0.907957579652845, "learning_rate": 3.2712290608767796e-07, "loss": 0.353, "step": 2815 }, { "epoch": 0.8405970149253731, "grad_norm": 0.8309270569530571, "learning_rate": 3.259284269180027e-07, "loss": 0.3212, "step": 2816 }, { "epoch": 0.8408955223880596, "grad_norm": 0.8751671504075764, "learning_rate": 3.247359804522557e-07, "loss": 0.3945, "step": 2817 }, { "epoch": 0.8411940298507463, "grad_norm": 0.7771991272718066, "learning_rate": 3.2354556780534423e-07, "loss": 0.3495, "step": 2818 }, { "epoch": 0.8414925373134329, "grad_norm": 0.7300330257017509, "learning_rate": 3.223571900902758e-07, "loss": 0.2848, "step": 2819 }, { "epoch": 0.8417910447761194, "grad_norm": 0.7636406065115092, "learning_rate": 3.2117084841815445e-07, "loss": 0.3463, "step": 2820 }, { "epoch": 0.842089552238806, "grad_norm": 0.7679199725342977, "learning_rate": 3.199865438981808e-07, "loss": 0.3386, "step": 2821 }, { "epoch": 0.8423880597014926, "grad_norm": 0.8654021210207075, "learning_rate": 3.18804277637651e-07, "loss": 0.374, "step": 2822 }, { "epoch": 0.8426865671641791, "grad_norm": 0.8588936766950285, "learning_rate": 3.1762405074195505e-07, "loss": 0.2811, "step": 2823 }, { "epoch": 0.8429850746268657, "grad_norm": 0.8219003046328764, "learning_rate": 3.164458643145757e-07, "loss": 0.3322, "step": 2824 }, { "epoch": 0.8432835820895522, "grad_norm": 0.8476446887601962, "learning_rate": 3.152697194570892e-07, "loss": 0.3511, "step": 2825 }, { "epoch": 0.8435820895522388, "grad_norm": 0.8506563729141355, "learning_rate": 3.14095617269162e-07, "loss": 0.3435, "step": 2826 }, { "epoch": 0.8438805970149253, "grad_norm": 0.8442762907020646, "learning_rate": 3.12923558848551e-07, "loss": 0.2946, "step": 2827 }, { "epoch": 0.8441791044776119, "grad_norm": 0.9559566043579282, "learning_rate": 3.11753545291103e-07, "loss": 0.3085, "step": 2828 }, { "epoch": 0.8444776119402985, "grad_norm": 0.8597722096339706, "learning_rate": 3.1058557769075127e-07, "loss": 0.3444, "step": 2829 }, { "epoch": 0.844776119402985, "grad_norm": 0.7840152793786416, "learning_rate": 3.0941965713951723e-07, "loss": 0.3346, "step": 2830 }, { "epoch": 0.8450746268656717, "grad_norm": 0.7867462780601826, "learning_rate": 3.0825578472750806e-07, "loss": 0.3301, "step": 2831 }, { "epoch": 0.8453731343283583, "grad_norm": 1.183250521423748, "learning_rate": 3.070939615429167e-07, "loss": 0.3585, "step": 2832 }, { "epoch": 0.8456716417910448, "grad_norm": 0.8624837995021237, "learning_rate": 3.0593418867201877e-07, "loss": 0.3036, "step": 2833 }, { "epoch": 0.8459701492537314, "grad_norm": 0.7736676319547169, "learning_rate": 3.047764671991749e-07, "loss": 0.3162, "step": 2834 }, { "epoch": 0.8462686567164179, "grad_norm": 0.8290268285441122, "learning_rate": 3.0362079820682485e-07, "loss": 0.3256, "step": 2835 }, { "epoch": 0.8465671641791045, "grad_norm": 0.7956305622703035, "learning_rate": 3.0246718277549157e-07, "loss": 0.2927, "step": 2836 }, { "epoch": 0.846865671641791, "grad_norm": 0.9449677011825268, "learning_rate": 3.0131562198377763e-07, "loss": 0.3221, "step": 2837 }, { "epoch": 0.8471641791044776, "grad_norm": 0.8544674441401762, "learning_rate": 3.001661169083639e-07, "loss": 0.3511, "step": 2838 }, { "epoch": 0.8474626865671642, "grad_norm": 0.8378591210472813, "learning_rate": 2.990186686240104e-07, "loss": 0.3299, "step": 2839 }, { "epoch": 0.8477611940298507, "grad_norm": 0.7415214017957397, "learning_rate": 2.978732782035518e-07, "loss": 0.335, "step": 2840 }, { "epoch": 0.8480597014925373, "grad_norm": 0.9365276617601792, "learning_rate": 2.967299467179019e-07, "loss": 0.3189, "step": 2841 }, { "epoch": 0.8483582089552238, "grad_norm": 0.803685125509278, "learning_rate": 2.955886752360468e-07, "loss": 0.3054, "step": 2842 }, { "epoch": 0.8486567164179104, "grad_norm": 0.8054257795996826, "learning_rate": 2.944494648250476e-07, "loss": 0.3736, "step": 2843 }, { "epoch": 0.848955223880597, "grad_norm": 0.8427531576716745, "learning_rate": 2.93312316550039e-07, "loss": 0.3018, "step": 2844 }, { "epoch": 0.8492537313432836, "grad_norm": 0.7542191685088523, "learning_rate": 2.9217723147422603e-07, "loss": 0.3425, "step": 2845 }, { "epoch": 0.8495522388059702, "grad_norm": 0.8846788028740314, "learning_rate": 2.9104421065888566e-07, "loss": 0.3495, "step": 2846 }, { "epoch": 0.8498507462686568, "grad_norm": 0.7448582027758741, "learning_rate": 2.8991325516336516e-07, "loss": 0.2709, "step": 2847 }, { "epoch": 0.8501492537313433, "grad_norm": 0.9689479036479011, "learning_rate": 2.887843660450798e-07, "loss": 0.3394, "step": 2848 }, { "epoch": 0.8504477611940299, "grad_norm": 0.9730142586691932, "learning_rate": 2.8765754435951446e-07, "loss": 0.3772, "step": 2849 }, { "epoch": 0.8507462686567164, "grad_norm": 0.8711119065481331, "learning_rate": 2.865327911602189e-07, "loss": 0.3281, "step": 2850 }, { "epoch": 0.851044776119403, "grad_norm": 0.8353842456994517, "learning_rate": 2.8541010749881e-07, "loss": 0.3442, "step": 2851 }, { "epoch": 0.8513432835820895, "grad_norm": 0.8230749752409625, "learning_rate": 2.8428949442496996e-07, "loss": 0.328, "step": 2852 }, { "epoch": 0.8516417910447761, "grad_norm": 0.8209618759870918, "learning_rate": 2.831709529864446e-07, "loss": 0.3234, "step": 2853 }, { "epoch": 0.8519402985074627, "grad_norm": 0.8756915629518247, "learning_rate": 2.820544842290429e-07, "loss": 0.3458, "step": 2854 }, { "epoch": 0.8522388059701492, "grad_norm": 0.8506109532491893, "learning_rate": 2.809400891966363e-07, "loss": 0.3495, "step": 2855 }, { "epoch": 0.8525373134328358, "grad_norm": 0.7609796908968595, "learning_rate": 2.798277689311563e-07, "loss": 0.3166, "step": 2856 }, { "epoch": 0.8528358208955223, "grad_norm": 0.9292199709147685, "learning_rate": 2.7871752447259564e-07, "loss": 0.3637, "step": 2857 }, { "epoch": 0.8531343283582089, "grad_norm": 0.8140854628051475, "learning_rate": 2.7760935685900576e-07, "loss": 0.3422, "step": 2858 }, { "epoch": 0.8534328358208956, "grad_norm": 0.8210792005498305, "learning_rate": 2.765032671264961e-07, "loss": 0.2837, "step": 2859 }, { "epoch": 0.8537313432835821, "grad_norm": 0.7602160506021929, "learning_rate": 2.7539925630923473e-07, "loss": 0.3127, "step": 2860 }, { "epoch": 0.8540298507462687, "grad_norm": 0.8787744611254319, "learning_rate": 2.7429732543944323e-07, "loss": 0.3677, "step": 2861 }, { "epoch": 0.8543283582089553, "grad_norm": 0.7774552964487964, "learning_rate": 2.7319747554740096e-07, "loss": 0.3148, "step": 2862 }, { "epoch": 0.8546268656716418, "grad_norm": 0.8133612375263352, "learning_rate": 2.720997076614407e-07, "loss": 0.301, "step": 2863 }, { "epoch": 0.8549253731343284, "grad_norm": 0.8837935762329368, "learning_rate": 2.710040228079486e-07, "loss": 0.3679, "step": 2864 }, { "epoch": 0.8552238805970149, "grad_norm": 0.8348061759356361, "learning_rate": 2.6991042201136327e-07, "loss": 0.351, "step": 2865 }, { "epoch": 0.8555223880597015, "grad_norm": 0.9256883374968553, "learning_rate": 2.688189062941754e-07, "loss": 0.3454, "step": 2866 }, { "epoch": 0.855820895522388, "grad_norm": 0.8766267825494243, "learning_rate": 2.677294766769245e-07, "loss": 0.3868, "step": 2867 }, { "epoch": 0.8561194029850746, "grad_norm": 0.7835550371050818, "learning_rate": 2.6664213417820104e-07, "loss": 0.299, "step": 2868 }, { "epoch": 0.8564179104477612, "grad_norm": 0.758646833090557, "learning_rate": 2.655568798146443e-07, "loss": 0.3072, "step": 2869 }, { "epoch": 0.8567164179104477, "grad_norm": 0.7818620602758218, "learning_rate": 2.644737146009402e-07, "loss": 0.3201, "step": 2870 }, { "epoch": 0.8570149253731343, "grad_norm": 0.7465671035572722, "learning_rate": 2.633926395498218e-07, "loss": 0.3185, "step": 2871 }, { "epoch": 0.8573134328358208, "grad_norm": 0.8201353059035935, "learning_rate": 2.6231365567206844e-07, "loss": 0.3589, "step": 2872 }, { "epoch": 0.8576119402985075, "grad_norm": 0.8180993739540929, "learning_rate": 2.6123676397650314e-07, "loss": 0.3187, "step": 2873 }, { "epoch": 0.8579104477611941, "grad_norm": 0.8056216721587721, "learning_rate": 2.601619654699933e-07, "loss": 0.3326, "step": 2874 }, { "epoch": 0.8582089552238806, "grad_norm": 0.786235331989252, "learning_rate": 2.5908926115744997e-07, "loss": 0.2917, "step": 2875 }, { "epoch": 0.8585074626865672, "grad_norm": 0.7791737700058262, "learning_rate": 2.5801865204182486e-07, "loss": 0.3158, "step": 2876 }, { "epoch": 0.8588059701492538, "grad_norm": 0.8728995256661342, "learning_rate": 2.569501391241122e-07, "loss": 0.387, "step": 2877 }, { "epoch": 0.8591044776119403, "grad_norm": 0.76411187451701, "learning_rate": 2.5588372340334427e-07, "loss": 0.3338, "step": 2878 }, { "epoch": 0.8594029850746269, "grad_norm": 0.7959879543698507, "learning_rate": 2.548194058765949e-07, "loss": 0.3169, "step": 2879 }, { "epoch": 0.8597014925373134, "grad_norm": 0.7821916530349274, "learning_rate": 2.5375718753897493e-07, "loss": 0.3115, "step": 2880 }, { "epoch": 0.86, "grad_norm": 0.910323877482503, "learning_rate": 2.5269706938363196e-07, "loss": 0.3825, "step": 2881 }, { "epoch": 0.8602985074626865, "grad_norm": 0.8333574082581855, "learning_rate": 2.5163905240175175e-07, "loss": 0.3386, "step": 2882 }, { "epoch": 0.8605970149253731, "grad_norm": 1.4726326278298185, "learning_rate": 2.505831375825532e-07, "loss": 0.3895, "step": 2883 }, { "epoch": 0.8608955223880597, "grad_norm": 0.7646029930002108, "learning_rate": 2.495293259132914e-07, "loss": 0.3438, "step": 2884 }, { "epoch": 0.8611940298507462, "grad_norm": 0.917822050579998, "learning_rate": 2.484776183792545e-07, "loss": 0.4223, "step": 2885 }, { "epoch": 0.8614925373134328, "grad_norm": 0.9260960233947864, "learning_rate": 2.474280159637635e-07, "loss": 0.34, "step": 2886 }, { "epoch": 0.8617910447761195, "grad_norm": 0.843848719502158, "learning_rate": 2.463805196481714e-07, "loss": 0.3272, "step": 2887 }, { "epoch": 0.862089552238806, "grad_norm": 0.8251578240160266, "learning_rate": 2.453351304118609e-07, "loss": 0.3677, "step": 2888 }, { "epoch": 0.8623880597014926, "grad_norm": 0.7232437460080843, "learning_rate": 2.442918492322463e-07, "loss": 0.3187, "step": 2889 }, { "epoch": 0.8626865671641791, "grad_norm": 0.7702961173547361, "learning_rate": 2.4325067708476924e-07, "loss": 0.3192, "step": 2890 }, { "epoch": 0.8629850746268657, "grad_norm": 0.8797092318207074, "learning_rate": 2.42211614942901e-07, "loss": 0.3281, "step": 2891 }, { "epoch": 0.8632835820895522, "grad_norm": 0.7723014760959183, "learning_rate": 2.4117466377813927e-07, "loss": 0.3015, "step": 2892 }, { "epoch": 0.8635820895522388, "grad_norm": 0.770812872138971, "learning_rate": 2.4013982456000813e-07, "loss": 0.3174, "step": 2893 }, { "epoch": 0.8638805970149254, "grad_norm": 0.8891261100131612, "learning_rate": 2.3910709825605645e-07, "loss": 0.3458, "step": 2894 }, { "epoch": 0.8641791044776119, "grad_norm": 0.8361856897100667, "learning_rate": 2.380764858318585e-07, "loss": 0.3498, "step": 2895 }, { "epoch": 0.8644776119402985, "grad_norm": 0.9085874630351144, "learning_rate": 2.370479882510121e-07, "loss": 0.3246, "step": 2896 }, { "epoch": 0.864776119402985, "grad_norm": 0.8197815633910296, "learning_rate": 2.3602160647513693e-07, "loss": 0.3716, "step": 2897 }, { "epoch": 0.8650746268656716, "grad_norm": 0.8746527165561627, "learning_rate": 2.3499734146387565e-07, "loss": 0.3132, "step": 2898 }, { "epoch": 0.8653731343283582, "grad_norm": 0.8083218694058425, "learning_rate": 2.3397519417489022e-07, "loss": 0.3384, "step": 2899 }, { "epoch": 0.8656716417910447, "grad_norm": 0.8505934710512888, "learning_rate": 2.3295516556386372e-07, "loss": 0.3654, "step": 2900 }, { "epoch": 0.8659701492537314, "grad_norm": 0.8163302330969157, "learning_rate": 2.3193725658449794e-07, "loss": 0.3231, "step": 2901 }, { "epoch": 0.866268656716418, "grad_norm": 0.9049008261636325, "learning_rate": 2.3092146818851357e-07, "loss": 0.3785, "step": 2902 }, { "epoch": 0.8665671641791045, "grad_norm": 0.8815041271123635, "learning_rate": 2.2990780132564729e-07, "loss": 0.3432, "step": 2903 }, { "epoch": 0.8668656716417911, "grad_norm": 0.8388624020002282, "learning_rate": 2.288962569436537e-07, "loss": 0.3187, "step": 2904 }, { "epoch": 0.8671641791044776, "grad_norm": 0.9394777411904285, "learning_rate": 2.2788683598830101e-07, "loss": 0.3456, "step": 2905 }, { "epoch": 0.8674626865671642, "grad_norm": 0.8140678456634438, "learning_rate": 2.2687953940337403e-07, "loss": 0.3278, "step": 2906 }, { "epoch": 0.8677611940298507, "grad_norm": 0.8236736666005042, "learning_rate": 2.258743681306702e-07, "loss": 0.3195, "step": 2907 }, { "epoch": 0.8680597014925373, "grad_norm": 0.8327417961490085, "learning_rate": 2.2487132311000055e-07, "loss": 0.3545, "step": 2908 }, { "epoch": 0.8683582089552239, "grad_norm": 0.8708133440716065, "learning_rate": 2.2387040527918708e-07, "loss": 0.3167, "step": 2909 }, { "epoch": 0.8686567164179104, "grad_norm": 0.812026737884432, "learning_rate": 2.2287161557406455e-07, "loss": 0.2901, "step": 2910 }, { "epoch": 0.868955223880597, "grad_norm": 0.7460255803585844, "learning_rate": 2.21874954928476e-07, "loss": 0.2604, "step": 2911 }, { "epoch": 0.8692537313432835, "grad_norm": 0.8937718738194345, "learning_rate": 2.2088042427427515e-07, "loss": 0.361, "step": 2912 }, { "epoch": 0.8695522388059701, "grad_norm": 0.8470926320063893, "learning_rate": 2.19888024541324e-07, "loss": 0.3481, "step": 2913 }, { "epoch": 0.8698507462686568, "grad_norm": 0.7648679073540151, "learning_rate": 2.188977566574921e-07, "loss": 0.2797, "step": 2914 }, { "epoch": 0.8701492537313433, "grad_norm": 0.7915971704662402, "learning_rate": 2.179096215486562e-07, "loss": 0.3656, "step": 2915 }, { "epoch": 0.8704477611940299, "grad_norm": 0.7609586429057615, "learning_rate": 2.1692362013869705e-07, "loss": 0.3232, "step": 2916 }, { "epoch": 0.8707462686567164, "grad_norm": 0.9187944488879568, "learning_rate": 2.1593975334950363e-07, "loss": 0.3782, "step": 2917 }, { "epoch": 0.871044776119403, "grad_norm": 0.8742675214436783, "learning_rate": 2.14958022100967e-07, "loss": 0.35, "step": 2918 }, { "epoch": 0.8713432835820896, "grad_norm": 0.773745759889458, "learning_rate": 2.139784273109813e-07, "loss": 0.3365, "step": 2919 }, { "epoch": 0.8716417910447761, "grad_norm": 0.9230337430192534, "learning_rate": 2.1300096989544494e-07, "loss": 0.3368, "step": 2920 }, { "epoch": 0.8719402985074627, "grad_norm": 0.8006731655410944, "learning_rate": 2.1202565076825554e-07, "loss": 0.3213, "step": 2921 }, { "epoch": 0.8722388059701492, "grad_norm": 0.7820026265974506, "learning_rate": 2.1105247084131308e-07, "loss": 0.3169, "step": 2922 }, { "epoch": 0.8725373134328358, "grad_norm": 0.7990682212516456, "learning_rate": 2.100814310245175e-07, "loss": 0.3447, "step": 2923 }, { "epoch": 0.8728358208955224, "grad_norm": 0.793417044723113, "learning_rate": 2.0911253222576673e-07, "loss": 0.3729, "step": 2924 }, { "epoch": 0.8731343283582089, "grad_norm": 0.8157859780494326, "learning_rate": 2.081457753509586e-07, "loss": 0.2663, "step": 2925 }, { "epoch": 0.8734328358208955, "grad_norm": 0.7620840057646765, "learning_rate": 2.0718116130398592e-07, "loss": 0.3219, "step": 2926 }, { "epoch": 0.873731343283582, "grad_norm": 0.8189546759348862, "learning_rate": 2.0621869098673974e-07, "loss": 0.3365, "step": 2927 }, { "epoch": 0.8740298507462687, "grad_norm": 0.8239722079023885, "learning_rate": 2.0525836529910665e-07, "loss": 0.3365, "step": 2928 }, { "epoch": 0.8743283582089553, "grad_norm": 0.7714328059715659, "learning_rate": 2.0430018513896754e-07, "loss": 0.3434, "step": 2929 }, { "epoch": 0.8746268656716418, "grad_norm": 0.8713435989825455, "learning_rate": 2.033441514021975e-07, "loss": 0.3619, "step": 2930 }, { "epoch": 0.8749253731343284, "grad_norm": 0.8802611699490916, "learning_rate": 2.0239026498266535e-07, "loss": 0.3763, "step": 2931 }, { "epoch": 0.875223880597015, "grad_norm": 0.8223587805396835, "learning_rate": 2.0143852677223074e-07, "loss": 0.3458, "step": 2932 }, { "epoch": 0.8755223880597015, "grad_norm": 0.82268739377152, "learning_rate": 2.0048893766074608e-07, "loss": 0.3284, "step": 2933 }, { "epoch": 0.8758208955223881, "grad_norm": 0.8682099708563312, "learning_rate": 1.9954149853605386e-07, "loss": 0.3522, "step": 2934 }, { "epoch": 0.8761194029850746, "grad_norm": 0.8184563001143988, "learning_rate": 1.9859621028398697e-07, "loss": 0.326, "step": 2935 }, { "epoch": 0.8764179104477612, "grad_norm": 0.8396543309711917, "learning_rate": 1.9765307378836695e-07, "loss": 0.3512, "step": 2936 }, { "epoch": 0.8767164179104477, "grad_norm": 0.7884392531195238, "learning_rate": 1.9671208993100292e-07, "loss": 0.3335, "step": 2937 }, { "epoch": 0.8770149253731343, "grad_norm": 0.8535929495828234, "learning_rate": 1.957732595916917e-07, "loss": 0.3245, "step": 2938 }, { "epoch": 0.8773134328358209, "grad_norm": 0.8257674644312324, "learning_rate": 1.9483658364821744e-07, "loss": 0.3041, "step": 2939 }, { "epoch": 0.8776119402985074, "grad_norm": 0.8124568964960845, "learning_rate": 1.9390206297634912e-07, "loss": 0.3304, "step": 2940 }, { "epoch": 0.877910447761194, "grad_norm": 0.8215547653162424, "learning_rate": 1.9296969844984054e-07, "loss": 0.2944, "step": 2941 }, { "epoch": 0.8782089552238806, "grad_norm": 0.8357215000420828, "learning_rate": 1.9203949094043024e-07, "loss": 0.3408, "step": 2942 }, { "epoch": 0.8785074626865672, "grad_norm": 0.8231402612725451, "learning_rate": 1.9111144131783914e-07, "loss": 0.3437, "step": 2943 }, { "epoch": 0.8788059701492538, "grad_norm": 0.9024692930586209, "learning_rate": 1.90185550449771e-07, "loss": 0.3731, "step": 2944 }, { "epoch": 0.8791044776119403, "grad_norm": 0.7126389443706246, "learning_rate": 1.892618192019116e-07, "loss": 0.2756, "step": 2945 }, { "epoch": 0.8794029850746269, "grad_norm": 0.7750142540785868, "learning_rate": 1.883402484379268e-07, "loss": 0.3293, "step": 2946 }, { "epoch": 0.8797014925373134, "grad_norm": 0.8251890473704655, "learning_rate": 1.8742083901946317e-07, "loss": 0.2914, "step": 2947 }, { "epoch": 0.88, "grad_norm": 0.8619570811698483, "learning_rate": 1.8650359180614557e-07, "loss": 0.3061, "step": 2948 }, { "epoch": 0.8802985074626866, "grad_norm": 0.805795293872737, "learning_rate": 1.85588507655578e-07, "loss": 0.3264, "step": 2949 }, { "epoch": 0.8805970149253731, "grad_norm": 0.8331813857436107, "learning_rate": 1.8467558742334219e-07, "loss": 0.3111, "step": 2950 }, { "epoch": 0.8808955223880597, "grad_norm": 0.7786687931228663, "learning_rate": 1.837648319629956e-07, "loss": 0.2909, "step": 2951 }, { "epoch": 0.8811940298507462, "grad_norm": 0.7905242232392001, "learning_rate": 1.8285624212607322e-07, "loss": 0.3318, "step": 2952 }, { "epoch": 0.8814925373134328, "grad_norm": 0.7851585515172644, "learning_rate": 1.819498187620841e-07, "loss": 0.2932, "step": 2953 }, { "epoch": 0.8817910447761194, "grad_norm": 0.8344992885181066, "learning_rate": 1.810455627185112e-07, "loss": 0.3474, "step": 2954 }, { "epoch": 0.8820895522388059, "grad_norm": 0.8778172382134446, "learning_rate": 1.801434748408129e-07, "loss": 0.3121, "step": 2955 }, { "epoch": 0.8823880597014926, "grad_norm": 0.8166282816674805, "learning_rate": 1.7924355597241927e-07, "loss": 0.3558, "step": 2956 }, { "epoch": 0.8826865671641791, "grad_norm": 0.8719815044140756, "learning_rate": 1.7834580695473254e-07, "loss": 0.38, "step": 2957 }, { "epoch": 0.8829850746268657, "grad_norm": 0.8076792947849909, "learning_rate": 1.774502286271254e-07, "loss": 0.3016, "step": 2958 }, { "epoch": 0.8832835820895523, "grad_norm": 0.9801413967360424, "learning_rate": 1.7655682182694228e-07, "loss": 0.4087, "step": 2959 }, { "epoch": 0.8835820895522388, "grad_norm": 0.8289310466533708, "learning_rate": 1.756655873894969e-07, "loss": 0.3196, "step": 2960 }, { "epoch": 0.8838805970149254, "grad_norm": 0.770440394202249, "learning_rate": 1.7477652614807134e-07, "loss": 0.3295, "step": 2961 }, { "epoch": 0.8841791044776119, "grad_norm": 0.8473369272270318, "learning_rate": 1.7388963893391676e-07, "loss": 0.3282, "step": 2962 }, { "epoch": 0.8844776119402985, "grad_norm": 0.8319003049685355, "learning_rate": 1.7300492657625094e-07, "loss": 0.3384, "step": 2963 }, { "epoch": 0.8847761194029851, "grad_norm": 0.69805804384644, "learning_rate": 1.7212238990225756e-07, "loss": 0.3107, "step": 2964 }, { "epoch": 0.8850746268656716, "grad_norm": 0.8316282683239232, "learning_rate": 1.7124202973708788e-07, "loss": 0.3394, "step": 2965 }, { "epoch": 0.8853731343283582, "grad_norm": 0.8189231259197391, "learning_rate": 1.7036384690385681e-07, "loss": 0.3359, "step": 2966 }, { "epoch": 0.8856716417910447, "grad_norm": 0.8816856984730904, "learning_rate": 1.6948784222364372e-07, "loss": 0.3283, "step": 2967 }, { "epoch": 0.8859701492537313, "grad_norm": 0.9024059566893298, "learning_rate": 1.6861401651549203e-07, "loss": 0.3404, "step": 2968 }, { "epoch": 0.8862686567164179, "grad_norm": 0.7660680841699447, "learning_rate": 1.6774237059640764e-07, "loss": 0.3183, "step": 2969 }, { "epoch": 0.8865671641791045, "grad_norm": 0.9030202060790797, "learning_rate": 1.6687290528135725e-07, "loss": 0.3394, "step": 2970 }, { "epoch": 0.8868656716417911, "grad_norm": 0.8416102003078251, "learning_rate": 1.660056213832706e-07, "loss": 0.3156, "step": 2971 }, { "epoch": 0.8871641791044776, "grad_norm": 0.8428340924275619, "learning_rate": 1.651405197130368e-07, "loss": 0.343, "step": 2972 }, { "epoch": 0.8874626865671642, "grad_norm": 0.8500621805800072, "learning_rate": 1.642776010795047e-07, "loss": 0.3628, "step": 2973 }, { "epoch": 0.8877611940298508, "grad_norm": 0.99516910680903, "learning_rate": 1.634168662894825e-07, "loss": 0.3386, "step": 2974 }, { "epoch": 0.8880597014925373, "grad_norm": 0.9229080750641229, "learning_rate": 1.6255831614773594e-07, "loss": 0.3529, "step": 2975 }, { "epoch": 0.8883582089552239, "grad_norm": 0.791162434060006, "learning_rate": 1.6170195145698842e-07, "loss": 0.3473, "step": 2976 }, { "epoch": 0.8886567164179104, "grad_norm": 0.7220798985091988, "learning_rate": 1.6084777301792031e-07, "loss": 0.3026, "step": 2977 }, { "epoch": 0.888955223880597, "grad_norm": 0.747801405803127, "learning_rate": 1.5999578162916723e-07, "loss": 0.2975, "step": 2978 }, { "epoch": 0.8892537313432836, "grad_norm": 0.8265984529095876, "learning_rate": 1.5914597808732085e-07, "loss": 0.3365, "step": 2979 }, { "epoch": 0.8895522388059701, "grad_norm": 0.7476801517306761, "learning_rate": 1.58298363186927e-07, "loss": 0.3497, "step": 2980 }, { "epoch": 0.8898507462686567, "grad_norm": 0.9086508062842112, "learning_rate": 1.5745293772048393e-07, "loss": 0.3154, "step": 2981 }, { "epoch": 0.8901492537313432, "grad_norm": 0.8224473282900949, "learning_rate": 1.5660970247844437e-07, "loss": 0.354, "step": 2982 }, { "epoch": 0.8904477611940298, "grad_norm": 0.7683788999407549, "learning_rate": 1.5576865824921295e-07, "loss": 0.3067, "step": 2983 }, { "epoch": 0.8907462686567165, "grad_norm": 1.013004681398411, "learning_rate": 1.5492980581914535e-07, "loss": 0.3294, "step": 2984 }, { "epoch": 0.891044776119403, "grad_norm": 0.8640995726797704, "learning_rate": 1.5409314597254864e-07, "loss": 0.3699, "step": 2985 }, { "epoch": 0.8913432835820896, "grad_norm": 0.8465105815609653, "learning_rate": 1.5325867949167823e-07, "loss": 0.3255, "step": 2986 }, { "epoch": 0.8916417910447761, "grad_norm": 0.9416694811441639, "learning_rate": 1.5242640715674079e-07, "loss": 0.3372, "step": 2987 }, { "epoch": 0.8919402985074627, "grad_norm": 0.8527515210916904, "learning_rate": 1.5159632974589028e-07, "loss": 0.3654, "step": 2988 }, { "epoch": 0.8922388059701493, "grad_norm": 0.8989050252519671, "learning_rate": 1.507684480352292e-07, "loss": 0.3533, "step": 2989 }, { "epoch": 0.8925373134328358, "grad_norm": 0.8129306542683746, "learning_rate": 1.4994276279880648e-07, "loss": 0.3405, "step": 2990 }, { "epoch": 0.8928358208955224, "grad_norm": 0.8832284244419019, "learning_rate": 1.4911927480861843e-07, "loss": 0.3266, "step": 2991 }, { "epoch": 0.8931343283582089, "grad_norm": 0.9068919741311265, "learning_rate": 1.4829798483460471e-07, "loss": 0.3655, "step": 2992 }, { "epoch": 0.8934328358208955, "grad_norm": 0.7024871886169809, "learning_rate": 1.474788936446525e-07, "loss": 0.3323, "step": 2993 }, { "epoch": 0.8937313432835821, "grad_norm": 0.7474825332275427, "learning_rate": 1.4666200200459224e-07, "loss": 0.3027, "step": 2994 }, { "epoch": 0.8940298507462686, "grad_norm": 0.8040131965806334, "learning_rate": 1.458473106781977e-07, "loss": 0.3541, "step": 2995 }, { "epoch": 0.8943283582089552, "grad_norm": 0.7470197078575609, "learning_rate": 1.450348204271848e-07, "loss": 0.3267, "step": 2996 }, { "epoch": 0.8946268656716417, "grad_norm": 0.800564447211567, "learning_rate": 1.4422453201121234e-07, "loss": 0.365, "step": 2997 }, { "epoch": 0.8949253731343284, "grad_norm": 0.7546300002347592, "learning_rate": 1.4341644618788037e-07, "loss": 0.2846, "step": 2998 }, { "epoch": 0.895223880597015, "grad_norm": 0.7717377048978583, "learning_rate": 1.4261056371272953e-07, "loss": 0.3157, "step": 2999 }, { "epoch": 0.8955223880597015, "grad_norm": 0.9958856245080583, "learning_rate": 1.4180688533924014e-07, "loss": 0.3462, "step": 3000 }, { "epoch": 0.8958208955223881, "grad_norm": 0.7740493958337736, "learning_rate": 1.4100541181883225e-07, "loss": 0.2892, "step": 3001 }, { "epoch": 0.8961194029850746, "grad_norm": 0.8871065072732562, "learning_rate": 1.402061439008634e-07, "loss": 0.3561, "step": 3002 }, { "epoch": 0.8964179104477612, "grad_norm": 0.7521482462008626, "learning_rate": 1.394090823326297e-07, "loss": 0.2957, "step": 3003 }, { "epoch": 0.8967164179104478, "grad_norm": 0.8691973266040441, "learning_rate": 1.386142278593647e-07, "loss": 0.3191, "step": 3004 }, { "epoch": 0.8970149253731343, "grad_norm": 0.8681542943381358, "learning_rate": 1.3782158122423783e-07, "loss": 0.362, "step": 3005 }, { "epoch": 0.8973134328358209, "grad_norm": 0.8478073154828707, "learning_rate": 1.3703114316835436e-07, "loss": 0.3601, "step": 3006 }, { "epoch": 0.8976119402985074, "grad_norm": 0.7832272639251721, "learning_rate": 1.3624291443075505e-07, "loss": 0.3219, "step": 3007 }, { "epoch": 0.897910447761194, "grad_norm": 0.8259684062539756, "learning_rate": 1.3545689574841341e-07, "loss": 0.3591, "step": 3008 }, { "epoch": 0.8982089552238806, "grad_norm": 0.8048015763714349, "learning_rate": 1.3467308785623856e-07, "loss": 0.3752, "step": 3009 }, { "epoch": 0.8985074626865671, "grad_norm": 0.8513994404977496, "learning_rate": 1.3389149148707176e-07, "loss": 0.3726, "step": 3010 }, { "epoch": 0.8988059701492538, "grad_norm": 0.8112900887766165, "learning_rate": 1.3311210737168624e-07, "loss": 0.3092, "step": 3011 }, { "epoch": 0.8991044776119403, "grad_norm": 0.8531040207429486, "learning_rate": 1.3233493623878796e-07, "loss": 0.3495, "step": 3012 }, { "epoch": 0.8994029850746269, "grad_norm": 0.872868595281458, "learning_rate": 1.3155997881501181e-07, "loss": 0.3465, "step": 3013 }, { "epoch": 0.8997014925373135, "grad_norm": 0.8868213740208793, "learning_rate": 1.307872358249246e-07, "loss": 0.3229, "step": 3014 }, { "epoch": 0.9, "grad_norm": 0.8061758493940485, "learning_rate": 1.300167079910225e-07, "loss": 0.3782, "step": 3015 }, { "epoch": 0.9002985074626866, "grad_norm": 0.7826850147527407, "learning_rate": 1.2924839603372986e-07, "loss": 0.3566, "step": 3016 }, { "epoch": 0.9005970149253731, "grad_norm": 0.7652865811941827, "learning_rate": 1.2848230067139977e-07, "loss": 0.2958, "step": 3017 }, { "epoch": 0.9008955223880597, "grad_norm": 0.8684758983402867, "learning_rate": 1.2771842262031293e-07, "loss": 0.3499, "step": 3018 }, { "epoch": 0.9011940298507463, "grad_norm": 0.8121980361863542, "learning_rate": 1.2695676259467632e-07, "loss": 0.342, "step": 3019 }, { "epoch": 0.9014925373134328, "grad_norm": 0.8695703424315466, "learning_rate": 1.2619732130662365e-07, "loss": 0.3466, "step": 3020 }, { "epoch": 0.9017910447761194, "grad_norm": 0.7408411983724599, "learning_rate": 1.2544009946621417e-07, "loss": 0.3154, "step": 3021 }, { "epoch": 0.9020895522388059, "grad_norm": 0.7520252535004338, "learning_rate": 1.246850977814315e-07, "loss": 0.2926, "step": 3022 }, { "epoch": 0.9023880597014925, "grad_norm": 0.8015680226963483, "learning_rate": 1.2393231695818435e-07, "loss": 0.3424, "step": 3023 }, { "epoch": 0.902686567164179, "grad_norm": 0.8844284820852588, "learning_rate": 1.2318175770030388e-07, "loss": 0.3086, "step": 3024 }, { "epoch": 0.9029850746268657, "grad_norm": 1.4074341950451352, "learning_rate": 1.2243342070954485e-07, "loss": 0.3021, "step": 3025 }, { "epoch": 0.9032835820895523, "grad_norm": 0.8769608874212167, "learning_rate": 1.216873066855845e-07, "loss": 0.3686, "step": 3026 }, { "epoch": 0.9035820895522388, "grad_norm": 0.8331928786433949, "learning_rate": 1.2094341632602063e-07, "loss": 0.3711, "step": 3027 }, { "epoch": 0.9038805970149254, "grad_norm": 0.8114919419794893, "learning_rate": 1.2020175032637278e-07, "loss": 0.3104, "step": 3028 }, { "epoch": 0.904179104477612, "grad_norm": 0.8382734489680684, "learning_rate": 1.194623093800809e-07, "loss": 0.3222, "step": 3029 }, { "epoch": 0.9044776119402985, "grad_norm": 0.8090241891495422, "learning_rate": 1.1872509417850425e-07, "loss": 0.3221, "step": 3030 }, { "epoch": 0.9047761194029851, "grad_norm": 0.7762081906772144, "learning_rate": 1.179901054109206e-07, "loss": 0.3045, "step": 3031 }, { "epoch": 0.9050746268656716, "grad_norm": 0.7687394313447948, "learning_rate": 1.1725734376452691e-07, "loss": 0.3097, "step": 3032 }, { "epoch": 0.9053731343283582, "grad_norm": 0.8494786804724461, "learning_rate": 1.1652680992443765e-07, "loss": 0.2838, "step": 3033 }, { "epoch": 0.9056716417910448, "grad_norm": 0.7426720969321979, "learning_rate": 1.1579850457368342e-07, "loss": 0.3068, "step": 3034 }, { "epoch": 0.9059701492537313, "grad_norm": 0.8542802364049378, "learning_rate": 1.1507242839321281e-07, "loss": 0.3151, "step": 3035 }, { "epoch": 0.9062686567164179, "grad_norm": 0.8047828696688691, "learning_rate": 1.1434858206188864e-07, "loss": 0.3468, "step": 3036 }, { "epoch": 0.9065671641791044, "grad_norm": 0.8784472135696588, "learning_rate": 1.1362696625648983e-07, "loss": 0.3373, "step": 3037 }, { "epoch": 0.906865671641791, "grad_norm": 0.8176344259816754, "learning_rate": 1.1290758165171e-07, "loss": 0.3417, "step": 3038 }, { "epoch": 0.9071641791044777, "grad_norm": 0.84514932245467, "learning_rate": 1.1219042892015586e-07, "loss": 0.3071, "step": 3039 }, { "epoch": 0.9074626865671642, "grad_norm": 0.7963261643887735, "learning_rate": 1.1147550873234769e-07, "loss": 0.2971, "step": 3040 }, { "epoch": 0.9077611940298508, "grad_norm": 0.8505558295228394, "learning_rate": 1.1076282175671832e-07, "loss": 0.2992, "step": 3041 }, { "epoch": 0.9080597014925373, "grad_norm": 0.9346232263581281, "learning_rate": 1.1005236865961277e-07, "loss": 0.3754, "step": 3042 }, { "epoch": 0.9083582089552239, "grad_norm": 0.7821481899052896, "learning_rate": 1.0934415010528748e-07, "loss": 0.3066, "step": 3043 }, { "epoch": 0.9086567164179105, "grad_norm": 0.8906980599877051, "learning_rate": 1.0863816675590999e-07, "loss": 0.3545, "step": 3044 }, { "epoch": 0.908955223880597, "grad_norm": 0.8482058328893826, "learning_rate": 1.0793441927155673e-07, "loss": 0.3525, "step": 3045 }, { "epoch": 0.9092537313432836, "grad_norm": 0.8148838793718065, "learning_rate": 1.0723290831021471e-07, "loss": 0.3553, "step": 3046 }, { "epoch": 0.9095522388059701, "grad_norm": 0.8334685767932932, "learning_rate": 1.0653363452777953e-07, "loss": 0.3473, "step": 3047 }, { "epoch": 0.9098507462686567, "grad_norm": 0.7622666260376155, "learning_rate": 1.0583659857805545e-07, "loss": 0.3193, "step": 3048 }, { "epoch": 0.9101492537313433, "grad_norm": 0.8736868598440297, "learning_rate": 1.0514180111275391e-07, "loss": 0.3266, "step": 3049 }, { "epoch": 0.9104477611940298, "grad_norm": 0.8589697295961596, "learning_rate": 1.044492427814936e-07, "loss": 0.3075, "step": 3050 }, { "epoch": 0.9107462686567164, "grad_norm": 0.8373246294908004, "learning_rate": 1.0375892423179962e-07, "loss": 0.2927, "step": 3051 }, { "epoch": 0.9110447761194029, "grad_norm": 0.9038840022385726, "learning_rate": 1.030708461091029e-07, "loss": 0.3729, "step": 3052 }, { "epoch": 0.9113432835820896, "grad_norm": 0.846932324486075, "learning_rate": 1.0238500905673992e-07, "loss": 0.3424, "step": 3053 }, { "epoch": 0.9116417910447762, "grad_norm": 0.7863656327081202, "learning_rate": 1.0170141371595138e-07, "loss": 0.3041, "step": 3054 }, { "epoch": 0.9119402985074627, "grad_norm": 0.861611603226045, "learning_rate": 1.0102006072588239e-07, "loss": 0.303, "step": 3055 }, { "epoch": 0.9122388059701493, "grad_norm": 0.8354159151319559, "learning_rate": 1.0034095072358196e-07, "loss": 0.2905, "step": 3056 }, { "epoch": 0.9125373134328358, "grad_norm": 0.9076705935835198, "learning_rate": 9.966408434400026e-08, "loss": 0.3596, "step": 3057 }, { "epoch": 0.9128358208955224, "grad_norm": 0.7940993218471586, "learning_rate": 9.898946221999162e-08, "loss": 0.344, "step": 3058 }, { "epoch": 0.913134328358209, "grad_norm": 0.8683284494278827, "learning_rate": 9.83170849823109e-08, "loss": 0.2837, "step": 3059 }, { "epoch": 0.9134328358208955, "grad_norm": 0.8679841336149956, "learning_rate": 9.764695325961471e-08, "loss": 0.3426, "step": 3060 }, { "epoch": 0.9137313432835821, "grad_norm": 0.9136469022117164, "learning_rate": 9.697906767845988e-08, "loss": 0.3216, "step": 3061 }, { "epoch": 0.9140298507462686, "grad_norm": 0.9280938115066125, "learning_rate": 9.631342886330302e-08, "loss": 0.3441, "step": 3062 }, { "epoch": 0.9143283582089552, "grad_norm": 0.7989785840668151, "learning_rate": 9.565003743650019e-08, "loss": 0.3037, "step": 3063 }, { "epoch": 0.9146268656716418, "grad_norm": 0.7840797063644848, "learning_rate": 9.498889401830636e-08, "loss": 0.3593, "step": 3064 }, { "epoch": 0.9149253731343283, "grad_norm": 0.8742322626184073, "learning_rate": 9.432999922687397e-08, "loss": 0.3853, "step": 3065 }, { "epoch": 0.9152238805970149, "grad_norm": 0.8309689917110207, "learning_rate": 9.367335367825442e-08, "loss": 0.3548, "step": 3066 }, { "epoch": 0.9155223880597015, "grad_norm": 0.7682603888770035, "learning_rate": 9.301895798639465e-08, "loss": 0.3458, "step": 3067 }, { "epoch": 0.9158208955223881, "grad_norm": 0.8406021828495258, "learning_rate": 9.236681276313914e-08, "loss": 0.3439, "step": 3068 }, { "epoch": 0.9161194029850747, "grad_norm": 0.7676640227562936, "learning_rate": 9.171691861822735e-08, "loss": 0.3377, "step": 3069 }, { "epoch": 0.9164179104477612, "grad_norm": 0.8241123436966133, "learning_rate": 9.106927615929462e-08, "loss": 0.3461, "step": 3070 }, { "epoch": 0.9167164179104478, "grad_norm": 0.7865175510079805, "learning_rate": 9.042388599187158e-08, "loss": 0.3356, "step": 3071 }, { "epoch": 0.9170149253731343, "grad_norm": 0.8416832778666247, "learning_rate": 8.978074871938109e-08, "loss": 0.3225, "step": 3072 }, { "epoch": 0.9173134328358209, "grad_norm": 0.925094121401764, "learning_rate": 8.91398649431413e-08, "loss": 0.35, "step": 3073 }, { "epoch": 0.9176119402985075, "grad_norm": 0.9245170617415597, "learning_rate": 8.850123526236292e-08, "loss": 0.3473, "step": 3074 }, { "epoch": 0.917910447761194, "grad_norm": 0.8830284323162437, "learning_rate": 8.786486027414942e-08, "loss": 0.3263, "step": 3075 }, { "epoch": 0.9182089552238806, "grad_norm": 0.8322762071581037, "learning_rate": 8.723074057349568e-08, "loss": 0.3267, "step": 3076 }, { "epoch": 0.9185074626865671, "grad_norm": 0.8218742132897513, "learning_rate": 8.659887675328826e-08, "loss": 0.3525, "step": 3077 }, { "epoch": 0.9188059701492537, "grad_norm": 0.7524701289147285, "learning_rate": 8.596926940430406e-08, "loss": 0.347, "step": 3078 }, { "epoch": 0.9191044776119403, "grad_norm": 0.7973411802668999, "learning_rate": 8.534191911521106e-08, "loss": 0.3203, "step": 3079 }, { "epoch": 0.9194029850746268, "grad_norm": 0.755081736754136, "learning_rate": 8.471682647256619e-08, "loss": 0.3012, "step": 3080 }, { "epoch": 0.9197014925373135, "grad_norm": 0.7506268914890571, "learning_rate": 8.409399206081609e-08, "loss": 0.3365, "step": 3081 }, { "epoch": 0.92, "grad_norm": 0.8943820675775366, "learning_rate": 8.347341646229578e-08, "loss": 0.3694, "step": 3082 }, { "epoch": 0.9202985074626866, "grad_norm": 0.8565711585170784, "learning_rate": 8.285510025722781e-08, "loss": 0.3066, "step": 3083 }, { "epoch": 0.9205970149253732, "grad_norm": 0.6915296836970478, "learning_rate": 8.223904402372334e-08, "loss": 0.2471, "step": 3084 }, { "epoch": 0.9208955223880597, "grad_norm": 0.7910425039296988, "learning_rate": 8.16252483377794e-08, "loss": 0.3063, "step": 3085 }, { "epoch": 0.9211940298507463, "grad_norm": 0.8626761328335215, "learning_rate": 8.101371377328055e-08, "loss": 0.323, "step": 3086 }, { "epoch": 0.9214925373134328, "grad_norm": 0.811154258383053, "learning_rate": 8.040444090199634e-08, "loss": 0.3412, "step": 3087 }, { "epoch": 0.9217910447761194, "grad_norm": 0.8740316342417893, "learning_rate": 7.97974302935825e-08, "loss": 0.3647, "step": 3088 }, { "epoch": 0.922089552238806, "grad_norm": 0.9132701160611257, "learning_rate": 7.919268251557838e-08, "loss": 0.3722, "step": 3089 }, { "epoch": 0.9223880597014925, "grad_norm": 0.7691408535525245, "learning_rate": 7.85901981334089e-08, "loss": 0.3096, "step": 3090 }, { "epoch": 0.9226865671641791, "grad_norm": 0.839417162083958, "learning_rate": 7.798997771038236e-08, "loss": 0.3487, "step": 3091 }, { "epoch": 0.9229850746268656, "grad_norm": 0.869748021900408, "learning_rate": 7.739202180769013e-08, "loss": 0.3477, "step": 3092 }, { "epoch": 0.9232835820895522, "grad_norm": 0.8159308852427606, "learning_rate": 7.679633098440609e-08, "loss": 0.3281, "step": 3093 }, { "epoch": 0.9235820895522389, "grad_norm": 0.9129695074897101, "learning_rate": 7.620290579748723e-08, "loss": 0.356, "step": 3094 }, { "epoch": 0.9238805970149254, "grad_norm": 0.8713069828640914, "learning_rate": 7.561174680177114e-08, "loss": 0.329, "step": 3095 }, { "epoch": 0.924179104477612, "grad_norm": 0.7559944876673397, "learning_rate": 7.502285454997732e-08, "loss": 0.3056, "step": 3096 }, { "epoch": 0.9244776119402985, "grad_norm": 0.829846374274675, "learning_rate": 7.443622959270535e-08, "loss": 0.3374, "step": 3097 }, { "epoch": 0.9247761194029851, "grad_norm": 0.9373023004429427, "learning_rate": 7.385187247843567e-08, "loss": 0.3328, "step": 3098 }, { "epoch": 0.9250746268656717, "grad_norm": 0.7512508155243499, "learning_rate": 7.32697837535279e-08, "loss": 0.3161, "step": 3099 }, { "epoch": 0.9253731343283582, "grad_norm": 0.7853324032451395, "learning_rate": 7.268996396222056e-08, "loss": 0.3507, "step": 3100 }, { "epoch": 0.9256716417910448, "grad_norm": 0.8502143296601243, "learning_rate": 7.211241364663113e-08, "loss": 0.3854, "step": 3101 }, { "epoch": 0.9259701492537313, "grad_norm": 0.8250790098331866, "learning_rate": 7.153713334675516e-08, "loss": 0.3361, "step": 3102 }, { "epoch": 0.9262686567164179, "grad_norm": 0.8122977611367114, "learning_rate": 7.096412360046545e-08, "loss": 0.3371, "step": 3103 }, { "epoch": 0.9265671641791045, "grad_norm": 0.8020325644972325, "learning_rate": 7.039338494351261e-08, "loss": 0.327, "step": 3104 }, { "epoch": 0.926865671641791, "grad_norm": 0.8098687914753534, "learning_rate": 6.982491790952284e-08, "loss": 0.3228, "step": 3105 }, { "epoch": 0.9271641791044776, "grad_norm": 0.8343057527737529, "learning_rate": 6.925872302999931e-08, "loss": 0.3383, "step": 3106 }, { "epoch": 0.9274626865671641, "grad_norm": 0.8582956934198305, "learning_rate": 6.86948008343205e-08, "loss": 0.3705, "step": 3107 }, { "epoch": 0.9277611940298508, "grad_norm": 0.8051927725802557, "learning_rate": 6.813315184973968e-08, "loss": 0.3325, "step": 3108 }, { "epoch": 0.9280597014925374, "grad_norm": 0.7892911817116857, "learning_rate": 6.757377660138508e-08, "loss": 0.3413, "step": 3109 }, { "epoch": 0.9283582089552239, "grad_norm": 0.7927728140846071, "learning_rate": 6.701667561225894e-08, "loss": 0.3533, "step": 3110 }, { "epoch": 0.9286567164179105, "grad_norm": 0.8148550066294056, "learning_rate": 6.64618494032368e-08, "loss": 0.3803, "step": 3111 }, { "epoch": 0.928955223880597, "grad_norm": 0.8284362735918354, "learning_rate": 6.590929849306788e-08, "loss": 0.3344, "step": 3112 }, { "epoch": 0.9292537313432836, "grad_norm": 0.7868083459028574, "learning_rate": 6.535902339837392e-08, "loss": 0.3141, "step": 3113 }, { "epoch": 0.9295522388059702, "grad_norm": 0.8202287813945078, "learning_rate": 6.481102463364864e-08, "loss": 0.3267, "step": 3114 }, { "epoch": 0.9298507462686567, "grad_norm": 1.011406528561342, "learning_rate": 6.426530271125775e-08, "loss": 0.3625, "step": 3115 }, { "epoch": 0.9301492537313433, "grad_norm": 0.9568538573975606, "learning_rate": 6.372185814143756e-08, "loss": 0.3092, "step": 3116 }, { "epoch": 0.9304477611940298, "grad_norm": 0.8120426908903172, "learning_rate": 6.31806914322955e-08, "loss": 0.2998, "step": 3117 }, { "epoch": 0.9307462686567164, "grad_norm": 0.8154259717600727, "learning_rate": 6.264180308980933e-08, "loss": 0.3441, "step": 3118 }, { "epoch": 0.931044776119403, "grad_norm": 0.8366039008724436, "learning_rate": 6.210519361782685e-08, "loss": 0.3123, "step": 3119 }, { "epoch": 0.9313432835820895, "grad_norm": 0.7249537665139266, "learning_rate": 6.157086351806451e-08, "loss": 0.3187, "step": 3120 }, { "epoch": 0.9316417910447761, "grad_norm": 0.834336635014129, "learning_rate": 6.103881329010797e-08, "loss": 0.3238, "step": 3121 }, { "epoch": 0.9319402985074627, "grad_norm": 0.921548871205445, "learning_rate": 6.050904343141095e-08, "loss": 0.3376, "step": 3122 }, { "epoch": 0.9322388059701493, "grad_norm": 0.8938173280654804, "learning_rate": 5.998155443729586e-08, "loss": 0.3725, "step": 3123 }, { "epoch": 0.9325373134328359, "grad_norm": 0.8368698873876483, "learning_rate": 5.9456346800951805e-08, "loss": 0.352, "step": 3124 }, { "epoch": 0.9328358208955224, "grad_norm": 0.8916547607557948, "learning_rate": 5.8933421013435135e-08, "loss": 0.3883, "step": 3125 }, { "epoch": 0.933134328358209, "grad_norm": 0.8029972819508524, "learning_rate": 5.841277756366892e-08, "loss": 0.273, "step": 3126 }, { "epoch": 0.9334328358208955, "grad_norm": 0.9090008716050973, "learning_rate": 5.7894416938441834e-08, "loss": 0.3846, "step": 3127 }, { "epoch": 0.9337313432835821, "grad_norm": 0.737087695091503, "learning_rate": 5.737833962240841e-08, "loss": 0.3197, "step": 3128 }, { "epoch": 0.9340298507462687, "grad_norm": 0.9304110314164876, "learning_rate": 5.68645460980885e-08, "loss": 0.3527, "step": 3129 }, { "epoch": 0.9343283582089552, "grad_norm": 0.8089432663681636, "learning_rate": 5.635303684586646e-08, "loss": 0.3151, "step": 3130 }, { "epoch": 0.9346268656716418, "grad_norm": 1.165243151408962, "learning_rate": 5.584381234399111e-08, "loss": 0.3267, "step": 3131 }, { "epoch": 0.9349253731343283, "grad_norm": 0.7348025145722581, "learning_rate": 5.533687306857466e-08, "loss": 0.2696, "step": 3132 }, { "epoch": 0.9352238805970149, "grad_norm": 0.8262555992907743, "learning_rate": 5.483221949359324e-08, "loss": 0.3516, "step": 3133 }, { "epoch": 0.9355223880597014, "grad_norm": 0.9156843006759944, "learning_rate": 5.432985209088526e-08, "loss": 0.3481, "step": 3134 }, { "epoch": 0.935820895522388, "grad_norm": 0.8572239132276424, "learning_rate": 5.3829771330152495e-08, "loss": 0.3254, "step": 3135 }, { "epoch": 0.9361194029850747, "grad_norm": 1.028962020667184, "learning_rate": 5.3331977678958145e-08, "loss": 0.3123, "step": 3136 }, { "epoch": 0.9364179104477612, "grad_norm": 0.83452568472891, "learning_rate": 5.2836471602727144e-08, "loss": 0.3421, "step": 3137 }, { "epoch": 0.9367164179104478, "grad_norm": 0.8286325370096002, "learning_rate": 5.234325356474529e-08, "loss": 0.3499, "step": 3138 }, { "epoch": 0.9370149253731344, "grad_norm": 0.8160036150223631, "learning_rate": 5.185232402615953e-08, "loss": 0.3105, "step": 3139 }, { "epoch": 0.9373134328358209, "grad_norm": 0.8066012270688009, "learning_rate": 5.1363683445977144e-08, "loss": 0.3075, "step": 3140 }, { "epoch": 0.9376119402985075, "grad_norm": 0.8033304417193656, "learning_rate": 5.087733228106517e-08, "loss": 0.3218, "step": 3141 }, { "epoch": 0.937910447761194, "grad_norm": 0.8465912538140584, "learning_rate": 5.0393270986150155e-08, "loss": 0.3174, "step": 3142 }, { "epoch": 0.9382089552238806, "grad_norm": 0.8023677069921564, "learning_rate": 4.991150001381756e-08, "loss": 0.3562, "step": 3143 }, { "epoch": 0.9385074626865672, "grad_norm": 0.9290258509536511, "learning_rate": 4.9432019814511235e-08, "loss": 0.3718, "step": 3144 }, { "epoch": 0.9388059701492537, "grad_norm": 0.8889616159939564, "learning_rate": 4.8954830836533963e-08, "loss": 0.3495, "step": 3145 }, { "epoch": 0.9391044776119403, "grad_norm": 0.811509055722367, "learning_rate": 4.847993352604524e-08, "loss": 0.3429, "step": 3146 }, { "epoch": 0.9394029850746268, "grad_norm": 0.8199197753898051, "learning_rate": 4.800732832706323e-08, "loss": 0.3656, "step": 3147 }, { "epoch": 0.9397014925373134, "grad_norm": 0.9063245467477791, "learning_rate": 4.753701568146168e-08, "loss": 0.3317, "step": 3148 }, { "epoch": 0.94, "grad_norm": 0.8342353851057329, "learning_rate": 4.706899602897136e-08, "loss": 0.3312, "step": 3149 }, { "epoch": 0.9402985074626866, "grad_norm": 0.8720693495116465, "learning_rate": 4.6603269807179716e-08, "loss": 0.3593, "step": 3150 }, { "epoch": 0.9405970149253732, "grad_norm": 0.8586192607934766, "learning_rate": 4.6139837451529004e-08, "loss": 0.2832, "step": 3151 }, { "epoch": 0.9408955223880597, "grad_norm": 0.9695827592543284, "learning_rate": 4.5678699395317326e-08, "loss": 0.409, "step": 3152 }, { "epoch": 0.9411940298507463, "grad_norm": 0.8419990980437638, "learning_rate": 4.5219856069697866e-08, "loss": 0.3268, "step": 3153 }, { "epoch": 0.9414925373134329, "grad_norm": 0.8452212009450679, "learning_rate": 4.476330790367717e-08, "loss": 0.3211, "step": 3154 }, { "epoch": 0.9417910447761194, "grad_norm": 0.8498008661362966, "learning_rate": 4.4309055324117386e-08, "loss": 0.3485, "step": 3155 }, { "epoch": 0.942089552238806, "grad_norm": 0.8431790042502505, "learning_rate": 4.385709875573324e-08, "loss": 0.3482, "step": 3156 }, { "epoch": 0.9423880597014925, "grad_norm": 0.8448462756069092, "learning_rate": 4.340743862109309e-08, "loss": 0.3571, "step": 3157 }, { "epoch": 0.9426865671641791, "grad_norm": 0.8098626117139088, "learning_rate": 4.296007534061869e-08, "loss": 0.3451, "step": 3158 }, { "epoch": 0.9429850746268656, "grad_norm": 0.9323258425698503, "learning_rate": 4.2515009332582954e-08, "loss": 0.3325, "step": 3159 }, { "epoch": 0.9432835820895522, "grad_norm": 0.8628485400460011, "learning_rate": 4.207224101311247e-08, "loss": 0.3334, "step": 3160 }, { "epoch": 0.9435820895522388, "grad_norm": 0.8956281853483311, "learning_rate": 4.163177079618441e-08, "loss": 0.3362, "step": 3161 }, { "epoch": 0.9438805970149253, "grad_norm": 0.8074849186203403, "learning_rate": 4.1193599093627964e-08, "loss": 0.3114, "step": 3162 }, { "epoch": 0.9441791044776119, "grad_norm": 0.8490755318936722, "learning_rate": 4.0757726315122646e-08, "loss": 0.3558, "step": 3163 }, { "epoch": 0.9444776119402986, "grad_norm": 0.7610965070669408, "learning_rate": 4.032415286819941e-08, "loss": 0.3158, "step": 3164 }, { "epoch": 0.9447761194029851, "grad_norm": 0.7655473588220373, "learning_rate": 3.989287915823842e-08, "loss": 0.3121, "step": 3165 }, { "epoch": 0.9450746268656717, "grad_norm": 0.8444397049637858, "learning_rate": 3.9463905588470186e-08, "loss": 0.3323, "step": 3166 }, { "epoch": 0.9453731343283582, "grad_norm": 0.8236973140905199, "learning_rate": 3.9037232559974714e-08, "loss": 0.3348, "step": 3167 }, { "epoch": 0.9456716417910448, "grad_norm": 0.8003724680619415, "learning_rate": 3.861286047168067e-08, "loss": 0.3795, "step": 3168 }, { "epoch": 0.9459701492537314, "grad_norm": 1.0280919880161592, "learning_rate": 3.8190789720365665e-08, "loss": 0.3209, "step": 3169 }, { "epoch": 0.9462686567164179, "grad_norm": 0.8000546455088566, "learning_rate": 3.777102070065569e-08, "loss": 0.3505, "step": 3170 }, { "epoch": 0.9465671641791045, "grad_norm": 0.8935839440883604, "learning_rate": 3.735355380502431e-08, "loss": 0.3476, "step": 3171 }, { "epoch": 0.946865671641791, "grad_norm": 0.8848630298774833, "learning_rate": 3.693838942379291e-08, "loss": 0.3168, "step": 3172 }, { "epoch": 0.9471641791044776, "grad_norm": 0.7685629417514018, "learning_rate": 3.6525527945130424e-08, "loss": 0.3267, "step": 3173 }, { "epoch": 0.9474626865671641, "grad_norm": 0.7874491430144344, "learning_rate": 3.611496975505169e-08, "loss": 0.3076, "step": 3174 }, { "epoch": 0.9477611940298507, "grad_norm": 0.8513167764285567, "learning_rate": 3.5706715237419366e-08, "loss": 0.3351, "step": 3175 }, { "epoch": 0.9480597014925373, "grad_norm": 0.7679536427243635, "learning_rate": 3.5300764773940896e-08, "loss": 0.333, "step": 3176 }, { "epoch": 0.9483582089552239, "grad_norm": 0.7672420889067244, "learning_rate": 3.4897118744170175e-08, "loss": 0.3435, "step": 3177 }, { "epoch": 0.9486567164179105, "grad_norm": 0.872282837076114, "learning_rate": 3.4495777525506703e-08, "loss": 0.3501, "step": 3178 }, { "epoch": 0.948955223880597, "grad_norm": 0.8460843443255879, "learning_rate": 3.4096741493194196e-08, "loss": 0.3564, "step": 3179 }, { "epoch": 0.9492537313432836, "grad_norm": 0.8100002833851134, "learning_rate": 3.3700011020322e-08, "loss": 0.3143, "step": 3180 }, { "epoch": 0.9495522388059702, "grad_norm": 0.7984025736476822, "learning_rate": 3.330558647782312e-08, "loss": 0.3666, "step": 3181 }, { "epoch": 0.9498507462686567, "grad_norm": 0.754287300991587, "learning_rate": 3.291346823447533e-08, "loss": 0.3031, "step": 3182 }, { "epoch": 0.9501492537313433, "grad_norm": 0.7752734270024597, "learning_rate": 3.252365665689955e-08, "loss": 0.3004, "step": 3183 }, { "epoch": 0.9504477611940298, "grad_norm": 0.8191799374185267, "learning_rate": 3.213615210955978e-08, "loss": 0.3527, "step": 3184 }, { "epoch": 0.9507462686567164, "grad_norm": 0.907730673037401, "learning_rate": 3.1750954954763716e-08, "loss": 0.3272, "step": 3185 }, { "epoch": 0.951044776119403, "grad_norm": 0.767757547991333, "learning_rate": 3.136806555266103e-08, "loss": 0.278, "step": 3186 }, { "epoch": 0.9513432835820895, "grad_norm": 0.8243908687352361, "learning_rate": 3.098748426124398e-08, "loss": 0.3542, "step": 3187 }, { "epoch": 0.9516417910447761, "grad_norm": 0.712848097621701, "learning_rate": 3.0609211436347095e-08, "loss": 0.2936, "step": 3188 }, { "epoch": 0.9519402985074626, "grad_norm": 0.8024929014526022, "learning_rate": 3.02332474316458e-08, "loss": 0.325, "step": 3189 }, { "epoch": 0.9522388059701492, "grad_norm": 0.8494487804866518, "learning_rate": 2.985959259865778e-08, "loss": 0.3904, "step": 3190 }, { "epoch": 0.9525373134328359, "grad_norm": 0.7778696644607079, "learning_rate": 2.9488247286740546e-08, "loss": 0.3079, "step": 3191 }, { "epoch": 0.9528358208955224, "grad_norm": 0.7349126770422241, "learning_rate": 2.9119211843093574e-08, "loss": 0.2997, "step": 3192 }, { "epoch": 0.953134328358209, "grad_norm": 0.7872392631708472, "learning_rate": 2.8752486612755593e-08, "loss": 0.2926, "step": 3193 }, { "epoch": 0.9534328358208956, "grad_norm": 0.8767962603475438, "learning_rate": 2.8388071938605655e-08, "loss": 0.3712, "step": 3194 }, { "epoch": 0.9537313432835821, "grad_norm": 0.779215026325899, "learning_rate": 2.802596816136316e-08, "loss": 0.3147, "step": 3195 }, { "epoch": 0.9540298507462687, "grad_norm": 0.7598752044921813, "learning_rate": 2.766617561958618e-08, "loss": 0.3079, "step": 3196 }, { "epoch": 0.9543283582089552, "grad_norm": 0.7165696161507437, "learning_rate": 2.7308694649671453e-08, "loss": 0.3027, "step": 3197 }, { "epoch": 0.9546268656716418, "grad_norm": 0.8812874250973933, "learning_rate": 2.6953525585855233e-08, "loss": 0.3666, "step": 3198 }, { "epoch": 0.9549253731343283, "grad_norm": 0.876566087660891, "learning_rate": 2.660066876021189e-08, "loss": 0.3457, "step": 3199 }, { "epoch": 0.9552238805970149, "grad_norm": 0.7854233313802679, "learning_rate": 2.625012450265446e-08, "loss": 0.3486, "step": 3200 }, { "epoch": 0.9555223880597015, "grad_norm": 0.8325761566945635, "learning_rate": 2.5901893140932444e-08, "loss": 0.3254, "step": 3201 }, { "epoch": 0.955820895522388, "grad_norm": 0.8166375313979258, "learning_rate": 2.555597500063456e-08, "loss": 0.3446, "step": 3202 }, { "epoch": 0.9561194029850746, "grad_norm": 0.9210937286901646, "learning_rate": 2.521237040518515e-08, "loss": 0.3436, "step": 3203 }, { "epoch": 0.9564179104477611, "grad_norm": 0.7559699274090332, "learning_rate": 2.4871079675846398e-08, "loss": 0.3649, "step": 3204 }, { "epoch": 0.9567164179104478, "grad_norm": 0.7400183635230114, "learning_rate": 2.4532103131716668e-08, "loss": 0.2962, "step": 3205 }, { "epoch": 0.9570149253731344, "grad_norm": 0.7802880622650665, "learning_rate": 2.419544108973104e-08, "loss": 0.2953, "step": 3206 }, { "epoch": 0.9573134328358209, "grad_norm": 0.8494660495105342, "learning_rate": 2.3861093864660233e-08, "loss": 0.3352, "step": 3207 }, { "epoch": 0.9576119402985075, "grad_norm": 0.8112621260443245, "learning_rate": 2.3529061769110573e-08, "loss": 0.3118, "step": 3208 }, { "epoch": 0.957910447761194, "grad_norm": 0.7589488656121378, "learning_rate": 2.3199345113524007e-08, "loss": 0.2918, "step": 3209 }, { "epoch": 0.9582089552238806, "grad_norm": 0.8313292700601174, "learning_rate": 2.287194420617783e-08, "loss": 0.3353, "step": 3210 }, { "epoch": 0.9585074626865672, "grad_norm": 0.871232307176741, "learning_rate": 2.254685935318357e-08, "loss": 0.273, "step": 3211 }, { "epoch": 0.9588059701492537, "grad_norm": 0.8200691382729434, "learning_rate": 2.222409085848809e-08, "loss": 0.3358, "step": 3212 }, { "epoch": 0.9591044776119403, "grad_norm": 0.8045701864458568, "learning_rate": 2.1903639023871658e-08, "loss": 0.3378, "step": 3213 }, { "epoch": 0.9594029850746268, "grad_norm": 0.8197181943630443, "learning_rate": 2.1585504148949056e-08, "loss": 0.3174, "step": 3214 }, { "epoch": 0.9597014925373134, "grad_norm": 0.8494644190786257, "learning_rate": 2.1269686531168456e-08, "loss": 0.3223, "step": 3215 }, { "epoch": 0.96, "grad_norm": 0.9396349631933137, "learning_rate": 2.095618646581199e-08, "loss": 0.3686, "step": 3216 }, { "epoch": 0.9602985074626865, "grad_norm": 0.8323583157384636, "learning_rate": 2.064500424599436e-08, "loss": 0.3196, "step": 3217 }, { "epoch": 0.9605970149253731, "grad_norm": 0.8892166342620139, "learning_rate": 2.0336140162663386e-08, "loss": 0.3216, "step": 3218 }, { "epoch": 0.9608955223880598, "grad_norm": 0.8483612538763416, "learning_rate": 2.002959450459918e-08, "loss": 0.3309, "step": 3219 }, { "epoch": 0.9611940298507463, "grad_norm": 0.7559863326126044, "learning_rate": 1.9725367558415253e-08, "loss": 0.3091, "step": 3220 }, { "epoch": 0.9614925373134329, "grad_norm": 0.8020646850199934, "learning_rate": 1.9423459608555462e-08, "loss": 0.3299, "step": 3221 }, { "epoch": 0.9617910447761194, "grad_norm": 0.7418019278634108, "learning_rate": 1.912387093729706e-08, "loss": 0.302, "step": 3222 }, { "epoch": 0.962089552238806, "grad_norm": 0.7754195649660184, "learning_rate": 1.8826601824747936e-08, "loss": 0.3138, "step": 3223 }, { "epoch": 0.9623880597014925, "grad_norm": 0.9516965112975473, "learning_rate": 1.8531652548847146e-08, "loss": 0.384, "step": 3224 }, { "epoch": 0.9626865671641791, "grad_norm": 0.8417351207690223, "learning_rate": 1.8239023385365484e-08, "loss": 0.3457, "step": 3225 }, { "epoch": 0.9629850746268657, "grad_norm": 0.7831713431001753, "learning_rate": 1.7948714607903816e-08, "loss": 0.3093, "step": 3226 }, { "epoch": 0.9632835820895522, "grad_norm": 0.7856443433041905, "learning_rate": 1.7660726487894188e-08, "loss": 0.314, "step": 3227 }, { "epoch": 0.9635820895522388, "grad_norm": 0.872583774357775, "learning_rate": 1.7375059294598152e-08, "loss": 0.3313, "step": 3228 }, { "epoch": 0.9638805970149253, "grad_norm": 0.7486216858015543, "learning_rate": 1.7091713295107337e-08, "loss": 0.2724, "step": 3229 }, { "epoch": 0.9641791044776119, "grad_norm": 0.8129958391856543, "learning_rate": 1.6810688754343717e-08, "loss": 0.3504, "step": 3230 }, { "epoch": 0.9644776119402985, "grad_norm": 0.7739654221427428, "learning_rate": 1.6531985935058504e-08, "loss": 0.2831, "step": 3231 }, { "epoch": 0.964776119402985, "grad_norm": 0.8564741384154801, "learning_rate": 1.6255605097831584e-08, "loss": 0.3567, "step": 3232 }, { "epoch": 0.9650746268656717, "grad_norm": 0.8973102182259116, "learning_rate": 1.598154650107264e-08, "loss": 0.3538, "step": 3233 }, { "epoch": 0.9653731343283583, "grad_norm": 0.8793936915110651, "learning_rate": 1.570981040101949e-08, "loss": 0.3381, "step": 3234 }, { "epoch": 0.9656716417910448, "grad_norm": 0.7753860087917981, "learning_rate": 1.5440397051739163e-08, "loss": 0.2941, "step": 3235 }, { "epoch": 0.9659701492537314, "grad_norm": 0.7683611135195979, "learning_rate": 1.517330670512629e-08, "loss": 0.3361, "step": 3236 }, { "epoch": 0.9662686567164179, "grad_norm": 0.8618392461978379, "learning_rate": 1.4908539610903882e-08, "loss": 0.3968, "step": 3237 }, { "epoch": 0.9665671641791045, "grad_norm": 0.7885885536521883, "learning_rate": 1.4646096016622813e-08, "loss": 0.3317, "step": 3238 }, { "epoch": 0.966865671641791, "grad_norm": 0.8240073075438972, "learning_rate": 1.4385976167661241e-08, "loss": 0.3054, "step": 3239 }, { "epoch": 0.9671641791044776, "grad_norm": 0.8544446967641939, "learning_rate": 1.412818030722546e-08, "loss": 0.3274, "step": 3240 }, { "epoch": 0.9674626865671642, "grad_norm": 0.7981604304770294, "learning_rate": 1.387270867634738e-08, "loss": 0.3452, "step": 3241 }, { "epoch": 0.9677611940298507, "grad_norm": 0.7709030094314585, "learning_rate": 1.3619561513887603e-08, "loss": 0.3228, "step": 3242 }, { "epoch": 0.9680597014925373, "grad_norm": 0.9060585553579815, "learning_rate": 1.33687390565318e-08, "loss": 0.3067, "step": 3243 }, { "epoch": 0.9683582089552238, "grad_norm": 0.819120949141105, "learning_rate": 1.3120241538793487e-08, "loss": 0.3123, "step": 3244 }, { "epoch": 0.9686567164179104, "grad_norm": 0.7972272943948284, "learning_rate": 1.287406919301154e-08, "loss": 0.3486, "step": 3245 }, { "epoch": 0.968955223880597, "grad_norm": 0.8855995326432275, "learning_rate": 1.2630222249351287e-08, "loss": 0.3682, "step": 3246 }, { "epoch": 0.9692537313432836, "grad_norm": 0.7894355860076914, "learning_rate": 1.2388700935803133e-08, "loss": 0.3357, "step": 3247 }, { "epoch": 0.9695522388059702, "grad_norm": 0.8366342328352925, "learning_rate": 1.214950547818422e-08, "loss": 0.35, "step": 3248 }, { "epoch": 0.9698507462686567, "grad_norm": 0.8043634556674311, "learning_rate": 1.191263610013621e-08, "loss": 0.3296, "step": 3249 }, { "epoch": 0.9701492537313433, "grad_norm": 0.8860564254201281, "learning_rate": 1.1678093023126392e-08, "loss": 0.3182, "step": 3250 }, { "epoch": 0.9704477611940299, "grad_norm": 0.8007581402509059, "learning_rate": 1.144587646644657e-08, "loss": 0.3505, "step": 3251 }, { "epoch": 0.9707462686567164, "grad_norm": 0.7966262785299418, "learning_rate": 1.121598664721335e-08, "loss": 0.34, "step": 3252 }, { "epoch": 0.971044776119403, "grad_norm": 0.7323979717681582, "learning_rate": 1.0988423780368685e-08, "loss": 0.2943, "step": 3253 }, { "epoch": 0.9713432835820895, "grad_norm": 0.8685782481705663, "learning_rate": 1.0763188078678211e-08, "loss": 0.2686, "step": 3254 }, { "epoch": 0.9716417910447761, "grad_norm": 0.8163158923186468, "learning_rate": 1.0540279752731252e-08, "loss": 0.3165, "step": 3255 }, { "epoch": 0.9719402985074627, "grad_norm": 0.8202125520717879, "learning_rate": 1.0319699010942207e-08, "loss": 0.2752, "step": 3256 }, { "epoch": 0.9722388059701492, "grad_norm": 0.7860376878250228, "learning_rate": 1.0101446059548604e-08, "loss": 0.3302, "step": 3257 }, { "epoch": 0.9725373134328358, "grad_norm": 0.7983641641459549, "learning_rate": 9.88552110261165e-09, "loss": 0.3623, "step": 3258 }, { "epoch": 0.9728358208955223, "grad_norm": 0.8275216318089765, "learning_rate": 9.671924342015692e-09, "loss": 0.354, "step": 3259 }, { "epoch": 0.9731343283582089, "grad_norm": 0.8231617072873623, "learning_rate": 9.460655977468757e-09, "loss": 0.2916, "step": 3260 }, { "epoch": 0.9734328358208956, "grad_norm": 0.8054748071183675, "learning_rate": 9.251716206501449e-09, "loss": 0.3232, "step": 3261 }, { "epoch": 0.9737313432835821, "grad_norm": 0.9502388363988287, "learning_rate": 9.045105224467221e-09, "loss": 0.374, "step": 3262 }, { "epoch": 0.9740298507462687, "grad_norm": 0.8180489799155873, "learning_rate": 8.84082322454266e-09, "loss": 0.2988, "step": 3263 }, { "epoch": 0.9743283582089552, "grad_norm": 0.8508694679094144, "learning_rate": 8.638870397726374e-09, "loss": 0.352, "step": 3264 }, { "epoch": 0.9746268656716418, "grad_norm": 0.8289100432687119, "learning_rate": 8.439246932839262e-09, "loss": 0.3237, "step": 3265 }, { "epoch": 0.9749253731343284, "grad_norm": 0.8121348616630165, "learning_rate": 8.241953016524251e-09, "loss": 0.3371, "step": 3266 }, { "epoch": 0.9752238805970149, "grad_norm": 0.8052003128568289, "learning_rate": 8.04698883324656e-09, "loss": 0.3187, "step": 3267 }, { "epoch": 0.9755223880597015, "grad_norm": 0.7856612139320543, "learning_rate": 7.854354565292877e-09, "loss": 0.3022, "step": 3268 }, { "epoch": 0.975820895522388, "grad_norm": 0.8114505322634475, "learning_rate": 7.66405039277135e-09, "loss": 0.3531, "step": 3269 }, { "epoch": 0.9761194029850746, "grad_norm": 0.9170043571131018, "learning_rate": 7.47607649361215e-09, "loss": 0.3955, "step": 3270 }, { "epoch": 0.9764179104477612, "grad_norm": 0.8428130140130116, "learning_rate": 7.290433043565803e-09, "loss": 0.3136, "step": 3271 }, { "epoch": 0.9767164179104477, "grad_norm": 0.843369641143616, "learning_rate": 7.107120216205132e-09, "loss": 0.3433, "step": 3272 }, { "epoch": 0.9770149253731343, "grad_norm": 0.7547310709601116, "learning_rate": 6.926138182922204e-09, "loss": 0.2943, "step": 3273 }, { "epoch": 0.977313432835821, "grad_norm": 0.7834878975003178, "learning_rate": 6.747487112931661e-09, "loss": 0.3006, "step": 3274 }, { "epoch": 0.9776119402985075, "grad_norm": 0.8222964720647756, "learning_rate": 6.57116717326739e-09, "loss": 0.3323, "step": 3275 }, { "epoch": 0.9779104477611941, "grad_norm": 0.7184401155481505, "learning_rate": 6.397178528784464e-09, "loss": 0.2594, "step": 3276 }, { "epoch": 0.9782089552238806, "grad_norm": 0.8094077697755245, "learning_rate": 6.225521342158036e-09, "loss": 0.3232, "step": 3277 }, { "epoch": 0.9785074626865672, "grad_norm": 0.8180072345165923, "learning_rate": 6.056195773883056e-09, "loss": 0.3605, "step": 3278 }, { "epoch": 0.9788059701492537, "grad_norm": 0.8328869774640257, "learning_rate": 5.889201982275383e-09, "loss": 0.3574, "step": 3279 }, { "epoch": 0.9791044776119403, "grad_norm": 0.80665947348926, "learning_rate": 5.724540123469569e-09, "loss": 0.3129, "step": 3280 }, { "epoch": 0.9794029850746269, "grad_norm": 0.803178162202097, "learning_rate": 5.562210351420794e-09, "loss": 0.3278, "step": 3281 }, { "epoch": 0.9797014925373134, "grad_norm": 0.7597600431927148, "learning_rate": 5.402212817903207e-09, "loss": 0.2825, "step": 3282 }, { "epoch": 0.98, "grad_norm": 0.7663037088333483, "learning_rate": 5.244547672510758e-09, "loss": 0.2932, "step": 3283 }, { "epoch": 0.9802985074626865, "grad_norm": 0.9425580206593992, "learning_rate": 5.0892150626566384e-09, "loss": 0.3402, "step": 3284 }, { "epoch": 0.9805970149253731, "grad_norm": 0.8617742011068167, "learning_rate": 4.93621513357273e-09, "loss": 0.3574, "step": 3285 }, { "epoch": 0.9808955223880597, "grad_norm": 0.9143536666483425, "learning_rate": 4.785548028310438e-09, "loss": 0.3447, "step": 3286 }, { "epoch": 0.9811940298507462, "grad_norm": 0.7753440496822905, "learning_rate": 4.637213887739856e-09, "loss": 0.3166, "step": 3287 }, { "epoch": 0.9814925373134329, "grad_norm": 0.870713877541679, "learning_rate": 4.4912128505497644e-09, "loss": 0.3141, "step": 3288 }, { "epoch": 0.9817910447761194, "grad_norm": 0.9813206925694251, "learning_rate": 4.347545053247637e-09, "loss": 0.3278, "step": 3289 }, { "epoch": 0.982089552238806, "grad_norm": 0.8586592952573566, "learning_rate": 4.20621063015908e-09, "loss": 0.3509, "step": 3290 }, { "epoch": 0.9823880597014926, "grad_norm": 0.7818349485955345, "learning_rate": 4.067209713428388e-09, "loss": 0.3258, "step": 3291 }, { "epoch": 0.9826865671641791, "grad_norm": 0.7927890018750996, "learning_rate": 3.930542433018547e-09, "loss": 0.3466, "step": 3292 }, { "epoch": 0.9829850746268657, "grad_norm": 0.7627515359114002, "learning_rate": 3.796208916709565e-09, "loss": 0.3308, "step": 3293 }, { "epoch": 0.9832835820895522, "grad_norm": 0.8615140995217084, "learning_rate": 3.66420929010014e-09, "loss": 0.3603, "step": 3294 }, { "epoch": 0.9835820895522388, "grad_norm": 0.8056178237621043, "learning_rate": 3.5345436766065498e-09, "loss": 0.3186, "step": 3295 }, { "epoch": 0.9838805970149254, "grad_norm": 0.8976274390176755, "learning_rate": 3.407212197463483e-09, "loss": 0.3176, "step": 3296 }, { "epoch": 0.9841791044776119, "grad_norm": 0.896342713527404, "learning_rate": 3.282214971722375e-09, "loss": 0.345, "step": 3297 }, { "epoch": 0.9844776119402985, "grad_norm": 0.8134845252531054, "learning_rate": 3.159552116252795e-09, "loss": 0.3436, "step": 3298 }, { "epoch": 0.984776119402985, "grad_norm": 0.7899298838638901, "learning_rate": 3.0392237457413377e-09, "loss": 0.3398, "step": 3299 }, { "epoch": 0.9850746268656716, "grad_norm": 0.8684483689426634, "learning_rate": 2.9212299726921746e-09, "loss": 0.3693, "step": 3300 }, { "epoch": 0.9853731343283582, "grad_norm": 0.7802224704645674, "learning_rate": 2.80557090742678e-09, "loss": 0.3245, "step": 3301 }, { "epoch": 0.9856716417910448, "grad_norm": 0.759169607116071, "learning_rate": 2.6922466580830975e-09, "loss": 0.2923, "step": 3302 }, { "epoch": 0.9859701492537314, "grad_norm": 0.8262473666639696, "learning_rate": 2.5812573306169263e-09, "loss": 0.3262, "step": 3303 }, { "epoch": 0.986268656716418, "grad_norm": 0.8618008393149142, "learning_rate": 2.4726030288005356e-09, "loss": 0.3574, "step": 3304 }, { "epoch": 0.9865671641791045, "grad_norm": 0.7676567792662928, "learning_rate": 2.3662838542229392e-09, "loss": 0.2872, "step": 3305 }, { "epoch": 0.9868656716417911, "grad_norm": 0.7963744228618038, "learning_rate": 2.2622999062899e-09, "loss": 0.3347, "step": 3306 }, { "epoch": 0.9871641791044776, "grad_norm": 0.862404131304968, "learning_rate": 2.160651282224202e-09, "loss": 0.3633, "step": 3307 }, { "epoch": 0.9874626865671642, "grad_norm": 0.8788985953060575, "learning_rate": 2.0613380770645452e-09, "loss": 0.3345, "step": 3308 }, { "epoch": 0.9877611940298507, "grad_norm": 0.8092901595234544, "learning_rate": 1.9643603836666527e-09, "loss": 0.364, "step": 3309 }, { "epoch": 0.9880597014925373, "grad_norm": 0.8618273355933869, "learning_rate": 1.869718292701883e-09, "loss": 0.3583, "step": 3310 }, { "epoch": 0.9883582089552239, "grad_norm": 0.7424770027809615, "learning_rate": 1.7774118926586204e-09, "loss": 0.3108, "step": 3311 }, { "epoch": 0.9886567164179104, "grad_norm": 0.8322502568478948, "learning_rate": 1.6874412698408837e-09, "loss": 0.3482, "step": 3312 }, { "epoch": 0.988955223880597, "grad_norm": 0.767877374434005, "learning_rate": 1.599806508368884e-09, "loss": 0.2953, "step": 3313 }, { "epoch": 0.9892537313432835, "grad_norm": 0.7552281111497002, "learning_rate": 1.5145076901795785e-09, "loss": 0.3104, "step": 3314 }, { "epoch": 0.9895522388059701, "grad_norm": 0.8290814127495036, "learning_rate": 1.431544895024728e-09, "loss": 0.3276, "step": 3315 }, { "epoch": 0.9898507462686568, "grad_norm": 0.9192486683083075, "learning_rate": 1.3509182004725618e-09, "loss": 0.3404, "step": 3316 }, { "epoch": 0.9901492537313433, "grad_norm": 0.8117587672326521, "learning_rate": 1.2726276819075012e-09, "loss": 0.3382, "step": 3317 }, { "epoch": 0.9904477611940299, "grad_norm": 0.8043944237738767, "learning_rate": 1.1966734125287704e-09, "loss": 0.3145, "step": 3318 }, { "epoch": 0.9907462686567164, "grad_norm": 0.7769831757123559, "learning_rate": 1.1230554633523406e-09, "loss": 0.3126, "step": 3319 }, { "epoch": 0.991044776119403, "grad_norm": 0.8223550217599328, "learning_rate": 1.0517739032084307e-09, "loss": 0.3554, "step": 3320 }, { "epoch": 0.9913432835820896, "grad_norm": 0.7735518380562161, "learning_rate": 9.828287987442842e-10, "loss": 0.3166, "step": 3321 }, { "epoch": 0.9916417910447761, "grad_norm": 0.8254997943791005, "learning_rate": 9.162202144213927e-10, "loss": 0.353, "step": 3322 }, { "epoch": 0.9919402985074627, "grad_norm": 0.813538581959752, "learning_rate": 8.519482125171618e-10, "loss": 0.3559, "step": 3323 }, { "epoch": 0.9922388059701492, "grad_norm": 0.7599430996322734, "learning_rate": 7.900128531249107e-10, "loss": 0.3131, "step": 3324 }, { "epoch": 0.9925373134328358, "grad_norm": 0.8113270433764241, "learning_rate": 7.304141941522069e-10, "loss": 0.3595, "step": 3325 }, { "epoch": 0.9928358208955224, "grad_norm": 0.9811336819537293, "learning_rate": 6.731522913222544e-10, "loss": 0.3199, "step": 3326 }, { "epoch": 0.9931343283582089, "grad_norm": 0.8454076686303549, "learning_rate": 6.18227198173893e-10, "loss": 0.3376, "step": 3327 }, { "epoch": 0.9934328358208955, "grad_norm": 0.8629013881810315, "learning_rate": 5.656389660604888e-10, "loss": 0.3391, "step": 3328 }, { "epoch": 0.993731343283582, "grad_norm": 0.9074954180694659, "learning_rate": 5.153876441510441e-10, "loss": 0.3412, "step": 3329 }, { "epoch": 0.9940298507462687, "grad_norm": 0.8314510642754133, "learning_rate": 4.674732794288095e-10, "loss": 0.3593, "step": 3330 }, { "epoch": 0.9943283582089553, "grad_norm": 0.9116668780870614, "learning_rate": 4.2189591669322684e-10, "loss": 0.3986, "step": 3331 }, { "epoch": 0.9946268656716418, "grad_norm": 0.7948460052793381, "learning_rate": 3.786555985574314e-10, "loss": 0.2573, "step": 3332 }, { "epoch": 0.9949253731343284, "grad_norm": 0.7312524937623769, "learning_rate": 3.3775236545019464e-10, "loss": 0.298, "step": 3333 }, { "epoch": 0.9952238805970149, "grad_norm": 0.7302545942476094, "learning_rate": 2.9918625561536907e-10, "loss": 0.3069, "step": 3334 }, { "epoch": 0.9955223880597015, "grad_norm": 0.8697911226163374, "learning_rate": 2.6295730511105564e-10, "loss": 0.3235, "step": 3335 }, { "epoch": 0.9958208955223881, "grad_norm": 0.8879989503033112, "learning_rate": 2.2906554781043645e-10, "loss": 0.3554, "step": 3336 }, { "epoch": 0.9961194029850746, "grad_norm": 0.7574075071972748, "learning_rate": 1.9751101540149697e-10, "loss": 0.3168, "step": 3337 }, { "epoch": 0.9964179104477612, "grad_norm": 0.9752022971438031, "learning_rate": 1.6829373738702636e-10, "loss": 0.3387, "step": 3338 }, { "epoch": 0.9967164179104477, "grad_norm": 0.7698823309368659, "learning_rate": 1.4141374108433968e-10, "loss": 0.2791, "step": 3339 }, { "epoch": 0.9970149253731343, "grad_norm": 0.8921339982339014, "learning_rate": 1.1687105162583311e-10, "loss": 0.3688, "step": 3340 }, { "epoch": 0.9973134328358209, "grad_norm": 0.7650117442391594, "learning_rate": 9.466569195787367e-11, "loss": 0.3043, "step": 3341 }, { "epoch": 0.9976119402985074, "grad_norm": 0.8092924984819057, "learning_rate": 7.479768284246458e-11, "loss": 0.3063, "step": 3342 }, { "epoch": 0.997910447761194, "grad_norm": 0.7743808795054051, "learning_rate": 5.726704285530238e-11, "loss": 0.3209, "step": 3343 }, { "epoch": 0.9982089552238806, "grad_norm": 0.8125424691553139, "learning_rate": 4.207378838744225e-11, "loss": 0.3303, "step": 3344 }, { "epoch": 0.9985074626865672, "grad_norm": 0.814816088361442, "learning_rate": 2.9217933643910236e-11, "loss": 0.33, "step": 3345 }, { "epoch": 0.9988059701492538, "grad_norm": 0.7687063639245223, "learning_rate": 1.8699490644813467e-11, "loss": 0.2951, "step": 3346 }, { "epoch": 0.9991044776119403, "grad_norm": 0.8274962606742341, "learning_rate": 1.05184692245075e-11, "loss": 0.2992, "step": 3347 }, { "epoch": 0.9994029850746269, "grad_norm": 0.7641695113854454, "learning_rate": 4.6748770321514145e-12, "loss": 0.3547, "step": 3348 }, { "epoch": 0.9997014925373134, "grad_norm": 0.7988942617182022, "learning_rate": 1.1687195311527176e-12, "loss": 0.3496, "step": 3349 }, { "epoch": 1.0, "grad_norm": 0.8568561243747301, "learning_rate": 0.0, "loss": 0.3795, "step": 3350 }, { "epoch": 1.0, "step": 3350, "total_flos": 2762833588060160.0, "train_loss": 0.3609271458858874, "train_runtime": 54753.0786, "train_samples_per_second": 5.873, "train_steps_per_second": 0.061 } ], "logging_steps": 1.0, "max_steps": 3350, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2762833588060160.0, "train_batch_size": 24, "trial_name": null, "trial_params": null }