{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 33732, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011858176212498517, "grad_norm": 29.7334115858346, "learning_rate": 4.940711462450593e-08, "loss": 1.8796, "step": 1 }, { "epoch": 0.00023716352424997035, "grad_norm": 36.986189981478525, "learning_rate": 9.881422924901186e-08, "loss": 2.284, "step": 2 }, { "epoch": 0.0003557452863749555, "grad_norm": 40.7859748549836, "learning_rate": 1.4822134387351778e-07, "loss": 2.2242, "step": 3 }, { "epoch": 0.0004743270484999407, "grad_norm": 36.48457054375701, "learning_rate": 1.976284584980237e-07, "loss": 2.0602, "step": 4 }, { "epoch": 0.0005929088106249259, "grad_norm": 26.626520027662174, "learning_rate": 2.4703557312252967e-07, "loss": 2.1763, "step": 5 }, { "epoch": 0.000711490572749911, "grad_norm": 28.75938841669468, "learning_rate": 2.9644268774703555e-07, "loss": 2.1749, "step": 6 }, { "epoch": 0.0008300723348748962, "grad_norm": 26.450691626862728, "learning_rate": 3.4584980237154154e-07, "loss": 2.0659, "step": 7 }, { "epoch": 0.0009486540969998814, "grad_norm": 32.388409037114734, "learning_rate": 3.952569169960474e-07, "loss": 2.269, "step": 8 }, { "epoch": 0.0010672358591248667, "grad_norm": 27.818829596147488, "learning_rate": 4.446640316205534e-07, "loss": 2.0405, "step": 9 }, { "epoch": 0.0011858176212498517, "grad_norm": 27.213551128824204, "learning_rate": 4.940711462450593e-07, "loss": 2.0429, "step": 10 }, { "epoch": 0.001304399383374837, "grad_norm": 24.677212610588604, "learning_rate": 5.434782608695653e-07, "loss": 2.0898, "step": 11 }, { "epoch": 0.001422981145499822, "grad_norm": 22.13518130020256, "learning_rate": 5.928853754940711e-07, "loss": 1.9264, "step": 12 }, { "epoch": 0.0015415629076248074, "grad_norm": 26.311666508606532, "learning_rate": 6.422924901185771e-07, "loss": 2.2384, "step": 13 }, { "epoch": 0.0016601446697497924, "grad_norm": 20.846841284164107, "learning_rate": 6.916996047430831e-07, "loss": 2.0143, "step": 14 }, { "epoch": 0.0017787264318747777, "grad_norm": 22.083048308127594, "learning_rate": 7.411067193675889e-07, "loss": 1.8876, "step": 15 }, { "epoch": 0.0018973081939997628, "grad_norm": 24.8360423578799, "learning_rate": 7.905138339920948e-07, "loss": 2.0197, "step": 16 }, { "epoch": 0.002015889956124748, "grad_norm": 17.354519140819882, "learning_rate": 8.399209486166009e-07, "loss": 1.8757, "step": 17 }, { "epoch": 0.0021344717182497333, "grad_norm": 17.472884260289447, "learning_rate": 8.893280632411068e-07, "loss": 1.9005, "step": 18 }, { "epoch": 0.0022530534803747184, "grad_norm": 20.275167427552432, "learning_rate": 9.387351778656126e-07, "loss": 1.7199, "step": 19 }, { "epoch": 0.0023716352424997035, "grad_norm": 14.28383937483396, "learning_rate": 9.881422924901187e-07, "loss": 1.8742, "step": 20 }, { "epoch": 0.0024902170046246885, "grad_norm": 19.785370699910622, "learning_rate": 1.0375494071146247e-06, "loss": 1.6949, "step": 21 }, { "epoch": 0.002608798766749674, "grad_norm": 12.311019168455724, "learning_rate": 1.0869565217391306e-06, "loss": 1.7765, "step": 22 }, { "epoch": 0.002727380528874659, "grad_norm": 14.037359974966835, "learning_rate": 1.1363636363636364e-06, "loss": 1.4765, "step": 23 }, { "epoch": 0.002845962290999644, "grad_norm": 15.329905744804806, "learning_rate": 1.1857707509881422e-06, "loss": 1.6874, "step": 24 }, { "epoch": 0.0029645440531246296, "grad_norm": 9.241739188353932, "learning_rate": 1.2351778656126482e-06, "loss": 1.6788, "step": 25 }, { "epoch": 0.0030831258152496147, "grad_norm": 10.658162413453047, "learning_rate": 1.2845849802371543e-06, "loss": 1.6139, "step": 26 }, { "epoch": 0.0032017075773745998, "grad_norm": 11.484737065357146, "learning_rate": 1.3339920948616601e-06, "loss": 1.6647, "step": 27 }, { "epoch": 0.003320289339499585, "grad_norm": 5.809020273223719, "learning_rate": 1.3833992094861662e-06, "loss": 1.6373, "step": 28 }, { "epoch": 0.0034388711016245703, "grad_norm": 5.094718107564571, "learning_rate": 1.432806324110672e-06, "loss": 1.5396, "step": 29 }, { "epoch": 0.0035574528637495554, "grad_norm": 5.475453114625414, "learning_rate": 1.4822134387351778e-06, "loss": 1.3301, "step": 30 }, { "epoch": 0.0036760346258745405, "grad_norm": 5.252787264813708, "learning_rate": 1.5316205533596839e-06, "loss": 1.2724, "step": 31 }, { "epoch": 0.0037946163879995255, "grad_norm": 5.791267103219969, "learning_rate": 1.5810276679841897e-06, "loss": 0.8098, "step": 32 }, { "epoch": 0.003913198150124511, "grad_norm": 4.485775929175042, "learning_rate": 1.6304347826086957e-06, "loss": 1.3726, "step": 33 }, { "epoch": 0.004031779912249496, "grad_norm": 4.654827189747514, "learning_rate": 1.6798418972332018e-06, "loss": 1.5377, "step": 34 }, { "epoch": 0.004150361674374482, "grad_norm": 3.6622689865341744, "learning_rate": 1.7292490118577076e-06, "loss": 1.5224, "step": 35 }, { "epoch": 0.004268943436499467, "grad_norm": 4.979949858514664, "learning_rate": 1.7786561264822136e-06, "loss": 1.5067, "step": 36 }, { "epoch": 0.004387525198624452, "grad_norm": 4.501677583957293, "learning_rate": 1.8280632411067192e-06, "loss": 1.3756, "step": 37 }, { "epoch": 0.004506106960749437, "grad_norm": 4.91743142836715, "learning_rate": 1.8774703557312253e-06, "loss": 1.5613, "step": 38 }, { "epoch": 0.004624688722874422, "grad_norm": 4.806424592516933, "learning_rate": 1.9268774703557313e-06, "loss": 1.1191, "step": 39 }, { "epoch": 0.004743270484999407, "grad_norm": 5.047904499811148, "learning_rate": 1.9762845849802374e-06, "loss": 1.4491, "step": 40 }, { "epoch": 0.004861852247124392, "grad_norm": 4.810873280149616, "learning_rate": 2.0256916996047434e-06, "loss": 1.2913, "step": 41 }, { "epoch": 0.004980434009249377, "grad_norm": 4.839732027601575, "learning_rate": 2.0750988142292494e-06, "loss": 1.4684, "step": 42 }, { "epoch": 0.005099015771374363, "grad_norm": 4.205533048559967, "learning_rate": 2.124505928853755e-06, "loss": 1.4724, "step": 43 }, { "epoch": 0.005217597533499348, "grad_norm": 4.649302570240601, "learning_rate": 2.173913043478261e-06, "loss": 1.3319, "step": 44 }, { "epoch": 0.005336179295624333, "grad_norm": 4.9705205235957814, "learning_rate": 2.2233201581027667e-06, "loss": 1.4927, "step": 45 }, { "epoch": 0.005454761057749318, "grad_norm": 4.636719619809015, "learning_rate": 2.2727272727272728e-06, "loss": 1.2349, "step": 46 }, { "epoch": 0.005573342819874303, "grad_norm": 3.971633369633716, "learning_rate": 2.322134387351779e-06, "loss": 1.4677, "step": 47 }, { "epoch": 0.005691924581999288, "grad_norm": 3.8899464489610818, "learning_rate": 2.3715415019762844e-06, "loss": 1.3009, "step": 48 }, { "epoch": 0.005810506344124273, "grad_norm": 4.329629116852109, "learning_rate": 2.4209486166007905e-06, "loss": 1.403, "step": 49 }, { "epoch": 0.005929088106249259, "grad_norm": 4.218784231886035, "learning_rate": 2.4703557312252965e-06, "loss": 1.4691, "step": 50 }, { "epoch": 0.006047669868374244, "grad_norm": 3.8196953426633433, "learning_rate": 2.5197628458498025e-06, "loss": 1.5109, "step": 51 }, { "epoch": 0.006166251630499229, "grad_norm": 3.5799719246204194, "learning_rate": 2.5691699604743086e-06, "loss": 1.5903, "step": 52 }, { "epoch": 0.0062848333926242145, "grad_norm": 3.5789978015119397, "learning_rate": 2.6185770750988146e-06, "loss": 1.3442, "step": 53 }, { "epoch": 0.0064034151547491995, "grad_norm": 3.2603597513307245, "learning_rate": 2.6679841897233202e-06, "loss": 1.5437, "step": 54 }, { "epoch": 0.006521996916874185, "grad_norm": 3.4341440061642086, "learning_rate": 2.7173913043478263e-06, "loss": 1.5123, "step": 55 }, { "epoch": 0.00664057867899917, "grad_norm": 3.987972758410137, "learning_rate": 2.7667984189723323e-06, "loss": 1.3457, "step": 56 }, { "epoch": 0.006759160441124155, "grad_norm": 3.977475845211303, "learning_rate": 2.816205533596838e-06, "loss": 1.4073, "step": 57 }, { "epoch": 0.006877742203249141, "grad_norm": 4.203700275846737, "learning_rate": 2.865612648221344e-06, "loss": 1.3345, "step": 58 }, { "epoch": 0.006996323965374126, "grad_norm": 3.3146057498522437, "learning_rate": 2.91501976284585e-06, "loss": 1.4505, "step": 59 }, { "epoch": 0.007114905727499111, "grad_norm": 3.5921841458375234, "learning_rate": 2.9644268774703556e-06, "loss": 1.1119, "step": 60 }, { "epoch": 0.007233487489624096, "grad_norm": 4.534186220908105, "learning_rate": 3.0138339920948617e-06, "loss": 1.2502, "step": 61 }, { "epoch": 0.007352069251749081, "grad_norm": 3.4930738853086996, "learning_rate": 3.0632411067193677e-06, "loss": 1.4959, "step": 62 }, { "epoch": 0.007470651013874066, "grad_norm": 3.793249085036293, "learning_rate": 3.1126482213438737e-06, "loss": 1.5658, "step": 63 }, { "epoch": 0.007589232775999051, "grad_norm": 3.7272413318236564, "learning_rate": 3.1620553359683794e-06, "loss": 1.4786, "step": 64 }, { "epoch": 0.007707814538124036, "grad_norm": 3.7666046621727376, "learning_rate": 3.211462450592886e-06, "loss": 1.3404, "step": 65 }, { "epoch": 0.007826396300249021, "grad_norm": 3.6489802688735518, "learning_rate": 3.2608695652173914e-06, "loss": 1.3084, "step": 66 }, { "epoch": 0.007944978062374007, "grad_norm": 3.393741805739487, "learning_rate": 3.310276679841898e-06, "loss": 1.4177, "step": 67 }, { "epoch": 0.008063559824498991, "grad_norm": 5.2200674212050435, "learning_rate": 3.3596837944664035e-06, "loss": 1.223, "step": 68 }, { "epoch": 0.008182141586623977, "grad_norm": 3.5668605052898017, "learning_rate": 3.409090909090909e-06, "loss": 1.2097, "step": 69 }, { "epoch": 0.008300723348748963, "grad_norm": 3.6280712755852456, "learning_rate": 3.458498023715415e-06, "loss": 1.3289, "step": 70 }, { "epoch": 0.008419305110873947, "grad_norm": 3.268044709665363, "learning_rate": 3.507905138339921e-06, "loss": 1.564, "step": 71 }, { "epoch": 0.008537886872998933, "grad_norm": 3.9903293434366716, "learning_rate": 3.5573122529644273e-06, "loss": 1.3967, "step": 72 }, { "epoch": 0.008656468635123917, "grad_norm": 3.276506449815736, "learning_rate": 3.606719367588933e-06, "loss": 1.4608, "step": 73 }, { "epoch": 0.008775050397248903, "grad_norm": 3.349344682930467, "learning_rate": 3.6561264822134385e-06, "loss": 1.2848, "step": 74 }, { "epoch": 0.008893632159373888, "grad_norm": 3.8759647234303696, "learning_rate": 3.705533596837945e-06, "loss": 1.4331, "step": 75 }, { "epoch": 0.009012213921498874, "grad_norm": 4.032042318942088, "learning_rate": 3.7549407114624506e-06, "loss": 1.2687, "step": 76 }, { "epoch": 0.00913079568362386, "grad_norm": 4.05463512824053, "learning_rate": 3.804347826086957e-06, "loss": 1.3397, "step": 77 }, { "epoch": 0.009249377445748844, "grad_norm": 3.570905635236118, "learning_rate": 3.853754940711463e-06, "loss": 1.4733, "step": 78 }, { "epoch": 0.00936795920787383, "grad_norm": 3.779305628665041, "learning_rate": 3.903162055335968e-06, "loss": 1.4018, "step": 79 }, { "epoch": 0.009486540969998814, "grad_norm": 3.7854719623767825, "learning_rate": 3.952569169960475e-06, "loss": 1.4833, "step": 80 }, { "epoch": 0.0096051227321238, "grad_norm": 3.6505406776124714, "learning_rate": 4.00197628458498e-06, "loss": 1.1614, "step": 81 }, { "epoch": 0.009723704494248784, "grad_norm": 3.4311507500208274, "learning_rate": 4.051383399209487e-06, "loss": 1.5358, "step": 82 }, { "epoch": 0.00984228625637377, "grad_norm": 3.1149203272886146, "learning_rate": 4.1007905138339924e-06, "loss": 1.3102, "step": 83 }, { "epoch": 0.009960868018498754, "grad_norm": 3.3638340217134637, "learning_rate": 4.150197628458499e-06, "loss": 1.5811, "step": 84 }, { "epoch": 0.01007944978062374, "grad_norm": 3.7390399791979925, "learning_rate": 4.1996047430830045e-06, "loss": 1.2584, "step": 85 }, { "epoch": 0.010198031542748726, "grad_norm": 3.4182632034218603, "learning_rate": 4.24901185770751e-06, "loss": 1.6082, "step": 86 }, { "epoch": 0.01031661330487371, "grad_norm": 3.354729488177407, "learning_rate": 4.298418972332017e-06, "loss": 1.1889, "step": 87 }, { "epoch": 0.010435195066998696, "grad_norm": 3.5089261935914022, "learning_rate": 4.347826086956522e-06, "loss": 1.3345, "step": 88 }, { "epoch": 0.01055377682912368, "grad_norm": 3.8462814773896525, "learning_rate": 4.397233201581028e-06, "loss": 1.1596, "step": 89 }, { "epoch": 0.010672358591248666, "grad_norm": 3.6272409788759012, "learning_rate": 4.4466403162055334e-06, "loss": 1.4737, "step": 90 }, { "epoch": 0.01079094035337365, "grad_norm": 3.13130266377601, "learning_rate": 4.496047430830039e-06, "loss": 1.434, "step": 91 }, { "epoch": 0.010909522115498636, "grad_norm": 3.196212648459861, "learning_rate": 4.5454545454545455e-06, "loss": 1.4654, "step": 92 }, { "epoch": 0.011028103877623622, "grad_norm": 3.1293886723406406, "learning_rate": 4.594861660079051e-06, "loss": 1.2483, "step": 93 }, { "epoch": 0.011146685639748606, "grad_norm": 3.6867170578203434, "learning_rate": 4.644268774703558e-06, "loss": 1.5428, "step": 94 }, { "epoch": 0.011265267401873592, "grad_norm": 3.212927331712037, "learning_rate": 4.693675889328063e-06, "loss": 1.5376, "step": 95 }, { "epoch": 0.011383849163998577, "grad_norm": 3.1924142182429613, "learning_rate": 4.743083003952569e-06, "loss": 1.7211, "step": 96 }, { "epoch": 0.011502430926123563, "grad_norm": 3.381404143138639, "learning_rate": 4.792490118577075e-06, "loss": 1.4578, "step": 97 }, { "epoch": 0.011621012688248547, "grad_norm": 3.502019885762863, "learning_rate": 4.841897233201581e-06, "loss": 1.4686, "step": 98 }, { "epoch": 0.011739594450373533, "grad_norm": 3.4239685805832365, "learning_rate": 4.891304347826087e-06, "loss": 1.4302, "step": 99 }, { "epoch": 0.011858176212498519, "grad_norm": 3.2866255993402373, "learning_rate": 4.940711462450593e-06, "loss": 1.5406, "step": 100 }, { "epoch": 0.011976757974623503, "grad_norm": 3.8695232957002332, "learning_rate": 4.9901185770750995e-06, "loss": 1.2577, "step": 101 }, { "epoch": 0.012095339736748489, "grad_norm": 3.1954221106642087, "learning_rate": 5.039525691699605e-06, "loss": 1.6125, "step": 102 }, { "epoch": 0.012213921498873473, "grad_norm": 3.4547480778911455, "learning_rate": 5.088932806324111e-06, "loss": 1.0321, "step": 103 }, { "epoch": 0.012332503260998459, "grad_norm": 3.490136777704389, "learning_rate": 5.138339920948617e-06, "loss": 1.1058, "step": 104 }, { "epoch": 0.012451085023123443, "grad_norm": 3.0967678463388544, "learning_rate": 5.187747035573123e-06, "loss": 1.5521, "step": 105 }, { "epoch": 0.012569666785248429, "grad_norm": 3.172889434152784, "learning_rate": 5.237154150197629e-06, "loss": 1.5146, "step": 106 }, { "epoch": 0.012688248547373413, "grad_norm": 3.693898674128569, "learning_rate": 5.286561264822135e-06, "loss": 1.3215, "step": 107 }, { "epoch": 0.012806830309498399, "grad_norm": 3.398769183694832, "learning_rate": 5.3359683794466405e-06, "loss": 1.3303, "step": 108 }, { "epoch": 0.012925412071623385, "grad_norm": 3.7096404981782327, "learning_rate": 5.385375494071147e-06, "loss": 1.3601, "step": 109 }, { "epoch": 0.01304399383374837, "grad_norm": 3.347710020043152, "learning_rate": 5.4347826086956525e-06, "loss": 1.4923, "step": 110 }, { "epoch": 0.013162575595873355, "grad_norm": 3.584519958005256, "learning_rate": 5.484189723320159e-06, "loss": 0.8943, "step": 111 }, { "epoch": 0.01328115735799834, "grad_norm": 3.3351266533490636, "learning_rate": 5.533596837944665e-06, "loss": 1.4478, "step": 112 }, { "epoch": 0.013399739120123325, "grad_norm": 3.2227745201141342, "learning_rate": 5.58300395256917e-06, "loss": 1.497, "step": 113 }, { "epoch": 0.01351832088224831, "grad_norm": 3.5761240428588277, "learning_rate": 5.632411067193676e-06, "loss": 1.2487, "step": 114 }, { "epoch": 0.013636902644373295, "grad_norm": 3.5964241142188733, "learning_rate": 5.681818181818182e-06, "loss": 1.5315, "step": 115 }, { "epoch": 0.013755484406498281, "grad_norm": 3.532485779559769, "learning_rate": 5.731225296442688e-06, "loss": 1.5633, "step": 116 }, { "epoch": 0.013874066168623266, "grad_norm": 4.458237380787414, "learning_rate": 5.7806324110671936e-06, "loss": 1.1755, "step": 117 }, { "epoch": 0.013992647930748251, "grad_norm": 3.401693392114225, "learning_rate": 5.8300395256917e-06, "loss": 1.1613, "step": 118 }, { "epoch": 0.014111229692873236, "grad_norm": 3.43659936937958, "learning_rate": 5.879446640316206e-06, "loss": 1.4882, "step": 119 }, { "epoch": 0.014229811454998222, "grad_norm": 3.933037361894272, "learning_rate": 5.928853754940711e-06, "loss": 1.2865, "step": 120 }, { "epoch": 0.014348393217123206, "grad_norm": 3.326242133376254, "learning_rate": 5.978260869565218e-06, "loss": 1.2986, "step": 121 }, { "epoch": 0.014466974979248192, "grad_norm": 3.1079589329108264, "learning_rate": 6.027667984189723e-06, "loss": 1.4292, "step": 122 }, { "epoch": 0.014585556741373178, "grad_norm": 3.4170179875454467, "learning_rate": 6.07707509881423e-06, "loss": 1.4056, "step": 123 }, { "epoch": 0.014704138503498162, "grad_norm": 3.2897278731100053, "learning_rate": 6.126482213438735e-06, "loss": 0.9931, "step": 124 }, { "epoch": 0.014822720265623148, "grad_norm": 3.0168456876655223, "learning_rate": 6.175889328063241e-06, "loss": 1.4632, "step": 125 }, { "epoch": 0.014941302027748132, "grad_norm": 4.051435358425506, "learning_rate": 6.2252964426877475e-06, "loss": 1.0668, "step": 126 }, { "epoch": 0.015059883789873118, "grad_norm": 3.2358187287341686, "learning_rate": 6.274703557312253e-06, "loss": 1.4251, "step": 127 }, { "epoch": 0.015178465551998102, "grad_norm": 3.6497534834135124, "learning_rate": 6.324110671936759e-06, "loss": 1.4442, "step": 128 }, { "epoch": 0.015297047314123088, "grad_norm": 3.2445532742932106, "learning_rate": 6.373517786561266e-06, "loss": 1.284, "step": 129 }, { "epoch": 0.015415629076248072, "grad_norm": 3.6969900055784333, "learning_rate": 6.422924901185772e-06, "loss": 1.2353, "step": 130 }, { "epoch": 0.015534210838373058, "grad_norm": 3.2840007521188865, "learning_rate": 6.472332015810277e-06, "loss": 1.5767, "step": 131 }, { "epoch": 0.015652792600498042, "grad_norm": 3.359971817996501, "learning_rate": 6.521739130434783e-06, "loss": 1.4194, "step": 132 }, { "epoch": 0.01577137436262303, "grad_norm": 3.686163280364927, "learning_rate": 6.5711462450592885e-06, "loss": 1.3864, "step": 133 }, { "epoch": 0.015889956124748014, "grad_norm": 3.517698750818215, "learning_rate": 6.620553359683796e-06, "loss": 1.3154, "step": 134 }, { "epoch": 0.016008537886873, "grad_norm": 3.6938279258929088, "learning_rate": 6.6699604743083014e-06, "loss": 1.243, "step": 135 }, { "epoch": 0.016127119648997983, "grad_norm": 3.8472159360375247, "learning_rate": 6.719367588932807e-06, "loss": 1.0296, "step": 136 }, { "epoch": 0.01624570141112297, "grad_norm": 3.5873584587564187, "learning_rate": 6.768774703557313e-06, "loss": 1.3103, "step": 137 }, { "epoch": 0.016364283173247954, "grad_norm": 3.557262653873429, "learning_rate": 6.818181818181818e-06, "loss": 1.4158, "step": 138 }, { "epoch": 0.01648286493537294, "grad_norm": 3.4128391682193606, "learning_rate": 6.867588932806325e-06, "loss": 1.4815, "step": 139 }, { "epoch": 0.016601446697497926, "grad_norm": 3.5543485647000184, "learning_rate": 6.91699604743083e-06, "loss": 1.2262, "step": 140 }, { "epoch": 0.01672002845962291, "grad_norm": 4.040205557480039, "learning_rate": 6.966403162055336e-06, "loss": 1.3685, "step": 141 }, { "epoch": 0.016838610221747895, "grad_norm": 3.571608477900209, "learning_rate": 7.015810276679842e-06, "loss": 1.1649, "step": 142 }, { "epoch": 0.01695719198387288, "grad_norm": 3.5082596175908534, "learning_rate": 7.065217391304347e-06, "loss": 1.3327, "step": 143 }, { "epoch": 0.017075773745997867, "grad_norm": 3.2121355373777987, "learning_rate": 7.1146245059288545e-06, "loss": 1.3955, "step": 144 }, { "epoch": 0.01719435550812285, "grad_norm": 3.5775456145700737, "learning_rate": 7.16403162055336e-06, "loss": 1.419, "step": 145 }, { "epoch": 0.017312937270247835, "grad_norm": 3.04031831251046, "learning_rate": 7.213438735177866e-06, "loss": 1.5208, "step": 146 }, { "epoch": 0.017431519032372823, "grad_norm": 4.129023970677371, "learning_rate": 7.262845849802371e-06, "loss": 1.2054, "step": 147 }, { "epoch": 0.017550100794497807, "grad_norm": 3.052172301065618, "learning_rate": 7.312252964426877e-06, "loss": 1.5402, "step": 148 }, { "epoch": 0.01766868255662279, "grad_norm": 3.4338766812762933, "learning_rate": 7.361660079051384e-06, "loss": 1.2949, "step": 149 }, { "epoch": 0.017787264318747775, "grad_norm": 3.2861687031919553, "learning_rate": 7.41106719367589e-06, "loss": 1.4539, "step": 150 }, { "epoch": 0.017905846080872763, "grad_norm": 3.614448911565531, "learning_rate": 7.4604743083003955e-06, "loss": 1.4611, "step": 151 }, { "epoch": 0.018024427842997747, "grad_norm": 3.2149909098883334, "learning_rate": 7.509881422924901e-06, "loss": 1.0467, "step": 152 }, { "epoch": 0.01814300960512273, "grad_norm": 3.684892340896345, "learning_rate": 7.559288537549407e-06, "loss": 1.3944, "step": 153 }, { "epoch": 0.01826159136724772, "grad_norm": 3.376670522018421, "learning_rate": 7.608695652173914e-06, "loss": 1.3431, "step": 154 }, { "epoch": 0.018380173129372703, "grad_norm": 3.3011577275242083, "learning_rate": 7.65810276679842e-06, "loss": 1.2032, "step": 155 }, { "epoch": 0.018498754891497687, "grad_norm": 3.334271435215534, "learning_rate": 7.707509881422925e-06, "loss": 1.1876, "step": 156 }, { "epoch": 0.01861733665362267, "grad_norm": 3.4977331301136263, "learning_rate": 7.756916996047431e-06, "loss": 1.0686, "step": 157 }, { "epoch": 0.01873591841574766, "grad_norm": 3.616540296725585, "learning_rate": 7.806324110671937e-06, "loss": 1.1448, "step": 158 }, { "epoch": 0.018854500177872643, "grad_norm": 3.647057966520627, "learning_rate": 7.855731225296444e-06, "loss": 1.3524, "step": 159 }, { "epoch": 0.018973081939997628, "grad_norm": 3.521250636690629, "learning_rate": 7.90513833992095e-06, "loss": 1.3396, "step": 160 }, { "epoch": 0.019091663702122612, "grad_norm": 3.4665570121511724, "learning_rate": 7.954545454545455e-06, "loss": 1.4676, "step": 161 }, { "epoch": 0.0192102454642476, "grad_norm": 4.101700616924512, "learning_rate": 8.00395256916996e-06, "loss": 1.0273, "step": 162 }, { "epoch": 0.019328827226372584, "grad_norm": 3.210628877875299, "learning_rate": 8.053359683794468e-06, "loss": 1.4024, "step": 163 }, { "epoch": 0.019447408988497568, "grad_norm": 3.4017394309666957, "learning_rate": 8.102766798418974e-06, "loss": 1.3566, "step": 164 }, { "epoch": 0.019565990750622556, "grad_norm": 3.566329320796935, "learning_rate": 8.15217391304348e-06, "loss": 1.2346, "step": 165 }, { "epoch": 0.01968457251274754, "grad_norm": 3.4620705984226285, "learning_rate": 8.201581027667985e-06, "loss": 1.4025, "step": 166 }, { "epoch": 0.019803154274872524, "grad_norm": 3.145918115470483, "learning_rate": 8.25098814229249e-06, "loss": 1.3182, "step": 167 }, { "epoch": 0.019921736036997508, "grad_norm": 3.297200804982551, "learning_rate": 8.300395256916998e-06, "loss": 1.5242, "step": 168 }, { "epoch": 0.020040317799122496, "grad_norm": 3.2933296628880044, "learning_rate": 8.349802371541503e-06, "loss": 1.4143, "step": 169 }, { "epoch": 0.02015889956124748, "grad_norm": 3.1545288713113306, "learning_rate": 8.399209486166009e-06, "loss": 1.4978, "step": 170 }, { "epoch": 0.020277481323372464, "grad_norm": 3.2121797354568313, "learning_rate": 8.448616600790515e-06, "loss": 1.2417, "step": 171 }, { "epoch": 0.020396063085497452, "grad_norm": 3.4702710147199416, "learning_rate": 8.49802371541502e-06, "loss": 1.5379, "step": 172 }, { "epoch": 0.020514644847622436, "grad_norm": 3.327819772531745, "learning_rate": 8.547430830039528e-06, "loss": 1.4314, "step": 173 }, { "epoch": 0.02063322660974742, "grad_norm": 2.9313494652986996, "learning_rate": 8.596837944664033e-06, "loss": 1.494, "step": 174 }, { "epoch": 0.020751808371872404, "grad_norm": 3.3530024113440766, "learning_rate": 8.646245059288539e-06, "loss": 1.4671, "step": 175 }, { "epoch": 0.020870390133997392, "grad_norm": 3.7084244554771724, "learning_rate": 8.695652173913044e-06, "loss": 1.2303, "step": 176 }, { "epoch": 0.020988971896122376, "grad_norm": 3.4031030399903384, "learning_rate": 8.74505928853755e-06, "loss": 1.2403, "step": 177 }, { "epoch": 0.02110755365824736, "grad_norm": 3.253593424578815, "learning_rate": 8.794466403162056e-06, "loss": 1.4751, "step": 178 }, { "epoch": 0.021226135420372348, "grad_norm": 3.4444607383500707, "learning_rate": 8.843873517786561e-06, "loss": 1.2758, "step": 179 }, { "epoch": 0.021344717182497332, "grad_norm": 3.2018005057847154, "learning_rate": 8.893280632411067e-06, "loss": 1.5201, "step": 180 }, { "epoch": 0.021463298944622317, "grad_norm": 3.043906797607487, "learning_rate": 8.942687747035572e-06, "loss": 1.2787, "step": 181 }, { "epoch": 0.0215818807067473, "grad_norm": 3.1622398033223305, "learning_rate": 8.992094861660078e-06, "loss": 1.4893, "step": 182 }, { "epoch": 0.02170046246887229, "grad_norm": 3.0029573752796885, "learning_rate": 9.041501976284585e-06, "loss": 1.3394, "step": 183 }, { "epoch": 0.021819044230997273, "grad_norm": 3.4803964110259655, "learning_rate": 9.090909090909091e-06, "loss": 1.3506, "step": 184 }, { "epoch": 0.021937625993122257, "grad_norm": 3.1091340550524516, "learning_rate": 9.140316205533597e-06, "loss": 1.3211, "step": 185 }, { "epoch": 0.022056207755247245, "grad_norm": 3.317771728202137, "learning_rate": 9.189723320158102e-06, "loss": 1.3114, "step": 186 }, { "epoch": 0.02217478951737223, "grad_norm": 3.3820404696210717, "learning_rate": 9.239130434782608e-06, "loss": 1.4481, "step": 187 }, { "epoch": 0.022293371279497213, "grad_norm": 3.1482997926234413, "learning_rate": 9.288537549407115e-06, "loss": 1.0854, "step": 188 }, { "epoch": 0.022411953041622197, "grad_norm": 3.04452907062631, "learning_rate": 9.33794466403162e-06, "loss": 1.0224, "step": 189 }, { "epoch": 0.022530534803747185, "grad_norm": 3.5779674496527667, "learning_rate": 9.387351778656126e-06, "loss": 0.8139, "step": 190 }, { "epoch": 0.02264911656587217, "grad_norm": 3.1118150715574235, "learning_rate": 9.436758893280632e-06, "loss": 1.0896, "step": 191 }, { "epoch": 0.022767698327997153, "grad_norm": 3.1440109613818974, "learning_rate": 9.486166007905138e-06, "loss": 1.1657, "step": 192 }, { "epoch": 0.02288628009012214, "grad_norm": 3.5495503065498046, "learning_rate": 9.535573122529645e-06, "loss": 1.2247, "step": 193 }, { "epoch": 0.023004861852247125, "grad_norm": 3.2695848907794716, "learning_rate": 9.58498023715415e-06, "loss": 1.2869, "step": 194 }, { "epoch": 0.02312344361437211, "grad_norm": 3.619782453638658, "learning_rate": 9.634387351778656e-06, "loss": 1.1305, "step": 195 }, { "epoch": 0.023242025376497093, "grad_norm": 3.77958219860935, "learning_rate": 9.683794466403162e-06, "loss": 1.1507, "step": 196 }, { "epoch": 0.02336060713862208, "grad_norm": 3.231305756325196, "learning_rate": 9.733201581027667e-06, "loss": 1.3795, "step": 197 }, { "epoch": 0.023479188900747065, "grad_norm": 3.272144840163199, "learning_rate": 9.782608695652175e-06, "loss": 1.2991, "step": 198 }, { "epoch": 0.02359777066287205, "grad_norm": 3.2163198350672153, "learning_rate": 9.83201581027668e-06, "loss": 1.6104, "step": 199 }, { "epoch": 0.023716352424997037, "grad_norm": 2.988892808386588, "learning_rate": 9.881422924901186e-06, "loss": 1.435, "step": 200 }, { "epoch": 0.02383493418712202, "grad_norm": 3.107283218262279, "learning_rate": 9.930830039525692e-06, "loss": 1.4289, "step": 201 }, { "epoch": 0.023953515949247006, "grad_norm": 3.4543468151119154, "learning_rate": 9.980237154150199e-06, "loss": 1.2277, "step": 202 }, { "epoch": 0.02407209771137199, "grad_norm": 2.974171011998451, "learning_rate": 1.0029644268774705e-05, "loss": 1.2908, "step": 203 }, { "epoch": 0.024190679473496977, "grad_norm": 3.326076310722646, "learning_rate": 1.007905138339921e-05, "loss": 1.1085, "step": 204 }, { "epoch": 0.02430926123562196, "grad_norm": 2.794461991541987, "learning_rate": 1.0128458498023716e-05, "loss": 1.0693, "step": 205 }, { "epoch": 0.024427842997746946, "grad_norm": 2.98309651474819, "learning_rate": 1.0177865612648221e-05, "loss": 1.4743, "step": 206 }, { "epoch": 0.02454642475987193, "grad_norm": 2.736817891772091, "learning_rate": 1.0227272727272729e-05, "loss": 1.4711, "step": 207 }, { "epoch": 0.024665006521996918, "grad_norm": 3.275712278506568, "learning_rate": 1.0276679841897234e-05, "loss": 1.1462, "step": 208 }, { "epoch": 0.024783588284121902, "grad_norm": 3.6558416270782783, "learning_rate": 1.032608695652174e-05, "loss": 1.3439, "step": 209 }, { "epoch": 0.024902170046246886, "grad_norm": 3.232849042632529, "learning_rate": 1.0375494071146246e-05, "loss": 1.4936, "step": 210 }, { "epoch": 0.025020751808371874, "grad_norm": 3.233829764608336, "learning_rate": 1.0424901185770751e-05, "loss": 1.2881, "step": 211 }, { "epoch": 0.025139333570496858, "grad_norm": 3.0654443181694226, "learning_rate": 1.0474308300395258e-05, "loss": 1.1639, "step": 212 }, { "epoch": 0.025257915332621842, "grad_norm": 2.7865196739296247, "learning_rate": 1.0523715415019764e-05, "loss": 1.2722, "step": 213 }, { "epoch": 0.025376497094746826, "grad_norm": 3.115314074935528, "learning_rate": 1.057312252964427e-05, "loss": 1.5186, "step": 214 }, { "epoch": 0.025495078856871814, "grad_norm": 3.5258294570883684, "learning_rate": 1.0622529644268775e-05, "loss": 1.4668, "step": 215 }, { "epoch": 0.025613660618996798, "grad_norm": 2.931127286710215, "learning_rate": 1.0671936758893281e-05, "loss": 1.3485, "step": 216 }, { "epoch": 0.025732242381121782, "grad_norm": 3.0732311709708835, "learning_rate": 1.0721343873517788e-05, "loss": 1.2787, "step": 217 }, { "epoch": 0.02585082414324677, "grad_norm": 2.723098769970518, "learning_rate": 1.0770750988142294e-05, "loss": 1.2318, "step": 218 }, { "epoch": 0.025969405905371754, "grad_norm": 3.071806327744816, "learning_rate": 1.08201581027668e-05, "loss": 1.389, "step": 219 }, { "epoch": 0.02608798766749674, "grad_norm": 3.2375957118465037, "learning_rate": 1.0869565217391305e-05, "loss": 1.1371, "step": 220 }, { "epoch": 0.026206569429621723, "grad_norm": 3.001458532632069, "learning_rate": 1.091897233201581e-05, "loss": 1.3853, "step": 221 }, { "epoch": 0.02632515119174671, "grad_norm": 3.487329871009364, "learning_rate": 1.0968379446640318e-05, "loss": 1.095, "step": 222 }, { "epoch": 0.026443732953871694, "grad_norm": 3.1835632178903013, "learning_rate": 1.1017786561264824e-05, "loss": 1.6332, "step": 223 }, { "epoch": 0.02656231471599668, "grad_norm": 3.467523876220792, "learning_rate": 1.106719367588933e-05, "loss": 1.0143, "step": 224 }, { "epoch": 0.026680896478121666, "grad_norm": 2.911364808687225, "learning_rate": 1.1116600790513835e-05, "loss": 1.1766, "step": 225 }, { "epoch": 0.02679947824024665, "grad_norm": 3.2167562309227353, "learning_rate": 1.116600790513834e-05, "loss": 1.5045, "step": 226 }, { "epoch": 0.026918060002371635, "grad_norm": 3.4759934540061406, "learning_rate": 1.1215415019762846e-05, "loss": 1.1668, "step": 227 }, { "epoch": 0.02703664176449662, "grad_norm": 3.297419934469312, "learning_rate": 1.1264822134387352e-05, "loss": 1.3496, "step": 228 }, { "epoch": 0.027155223526621607, "grad_norm": 3.364102618831967, "learning_rate": 1.1314229249011857e-05, "loss": 1.1864, "step": 229 }, { "epoch": 0.02727380528874659, "grad_norm": 3.1796901912912996, "learning_rate": 1.1363636363636365e-05, "loss": 1.4592, "step": 230 }, { "epoch": 0.027392387050871575, "grad_norm": 2.800510376598655, "learning_rate": 1.141304347826087e-05, "loss": 1.2089, "step": 231 }, { "epoch": 0.027510968812996563, "grad_norm": 4.005827348979385, "learning_rate": 1.1462450592885376e-05, "loss": 1.2746, "step": 232 }, { "epoch": 0.027629550575121547, "grad_norm": 3.090867193648447, "learning_rate": 1.1511857707509881e-05, "loss": 1.3314, "step": 233 }, { "epoch": 0.02774813233724653, "grad_norm": 3.39522468659474, "learning_rate": 1.1561264822134387e-05, "loss": 1.2716, "step": 234 }, { "epoch": 0.027866714099371515, "grad_norm": 3.0192711391082647, "learning_rate": 1.1610671936758893e-05, "loss": 1.4077, "step": 235 }, { "epoch": 0.027985295861496503, "grad_norm": 3.2403561609241636, "learning_rate": 1.16600790513834e-05, "loss": 1.2828, "step": 236 }, { "epoch": 0.028103877623621487, "grad_norm": 3.1603540755334136, "learning_rate": 1.1709486166007906e-05, "loss": 1.1665, "step": 237 }, { "epoch": 0.02822245938574647, "grad_norm": 3.3813771701826134, "learning_rate": 1.1758893280632411e-05, "loss": 1.4781, "step": 238 }, { "epoch": 0.02834104114787146, "grad_norm": 3.0019164087983548, "learning_rate": 1.1808300395256917e-05, "loss": 1.1131, "step": 239 }, { "epoch": 0.028459622909996443, "grad_norm": 3.1578127892215218, "learning_rate": 1.1857707509881423e-05, "loss": 1.3916, "step": 240 }, { "epoch": 0.028578204672121427, "grad_norm": 2.9470404708927567, "learning_rate": 1.190711462450593e-05, "loss": 1.174, "step": 241 }, { "epoch": 0.02869678643424641, "grad_norm": 3.1050812047836174, "learning_rate": 1.1956521739130435e-05, "loss": 1.3575, "step": 242 }, { "epoch": 0.0288153681963714, "grad_norm": 3.1615698850784657, "learning_rate": 1.2005928853754941e-05, "loss": 1.2436, "step": 243 }, { "epoch": 0.028933949958496383, "grad_norm": 3.3625992795196717, "learning_rate": 1.2055335968379447e-05, "loss": 1.3217, "step": 244 }, { "epoch": 0.029052531720621368, "grad_norm": 3.3399551855723257, "learning_rate": 1.2104743083003952e-05, "loss": 1.5199, "step": 245 }, { "epoch": 0.029171113482746355, "grad_norm": 3.129576888473782, "learning_rate": 1.215415019762846e-05, "loss": 1.0011, "step": 246 }, { "epoch": 0.02928969524487134, "grad_norm": 3.335255382333624, "learning_rate": 1.2203557312252965e-05, "loss": 1.4456, "step": 247 }, { "epoch": 0.029408277006996324, "grad_norm": 2.9507200211167444, "learning_rate": 1.225296442687747e-05, "loss": 1.2832, "step": 248 }, { "epoch": 0.029526858769121308, "grad_norm": 2.854895707501369, "learning_rate": 1.2302371541501976e-05, "loss": 1.3801, "step": 249 }, { "epoch": 0.029645440531246296, "grad_norm": 2.9750434725073496, "learning_rate": 1.2351778656126482e-05, "loss": 1.4296, "step": 250 }, { "epoch": 0.02976402229337128, "grad_norm": 2.858011029472128, "learning_rate": 1.240118577075099e-05, "loss": 1.3666, "step": 251 }, { "epoch": 0.029882604055496264, "grad_norm": 2.89441377368366, "learning_rate": 1.2450592885375495e-05, "loss": 1.1648, "step": 252 }, { "epoch": 0.030001185817621248, "grad_norm": 2.794815472722316, "learning_rate": 1.25e-05, "loss": 1.5307, "step": 253 }, { "epoch": 0.030119767579746236, "grad_norm": 2.9336641981417286, "learning_rate": 1.2549407114624506e-05, "loss": 1.1884, "step": 254 }, { "epoch": 0.03023834934187122, "grad_norm": 2.939555396418759, "learning_rate": 1.2598814229249012e-05, "loss": 1.236, "step": 255 }, { "epoch": 0.030356931103996204, "grad_norm": 3.02904822723086, "learning_rate": 1.2648221343873517e-05, "loss": 1.1735, "step": 256 }, { "epoch": 0.030475512866121192, "grad_norm": 3.2708882494099565, "learning_rate": 1.2697628458498023e-05, "loss": 1.5093, "step": 257 }, { "epoch": 0.030594094628246176, "grad_norm": 2.8132740117639696, "learning_rate": 1.2747035573122532e-05, "loss": 0.991, "step": 258 }, { "epoch": 0.03071267639037116, "grad_norm": 3.628748609206158, "learning_rate": 1.2796442687747038e-05, "loss": 1.4408, "step": 259 }, { "epoch": 0.030831258152496144, "grad_norm": 2.690392885950218, "learning_rate": 1.2845849802371543e-05, "loss": 1.4896, "step": 260 }, { "epoch": 0.030949839914621132, "grad_norm": 2.7253767001875304, "learning_rate": 1.2895256916996049e-05, "loss": 1.44, "step": 261 }, { "epoch": 0.031068421676746116, "grad_norm": 2.996097305741969, "learning_rate": 1.2944664031620555e-05, "loss": 1.215, "step": 262 }, { "epoch": 0.0311870034388711, "grad_norm": 3.0305816572516777, "learning_rate": 1.299407114624506e-05, "loss": 1.3951, "step": 263 }, { "epoch": 0.031305585200996085, "grad_norm": 2.813865807960468, "learning_rate": 1.3043478260869566e-05, "loss": 1.3024, "step": 264 }, { "epoch": 0.03142416696312107, "grad_norm": 3.17286501915085, "learning_rate": 1.3092885375494071e-05, "loss": 1.3738, "step": 265 }, { "epoch": 0.03154274872524606, "grad_norm": 2.91800033507398, "learning_rate": 1.3142292490118577e-05, "loss": 1.2549, "step": 266 }, { "epoch": 0.03166133048737104, "grad_norm": 3.0355879144815265, "learning_rate": 1.3191699604743083e-05, "loss": 1.5729, "step": 267 }, { "epoch": 0.03177991224949603, "grad_norm": 3.268922250348648, "learning_rate": 1.3241106719367592e-05, "loss": 1.1893, "step": 268 }, { "epoch": 0.031898494011621016, "grad_norm": 3.588822855096603, "learning_rate": 1.3290513833992097e-05, "loss": 1.1747, "step": 269 }, { "epoch": 0.032017075773746, "grad_norm": 2.9715923441486374, "learning_rate": 1.3339920948616603e-05, "loss": 1.0844, "step": 270 }, { "epoch": 0.032135657535870985, "grad_norm": 2.9028381236258163, "learning_rate": 1.3389328063241108e-05, "loss": 1.328, "step": 271 }, { "epoch": 0.032254239297995965, "grad_norm": 3.0676383468952255, "learning_rate": 1.3438735177865614e-05, "loss": 1.4886, "step": 272 }, { "epoch": 0.03237282106012095, "grad_norm": 3.1607629298318454, "learning_rate": 1.348814229249012e-05, "loss": 1.4551, "step": 273 }, { "epoch": 0.03249140282224594, "grad_norm": 2.939682519335106, "learning_rate": 1.3537549407114625e-05, "loss": 1.3138, "step": 274 }, { "epoch": 0.03260998458437092, "grad_norm": 3.1490689635709477, "learning_rate": 1.3586956521739131e-05, "loss": 1.364, "step": 275 }, { "epoch": 0.03272856634649591, "grad_norm": 3.228298996345851, "learning_rate": 1.3636363636363637e-05, "loss": 1.0217, "step": 276 }, { "epoch": 0.0328471481086209, "grad_norm": 3.4447793508579405, "learning_rate": 1.3685770750988142e-05, "loss": 1.3476, "step": 277 }, { "epoch": 0.03296572987074588, "grad_norm": 3.6130432267633124, "learning_rate": 1.373517786561265e-05, "loss": 1.1731, "step": 278 }, { "epoch": 0.033084311632870865, "grad_norm": 3.42749570332953, "learning_rate": 1.3784584980237155e-05, "loss": 1.3049, "step": 279 }, { "epoch": 0.03320289339499585, "grad_norm": 2.7633062234667114, "learning_rate": 1.383399209486166e-05, "loss": 1.4279, "step": 280 }, { "epoch": 0.03332147515712083, "grad_norm": 2.81269753352603, "learning_rate": 1.3883399209486166e-05, "loss": 1.3276, "step": 281 }, { "epoch": 0.03344005691924582, "grad_norm": 2.992052958959111, "learning_rate": 1.3932806324110672e-05, "loss": 1.273, "step": 282 }, { "epoch": 0.0335586386813708, "grad_norm": 2.8044042641908282, "learning_rate": 1.3982213438735178e-05, "loss": 1.29, "step": 283 }, { "epoch": 0.03367722044349579, "grad_norm": 3.0718594133503485, "learning_rate": 1.4031620553359683e-05, "loss": 1.3044, "step": 284 }, { "epoch": 0.03379580220562078, "grad_norm": 3.007170895230799, "learning_rate": 1.4081027667984189e-05, "loss": 1.2988, "step": 285 }, { "epoch": 0.03391438396774576, "grad_norm": 3.0035641600216296, "learning_rate": 1.4130434782608694e-05, "loss": 1.3672, "step": 286 }, { "epoch": 0.034032965729870746, "grad_norm": 2.879150954864047, "learning_rate": 1.4179841897233202e-05, "loss": 1.3355, "step": 287 }, { "epoch": 0.03415154749199573, "grad_norm": 2.7608409030046612, "learning_rate": 1.4229249011857709e-05, "loss": 1.4106, "step": 288 }, { "epoch": 0.034270129254120714, "grad_norm": 3.120967959460571, "learning_rate": 1.4278656126482215e-05, "loss": 1.3414, "step": 289 }, { "epoch": 0.0343887110162457, "grad_norm": 3.29530761471481, "learning_rate": 1.432806324110672e-05, "loss": 1.4086, "step": 290 }, { "epoch": 0.03450729277837069, "grad_norm": 2.77052267374074, "learning_rate": 1.4377470355731226e-05, "loss": 1.2666, "step": 291 }, { "epoch": 0.03462587454049567, "grad_norm": 2.8929161211593755, "learning_rate": 1.4426877470355732e-05, "loss": 1.2327, "step": 292 }, { "epoch": 0.03474445630262066, "grad_norm": 2.6688492013211835, "learning_rate": 1.4476284584980237e-05, "loss": 1.1343, "step": 293 }, { "epoch": 0.034863038064745645, "grad_norm": 2.8700171016090184, "learning_rate": 1.4525691699604743e-05, "loss": 1.3562, "step": 294 }, { "epoch": 0.034981619826870626, "grad_norm": 3.07740375797054, "learning_rate": 1.4575098814229248e-05, "loss": 1.4666, "step": 295 }, { "epoch": 0.035100201588995614, "grad_norm": 2.715821803820841, "learning_rate": 1.4624505928853754e-05, "loss": 1.2865, "step": 296 }, { "epoch": 0.035218783351120594, "grad_norm": 2.6460045131291086, "learning_rate": 1.4673913043478263e-05, "loss": 1.5349, "step": 297 }, { "epoch": 0.03533736511324558, "grad_norm": 2.790848655126991, "learning_rate": 1.4723320158102769e-05, "loss": 1.0609, "step": 298 }, { "epoch": 0.03545594687537057, "grad_norm": 3.0207164786220453, "learning_rate": 1.4772727272727274e-05, "loss": 1.4353, "step": 299 }, { "epoch": 0.03557452863749555, "grad_norm": 2.8118512131019666, "learning_rate": 1.482213438735178e-05, "loss": 0.8828, "step": 300 }, { "epoch": 0.03569311039962054, "grad_norm": 2.4199788367860027, "learning_rate": 1.4871541501976285e-05, "loss": 1.2519, "step": 301 }, { "epoch": 0.035811692161745526, "grad_norm": 2.830151537521618, "learning_rate": 1.4920948616600791e-05, "loss": 1.1018, "step": 302 }, { "epoch": 0.03593027392387051, "grad_norm": 3.1894980583872172, "learning_rate": 1.4970355731225297e-05, "loss": 1.1274, "step": 303 }, { "epoch": 0.036048855685995494, "grad_norm": 2.934860539549872, "learning_rate": 1.5019762845849802e-05, "loss": 1.3366, "step": 304 }, { "epoch": 0.03616743744812048, "grad_norm": 3.005695626896576, "learning_rate": 1.5069169960474308e-05, "loss": 1.2271, "step": 305 }, { "epoch": 0.03628601921024546, "grad_norm": 3.062027378007026, "learning_rate": 1.5118577075098814e-05, "loss": 1.2645, "step": 306 }, { "epoch": 0.03640460097237045, "grad_norm": 3.1159478259058675, "learning_rate": 1.5167984189723323e-05, "loss": 1.4309, "step": 307 }, { "epoch": 0.03652318273449544, "grad_norm": 2.8160745677737067, "learning_rate": 1.5217391304347828e-05, "loss": 1.4065, "step": 308 }, { "epoch": 0.03664176449662042, "grad_norm": 3.1389496736732228, "learning_rate": 1.5266798418972334e-05, "loss": 1.4968, "step": 309 }, { "epoch": 0.036760346258745406, "grad_norm": 2.995121412938753, "learning_rate": 1.531620553359684e-05, "loss": 1.0021, "step": 310 }, { "epoch": 0.03687892802087039, "grad_norm": 3.0908821774613187, "learning_rate": 1.5365612648221345e-05, "loss": 1.1323, "step": 311 }, { "epoch": 0.036997509782995375, "grad_norm": 2.632724885137674, "learning_rate": 1.541501976284585e-05, "loss": 1.1269, "step": 312 }, { "epoch": 0.03711609154512036, "grad_norm": 2.6593956631496374, "learning_rate": 1.5464426877470356e-05, "loss": 1.3245, "step": 313 }, { "epoch": 0.03723467330724534, "grad_norm": 2.8490044181183682, "learning_rate": 1.5513833992094862e-05, "loss": 1.3662, "step": 314 }, { "epoch": 0.03735325506937033, "grad_norm": 2.901610373823375, "learning_rate": 1.5563241106719367e-05, "loss": 1.1619, "step": 315 }, { "epoch": 0.03747183683149532, "grad_norm": 2.899104145631089, "learning_rate": 1.5612648221343873e-05, "loss": 1.1575, "step": 316 }, { "epoch": 0.0375904185936203, "grad_norm": 3.1200289361112645, "learning_rate": 1.5662055335968382e-05, "loss": 1.0384, "step": 317 }, { "epoch": 0.03770900035574529, "grad_norm": 2.7294800265354238, "learning_rate": 1.5711462450592888e-05, "loss": 0.9991, "step": 318 }, { "epoch": 0.037827582117870275, "grad_norm": 3.07139464376348, "learning_rate": 1.5760869565217393e-05, "loss": 1.3464, "step": 319 }, { "epoch": 0.037946163879995255, "grad_norm": 2.8207673297276306, "learning_rate": 1.58102766798419e-05, "loss": 1.1102, "step": 320 }, { "epoch": 0.03806474564212024, "grad_norm": 2.708080661837101, "learning_rate": 1.5859683794466405e-05, "loss": 1.2179, "step": 321 }, { "epoch": 0.038183327404245224, "grad_norm": 2.4727939501155034, "learning_rate": 1.590909090909091e-05, "loss": 1.0123, "step": 322 }, { "epoch": 0.03830190916637021, "grad_norm": 3.3053693464665748, "learning_rate": 1.5958498023715416e-05, "loss": 1.2476, "step": 323 }, { "epoch": 0.0384204909284952, "grad_norm": 2.550640356902746, "learning_rate": 1.600790513833992e-05, "loss": 1.1672, "step": 324 }, { "epoch": 0.03853907269062018, "grad_norm": 2.531894398368546, "learning_rate": 1.6057312252964427e-05, "loss": 1.3367, "step": 325 }, { "epoch": 0.03865765445274517, "grad_norm": 2.8996648833907175, "learning_rate": 1.6106719367588936e-05, "loss": 1.3594, "step": 326 }, { "epoch": 0.038776236214870155, "grad_norm": 3.2383767106258246, "learning_rate": 1.615612648221344e-05, "loss": 0.9857, "step": 327 }, { "epoch": 0.038894817976995136, "grad_norm": 2.931043829552459, "learning_rate": 1.6205533596837947e-05, "loss": 1.2328, "step": 328 }, { "epoch": 0.03901339973912012, "grad_norm": 2.568735956061358, "learning_rate": 1.6254940711462453e-05, "loss": 1.229, "step": 329 }, { "epoch": 0.03913198150124511, "grad_norm": 2.7393826944786426, "learning_rate": 1.630434782608696e-05, "loss": 1.2744, "step": 330 }, { "epoch": 0.03925056326337009, "grad_norm": 2.8854648571402017, "learning_rate": 1.6353754940711464e-05, "loss": 1.1236, "step": 331 }, { "epoch": 0.03936914502549508, "grad_norm": 2.75479551720139, "learning_rate": 1.640316205533597e-05, "loss": 1.429, "step": 332 }, { "epoch": 0.03948772678762007, "grad_norm": 3.1814163306804795, "learning_rate": 1.6452569169960475e-05, "loss": 1.2706, "step": 333 }, { "epoch": 0.03960630854974505, "grad_norm": 3.0679336409926017, "learning_rate": 1.650197628458498e-05, "loss": 1.3427, "step": 334 }, { "epoch": 0.039724890311870036, "grad_norm": 2.833714022158204, "learning_rate": 1.6551383399209487e-05, "loss": 1.2048, "step": 335 }, { "epoch": 0.039843472073995016, "grad_norm": 2.811441805328102, "learning_rate": 1.6600790513833996e-05, "loss": 1.3839, "step": 336 }, { "epoch": 0.039962053836120004, "grad_norm": 2.8783084562160934, "learning_rate": 1.66501976284585e-05, "loss": 1.4425, "step": 337 }, { "epoch": 0.04008063559824499, "grad_norm": 2.869220738097393, "learning_rate": 1.6699604743083007e-05, "loss": 1.2402, "step": 338 }, { "epoch": 0.04019921736036997, "grad_norm": 2.4912980928093034, "learning_rate": 1.6749011857707512e-05, "loss": 1.4056, "step": 339 }, { "epoch": 0.04031779912249496, "grad_norm": 2.8780374896546905, "learning_rate": 1.6798418972332018e-05, "loss": 1.5192, "step": 340 }, { "epoch": 0.04043638088461995, "grad_norm": 2.483578857441582, "learning_rate": 1.6847826086956524e-05, "loss": 1.4183, "step": 341 }, { "epoch": 0.04055496264674493, "grad_norm": 2.848729534490631, "learning_rate": 1.689723320158103e-05, "loss": 1.154, "step": 342 }, { "epoch": 0.040673544408869916, "grad_norm": 2.978625663186514, "learning_rate": 1.6946640316205535e-05, "loss": 1.2166, "step": 343 }, { "epoch": 0.040792126170994904, "grad_norm": 3.0852779736945655, "learning_rate": 1.699604743083004e-05, "loss": 1.392, "step": 344 }, { "epoch": 0.040910707933119884, "grad_norm": 2.8000682430258, "learning_rate": 1.7045454545454546e-05, "loss": 1.1834, "step": 345 }, { "epoch": 0.04102928969524487, "grad_norm": 3.471516636774333, "learning_rate": 1.7094861660079055e-05, "loss": 1.2817, "step": 346 }, { "epoch": 0.04114787145736986, "grad_norm": 2.958564646878144, "learning_rate": 1.714426877470356e-05, "loss": 1.1575, "step": 347 }, { "epoch": 0.04126645321949484, "grad_norm": 2.7306316127406682, "learning_rate": 1.7193675889328066e-05, "loss": 1.3868, "step": 348 }, { "epoch": 0.04138503498161983, "grad_norm": 2.6542113314104716, "learning_rate": 1.7243083003952572e-05, "loss": 1.0978, "step": 349 }, { "epoch": 0.04150361674374481, "grad_norm": 3.0212890662894334, "learning_rate": 1.7292490118577078e-05, "loss": 1.0651, "step": 350 }, { "epoch": 0.0416221985058698, "grad_norm": 2.589383615461841, "learning_rate": 1.7341897233201583e-05, "loss": 1.3817, "step": 351 }, { "epoch": 0.041740780267994784, "grad_norm": 2.7158519772469196, "learning_rate": 1.739130434782609e-05, "loss": 1.2833, "step": 352 }, { "epoch": 0.041859362030119765, "grad_norm": 2.3988236994354692, "learning_rate": 1.7440711462450594e-05, "loss": 1.5628, "step": 353 }, { "epoch": 0.04197794379224475, "grad_norm": 2.6525156123483264, "learning_rate": 1.74901185770751e-05, "loss": 1.155, "step": 354 }, { "epoch": 0.04209652555436974, "grad_norm": 2.746334648905017, "learning_rate": 1.7539525691699606e-05, "loss": 1.2261, "step": 355 }, { "epoch": 0.04221510731649472, "grad_norm": 2.9308772233452474, "learning_rate": 1.758893280632411e-05, "loss": 1.3595, "step": 356 }, { "epoch": 0.04233368907861971, "grad_norm": 3.2317202838323134, "learning_rate": 1.7638339920948617e-05, "loss": 1.1708, "step": 357 }, { "epoch": 0.042452270840744696, "grad_norm": 2.7751069380383964, "learning_rate": 1.7687747035573123e-05, "loss": 1.4414, "step": 358 }, { "epoch": 0.04257085260286968, "grad_norm": 3.2992330129634166, "learning_rate": 1.7737154150197628e-05, "loss": 1.3721, "step": 359 }, { "epoch": 0.042689434364994665, "grad_norm": 2.6879681968900226, "learning_rate": 1.7786561264822134e-05, "loss": 1.3715, "step": 360 }, { "epoch": 0.04280801612711965, "grad_norm": 2.5248425908653944, "learning_rate": 1.783596837944664e-05, "loss": 1.3598, "step": 361 }, { "epoch": 0.04292659788924463, "grad_norm": 2.768215996766254, "learning_rate": 1.7885375494071145e-05, "loss": 1.2683, "step": 362 }, { "epoch": 0.04304517965136962, "grad_norm": 2.5064096555244366, "learning_rate": 1.793478260869565e-05, "loss": 1.5114, "step": 363 }, { "epoch": 0.0431637614134946, "grad_norm": 2.6248952213619723, "learning_rate": 1.7984189723320156e-05, "loss": 1.0202, "step": 364 }, { "epoch": 0.04328234317561959, "grad_norm": 2.7129786968153313, "learning_rate": 1.8033596837944665e-05, "loss": 1.5015, "step": 365 }, { "epoch": 0.04340092493774458, "grad_norm": 2.7623959890852228, "learning_rate": 1.808300395256917e-05, "loss": 1.3076, "step": 366 }, { "epoch": 0.04351950669986956, "grad_norm": 2.49062022415326, "learning_rate": 1.8132411067193676e-05, "loss": 1.0397, "step": 367 }, { "epoch": 0.043638088461994545, "grad_norm": 2.701721090939951, "learning_rate": 1.8181818181818182e-05, "loss": 1.2362, "step": 368 }, { "epoch": 0.04375667022411953, "grad_norm": 2.693200520797649, "learning_rate": 1.8231225296442688e-05, "loss": 1.3149, "step": 369 }, { "epoch": 0.043875251986244514, "grad_norm": 2.370850380411606, "learning_rate": 1.8280632411067193e-05, "loss": 1.0036, "step": 370 }, { "epoch": 0.0439938337483695, "grad_norm": 3.09354127304867, "learning_rate": 1.83300395256917e-05, "loss": 1.0679, "step": 371 }, { "epoch": 0.04411241551049449, "grad_norm": 2.4601969145644618, "learning_rate": 1.8379446640316205e-05, "loss": 1.0765, "step": 372 }, { "epoch": 0.04423099727261947, "grad_norm": 2.6708841028139467, "learning_rate": 1.842885375494071e-05, "loss": 1.2483, "step": 373 }, { "epoch": 0.04434957903474446, "grad_norm": 2.8173795559657973, "learning_rate": 1.8478260869565216e-05, "loss": 1.4145, "step": 374 }, { "epoch": 0.04446816079686944, "grad_norm": 2.7968738061251455, "learning_rate": 1.8527667984189725e-05, "loss": 1.337, "step": 375 }, { "epoch": 0.044586742558994426, "grad_norm": 2.6730911336625147, "learning_rate": 1.857707509881423e-05, "loss": 1.1814, "step": 376 }, { "epoch": 0.04470532432111941, "grad_norm": 2.2466398240845074, "learning_rate": 1.8626482213438736e-05, "loss": 1.3667, "step": 377 }, { "epoch": 0.044823906083244394, "grad_norm": 2.8047732189056256, "learning_rate": 1.867588932806324e-05, "loss": 1.3818, "step": 378 }, { "epoch": 0.04494248784536938, "grad_norm": 3.1374057943405527, "learning_rate": 1.8725296442687747e-05, "loss": 1.3987, "step": 379 }, { "epoch": 0.04506106960749437, "grad_norm": 2.5800989925266578, "learning_rate": 1.8774703557312253e-05, "loss": 1.3251, "step": 380 }, { "epoch": 0.04517965136961935, "grad_norm": 2.98255007678058, "learning_rate": 1.882411067193676e-05, "loss": 1.3252, "step": 381 }, { "epoch": 0.04529823313174434, "grad_norm": 2.5866644842917115, "learning_rate": 1.8873517786561264e-05, "loss": 1.2671, "step": 382 }, { "epoch": 0.045416814893869326, "grad_norm": 2.452559032895594, "learning_rate": 1.892292490118577e-05, "loss": 1.2186, "step": 383 }, { "epoch": 0.045535396655994306, "grad_norm": 2.192440791600819, "learning_rate": 1.8972332015810275e-05, "loss": 1.4327, "step": 384 }, { "epoch": 0.045653978418119294, "grad_norm": 2.5565570508693742, "learning_rate": 1.9021739130434784e-05, "loss": 1.2116, "step": 385 }, { "epoch": 0.04577256018024428, "grad_norm": 2.4046860349727965, "learning_rate": 1.907114624505929e-05, "loss": 1.4138, "step": 386 }, { "epoch": 0.04589114194236926, "grad_norm": 3.087776738481778, "learning_rate": 1.9120553359683796e-05, "loss": 1.1523, "step": 387 }, { "epoch": 0.04600972370449425, "grad_norm": 2.40347872311201, "learning_rate": 1.91699604743083e-05, "loss": 1.2957, "step": 388 }, { "epoch": 0.04612830546661923, "grad_norm": 2.577403784881223, "learning_rate": 1.9219367588932807e-05, "loss": 1.2516, "step": 389 }, { "epoch": 0.04624688722874422, "grad_norm": 3.627663315251857, "learning_rate": 1.9268774703557312e-05, "loss": 1.1396, "step": 390 }, { "epoch": 0.046365468990869206, "grad_norm": 2.904375696802533, "learning_rate": 1.9318181818181818e-05, "loss": 1.2148, "step": 391 }, { "epoch": 0.04648405075299419, "grad_norm": 2.6248782159696846, "learning_rate": 1.9367588932806324e-05, "loss": 1.2488, "step": 392 }, { "epoch": 0.046602632515119174, "grad_norm": 2.946821241101097, "learning_rate": 1.941699604743083e-05, "loss": 1.3025, "step": 393 }, { "epoch": 0.04672121427724416, "grad_norm": 2.5918187772174686, "learning_rate": 1.9466403162055335e-05, "loss": 1.0872, "step": 394 }, { "epoch": 0.04683979603936914, "grad_norm": 3.0203386230678806, "learning_rate": 1.9515810276679844e-05, "loss": 1.3952, "step": 395 }, { "epoch": 0.04695837780149413, "grad_norm": 2.725483459510198, "learning_rate": 1.956521739130435e-05, "loss": 1.3247, "step": 396 }, { "epoch": 0.04707695956361912, "grad_norm": 2.6259583528738006, "learning_rate": 1.9614624505928855e-05, "loss": 1.2225, "step": 397 }, { "epoch": 0.0471955413257441, "grad_norm": 2.698273418177609, "learning_rate": 1.966403162055336e-05, "loss": 1.1325, "step": 398 }, { "epoch": 0.04731412308786909, "grad_norm": 2.530249329574852, "learning_rate": 1.9713438735177866e-05, "loss": 1.0368, "step": 399 }, { "epoch": 0.047432704849994074, "grad_norm": 2.7126273436438573, "learning_rate": 1.9762845849802372e-05, "loss": 1.2787, "step": 400 }, { "epoch": 0.047551286612119055, "grad_norm": 2.8273910219060485, "learning_rate": 1.9812252964426878e-05, "loss": 1.3649, "step": 401 }, { "epoch": 0.04766986837424404, "grad_norm": 2.453252721776185, "learning_rate": 1.9861660079051383e-05, "loss": 1.2477, "step": 402 }, { "epoch": 0.04778845013636902, "grad_norm": 2.4934203363558427, "learning_rate": 1.991106719367589e-05, "loss": 1.3257, "step": 403 }, { "epoch": 0.04790703189849401, "grad_norm": 2.630789345122103, "learning_rate": 1.9960474308300398e-05, "loss": 1.4981, "step": 404 }, { "epoch": 0.048025613660619, "grad_norm": 2.5627331187175164, "learning_rate": 2.0009881422924903e-05, "loss": 1.1666, "step": 405 }, { "epoch": 0.04814419542274398, "grad_norm": 2.5838119959030243, "learning_rate": 2.005928853754941e-05, "loss": 1.0082, "step": 406 }, { "epoch": 0.04826277718486897, "grad_norm": 2.918813289254095, "learning_rate": 2.0108695652173915e-05, "loss": 1.2295, "step": 407 }, { "epoch": 0.048381358946993955, "grad_norm": 2.7532907852622728, "learning_rate": 2.015810276679842e-05, "loss": 1.3116, "step": 408 }, { "epoch": 0.048499940709118936, "grad_norm": 2.6796675143457414, "learning_rate": 2.0207509881422926e-05, "loss": 1.4023, "step": 409 }, { "epoch": 0.04861852247124392, "grad_norm": 2.843202942987257, "learning_rate": 2.025691699604743e-05, "loss": 1.2611, "step": 410 }, { "epoch": 0.04873710423336891, "grad_norm": 2.534438516774595, "learning_rate": 2.0306324110671937e-05, "loss": 1.2428, "step": 411 }, { "epoch": 0.04885568599549389, "grad_norm": 2.6450572993182013, "learning_rate": 2.0355731225296443e-05, "loss": 1.271, "step": 412 }, { "epoch": 0.04897426775761888, "grad_norm": 2.487767986910595, "learning_rate": 2.040513833992095e-05, "loss": 1.4073, "step": 413 }, { "epoch": 0.04909284951974386, "grad_norm": 2.779679036561318, "learning_rate": 2.0454545454545457e-05, "loss": 1.2182, "step": 414 }, { "epoch": 0.04921143128186885, "grad_norm": 2.5826758599738158, "learning_rate": 2.0503952569169963e-05, "loss": 1.3293, "step": 415 }, { "epoch": 0.049330013043993835, "grad_norm": 2.679500276811846, "learning_rate": 2.055335968379447e-05, "loss": 1.3586, "step": 416 }, { "epoch": 0.049448594806118816, "grad_norm": 2.5570392799908066, "learning_rate": 2.0602766798418974e-05, "loss": 1.4393, "step": 417 }, { "epoch": 0.049567176568243804, "grad_norm": 2.663263864974672, "learning_rate": 2.065217391304348e-05, "loss": 1.1841, "step": 418 }, { "epoch": 0.04968575833036879, "grad_norm": 2.747566745937756, "learning_rate": 2.0701581027667985e-05, "loss": 1.2873, "step": 419 }, { "epoch": 0.04980434009249377, "grad_norm": 2.633546314291064, "learning_rate": 2.075098814229249e-05, "loss": 1.2836, "step": 420 }, { "epoch": 0.04992292185461876, "grad_norm": 2.7890223401303724, "learning_rate": 2.0800395256916997e-05, "loss": 1.1734, "step": 421 }, { "epoch": 0.05004150361674375, "grad_norm": 2.8685974366873372, "learning_rate": 2.0849802371541502e-05, "loss": 1.416, "step": 422 }, { "epoch": 0.05016008537886873, "grad_norm": 2.1011346736711665, "learning_rate": 2.0899209486166008e-05, "loss": 1.5013, "step": 423 }, { "epoch": 0.050278667140993716, "grad_norm": 2.493094272852067, "learning_rate": 2.0948616600790517e-05, "loss": 1.1525, "step": 424 }, { "epoch": 0.050397248903118703, "grad_norm": 2.508612821617293, "learning_rate": 2.0998023715415023e-05, "loss": 1.2973, "step": 425 }, { "epoch": 0.050515830665243684, "grad_norm": 2.5260690991520383, "learning_rate": 2.1047430830039528e-05, "loss": 1.2556, "step": 426 }, { "epoch": 0.05063441242736867, "grad_norm": 2.3555352058181285, "learning_rate": 2.1096837944664034e-05, "loss": 1.1786, "step": 427 }, { "epoch": 0.05075299418949365, "grad_norm": 2.89919894739498, "learning_rate": 2.114624505928854e-05, "loss": 1.4252, "step": 428 }, { "epoch": 0.05087157595161864, "grad_norm": 2.4757561248996245, "learning_rate": 2.1195652173913045e-05, "loss": 1.107, "step": 429 }, { "epoch": 0.05099015771374363, "grad_norm": 2.682947988089475, "learning_rate": 2.124505928853755e-05, "loss": 1.1692, "step": 430 }, { "epoch": 0.05110873947586861, "grad_norm": 2.4097428191628185, "learning_rate": 2.1294466403162056e-05, "loss": 1.1052, "step": 431 }, { "epoch": 0.051227321237993596, "grad_norm": 2.7409542482830416, "learning_rate": 2.1343873517786562e-05, "loss": 0.9055, "step": 432 }, { "epoch": 0.051345903000118584, "grad_norm": 2.5372125088557453, "learning_rate": 2.1393280632411067e-05, "loss": 1.2053, "step": 433 }, { "epoch": 0.051464484762243565, "grad_norm": 2.762055197192633, "learning_rate": 2.1442687747035576e-05, "loss": 1.298, "step": 434 }, { "epoch": 0.05158306652436855, "grad_norm": 2.3011982487989764, "learning_rate": 2.1492094861660082e-05, "loss": 1.0514, "step": 435 }, { "epoch": 0.05170164828649354, "grad_norm": 2.5255779203953117, "learning_rate": 2.1541501976284588e-05, "loss": 1.264, "step": 436 }, { "epoch": 0.05182023004861852, "grad_norm": 2.435216582525352, "learning_rate": 2.1590909090909093e-05, "loss": 0.7867, "step": 437 }, { "epoch": 0.05193881181074351, "grad_norm": 2.353394507643216, "learning_rate": 2.16403162055336e-05, "loss": 1.2825, "step": 438 }, { "epoch": 0.052057393572868496, "grad_norm": 2.5313845209200934, "learning_rate": 2.1689723320158105e-05, "loss": 1.3053, "step": 439 }, { "epoch": 0.05217597533499348, "grad_norm": 2.5154295218017158, "learning_rate": 2.173913043478261e-05, "loss": 1.1763, "step": 440 }, { "epoch": 0.052294557097118465, "grad_norm": 2.3197559609956264, "learning_rate": 2.1788537549407116e-05, "loss": 1.2474, "step": 441 }, { "epoch": 0.052413138859243445, "grad_norm": 2.4039543623357287, "learning_rate": 2.183794466403162e-05, "loss": 1.1774, "step": 442 }, { "epoch": 0.05253172062136843, "grad_norm": 2.7607236114077995, "learning_rate": 2.188735177865613e-05, "loss": 1.2824, "step": 443 }, { "epoch": 0.05265030238349342, "grad_norm": 2.326240623762275, "learning_rate": 2.1936758893280636e-05, "loss": 1.3178, "step": 444 }, { "epoch": 0.0527688841456184, "grad_norm": 2.2615910131927093, "learning_rate": 2.198616600790514e-05, "loss": 1.4656, "step": 445 }, { "epoch": 0.05288746590774339, "grad_norm": 2.3310789269739747, "learning_rate": 2.2035573122529647e-05, "loss": 1.2193, "step": 446 }, { "epoch": 0.05300604766986838, "grad_norm": 2.2586959253526326, "learning_rate": 2.2084980237154153e-05, "loss": 1.1863, "step": 447 }, { "epoch": 0.05312462943199336, "grad_norm": 2.382749475739641, "learning_rate": 2.213438735177866e-05, "loss": 1.1248, "step": 448 }, { "epoch": 0.053243211194118345, "grad_norm": 2.138637898312669, "learning_rate": 2.2183794466403164e-05, "loss": 1.2273, "step": 449 }, { "epoch": 0.05336179295624333, "grad_norm": 3.4037441782123965, "learning_rate": 2.223320158102767e-05, "loss": 1.3639, "step": 450 }, { "epoch": 0.05348037471836831, "grad_norm": 2.580456951357517, "learning_rate": 2.2282608695652175e-05, "loss": 1.0896, "step": 451 }, { "epoch": 0.0535989564804933, "grad_norm": 2.7394744607809596, "learning_rate": 2.233201581027668e-05, "loss": 1.1623, "step": 452 }, { "epoch": 0.05371753824261829, "grad_norm": 2.653299356035562, "learning_rate": 2.2381422924901187e-05, "loss": 1.267, "step": 453 }, { "epoch": 0.05383612000474327, "grad_norm": 2.500084480235046, "learning_rate": 2.2430830039525692e-05, "loss": 1.0464, "step": 454 }, { "epoch": 0.05395470176686826, "grad_norm": 2.314329189943909, "learning_rate": 2.2480237154150198e-05, "loss": 1.45, "step": 455 }, { "epoch": 0.05407328352899324, "grad_norm": 2.4125235643241596, "learning_rate": 2.2529644268774703e-05, "loss": 1.0963, "step": 456 }, { "epoch": 0.054191865291118226, "grad_norm": 2.3471230259516203, "learning_rate": 2.257905138339921e-05, "loss": 1.1924, "step": 457 }, { "epoch": 0.05431044705324321, "grad_norm": 2.3298907769978063, "learning_rate": 2.2628458498023715e-05, "loss": 1.123, "step": 458 }, { "epoch": 0.054429028815368194, "grad_norm": 2.4931427271189066, "learning_rate": 2.267786561264822e-05, "loss": 0.9861, "step": 459 }, { "epoch": 0.05454761057749318, "grad_norm": 2.7180656690207163, "learning_rate": 2.272727272727273e-05, "loss": 1.481, "step": 460 }, { "epoch": 0.05466619233961817, "grad_norm": 2.3575846367746567, "learning_rate": 2.2776679841897235e-05, "loss": 0.9426, "step": 461 }, { "epoch": 0.05478477410174315, "grad_norm": 2.5332645841909343, "learning_rate": 2.282608695652174e-05, "loss": 1.312, "step": 462 }, { "epoch": 0.05490335586386814, "grad_norm": 2.555982982982781, "learning_rate": 2.2875494071146246e-05, "loss": 1.1929, "step": 463 }, { "epoch": 0.055021937625993125, "grad_norm": 2.40072650974613, "learning_rate": 2.2924901185770752e-05, "loss": 1.2516, "step": 464 }, { "epoch": 0.055140519388118106, "grad_norm": 2.3778537022151767, "learning_rate": 2.2974308300395257e-05, "loss": 1.4855, "step": 465 }, { "epoch": 0.055259101150243094, "grad_norm": 2.5340798929760866, "learning_rate": 2.3023715415019763e-05, "loss": 1.4211, "step": 466 }, { "epoch": 0.055377682912368074, "grad_norm": 2.477747414468036, "learning_rate": 2.307312252964427e-05, "loss": 1.262, "step": 467 }, { "epoch": 0.05549626467449306, "grad_norm": 2.500063974888124, "learning_rate": 2.3122529644268774e-05, "loss": 1.2145, "step": 468 }, { "epoch": 0.05561484643661805, "grad_norm": 2.275696839469494, "learning_rate": 2.317193675889328e-05, "loss": 1.4108, "step": 469 }, { "epoch": 0.05573342819874303, "grad_norm": 2.28745630330474, "learning_rate": 2.3221343873517785e-05, "loss": 1.1709, "step": 470 }, { "epoch": 0.05585200996086802, "grad_norm": 2.420649632953623, "learning_rate": 2.327075098814229e-05, "loss": 1.4523, "step": 471 }, { "epoch": 0.055970591722993006, "grad_norm": 2.6885323833495303, "learning_rate": 2.33201581027668e-05, "loss": 1.4183, "step": 472 }, { "epoch": 0.05608917348511799, "grad_norm": 2.4894451378739184, "learning_rate": 2.3369565217391306e-05, "loss": 1.1483, "step": 473 }, { "epoch": 0.056207755247242974, "grad_norm": 2.313024376563048, "learning_rate": 2.341897233201581e-05, "loss": 1.3377, "step": 474 }, { "epoch": 0.05632633700936796, "grad_norm": 2.294353848201621, "learning_rate": 2.3468379446640317e-05, "loss": 1.3604, "step": 475 }, { "epoch": 0.05644491877149294, "grad_norm": 2.1362500600365713, "learning_rate": 2.3517786561264823e-05, "loss": 1.181, "step": 476 }, { "epoch": 0.05656350053361793, "grad_norm": 2.563769832874949, "learning_rate": 2.3567193675889328e-05, "loss": 1.2985, "step": 477 }, { "epoch": 0.05668208229574292, "grad_norm": 2.9297674376484006, "learning_rate": 2.3616600790513834e-05, "loss": 1.5035, "step": 478 }, { "epoch": 0.0568006640578679, "grad_norm": 2.618748600000825, "learning_rate": 2.366600790513834e-05, "loss": 1.379, "step": 479 }, { "epoch": 0.056919245819992886, "grad_norm": 2.382478210556736, "learning_rate": 2.3715415019762845e-05, "loss": 1.0395, "step": 480 }, { "epoch": 0.05703782758211787, "grad_norm": 2.5129405838238714, "learning_rate": 2.376482213438735e-05, "loss": 1.1808, "step": 481 }, { "epoch": 0.057156409344242855, "grad_norm": 2.133649214639615, "learning_rate": 2.381422924901186e-05, "loss": 1.2312, "step": 482 }, { "epoch": 0.05727499110636784, "grad_norm": 2.5288196179800493, "learning_rate": 2.3863636363636365e-05, "loss": 1.331, "step": 483 }, { "epoch": 0.05739357286849282, "grad_norm": 2.6499928563696824, "learning_rate": 2.391304347826087e-05, "loss": 1.1671, "step": 484 }, { "epoch": 0.05751215463061781, "grad_norm": 2.3145140816381433, "learning_rate": 2.3962450592885376e-05, "loss": 1.3676, "step": 485 }, { "epoch": 0.0576307363927428, "grad_norm": 2.4904999874462903, "learning_rate": 2.4011857707509882e-05, "loss": 1.3468, "step": 486 }, { "epoch": 0.05774931815486778, "grad_norm": 2.2397738579272843, "learning_rate": 2.4061264822134388e-05, "loss": 1.2489, "step": 487 }, { "epoch": 0.05786789991699277, "grad_norm": 2.3628444614984994, "learning_rate": 2.4110671936758893e-05, "loss": 1.2156, "step": 488 }, { "epoch": 0.057986481679117755, "grad_norm": 2.469237961456168, "learning_rate": 2.41600790513834e-05, "loss": 1.1843, "step": 489 }, { "epoch": 0.058105063441242735, "grad_norm": 2.515446717209009, "learning_rate": 2.4209486166007905e-05, "loss": 1.2417, "step": 490 }, { "epoch": 0.05822364520336772, "grad_norm": 2.7227544170275872, "learning_rate": 2.425889328063241e-05, "loss": 1.4287, "step": 491 }, { "epoch": 0.05834222696549271, "grad_norm": 2.4335756707662086, "learning_rate": 2.430830039525692e-05, "loss": 1.2592, "step": 492 }, { "epoch": 0.05846080872761769, "grad_norm": 2.548147958053396, "learning_rate": 2.4357707509881425e-05, "loss": 1.2616, "step": 493 }, { "epoch": 0.05857939048974268, "grad_norm": 2.252372966105802, "learning_rate": 2.440711462450593e-05, "loss": 1.4302, "step": 494 }, { "epoch": 0.05869797225186766, "grad_norm": 2.4237053246414058, "learning_rate": 2.4456521739130436e-05, "loss": 1.284, "step": 495 }, { "epoch": 0.05881655401399265, "grad_norm": 2.4890989881758285, "learning_rate": 2.450592885375494e-05, "loss": 1.326, "step": 496 }, { "epoch": 0.058935135776117635, "grad_norm": 2.419802188364975, "learning_rate": 2.4555335968379447e-05, "loss": 1.4267, "step": 497 }, { "epoch": 0.059053717538242616, "grad_norm": 2.407643603680517, "learning_rate": 2.4604743083003953e-05, "loss": 1.5514, "step": 498 }, { "epoch": 0.0591722993003676, "grad_norm": 3.1014669788604237, "learning_rate": 2.465415019762846e-05, "loss": 1.1942, "step": 499 }, { "epoch": 0.05929088106249259, "grad_norm": 2.4503206694521635, "learning_rate": 2.4703557312252964e-05, "loss": 1.1499, "step": 500 }, { "epoch": 0.05940946282461757, "grad_norm": 2.3932687139334203, "learning_rate": 2.475296442687747e-05, "loss": 1.1471, "step": 501 }, { "epoch": 0.05952804458674256, "grad_norm": 2.277081619942885, "learning_rate": 2.480237154150198e-05, "loss": 1.2769, "step": 502 }, { "epoch": 0.05964662634886755, "grad_norm": 2.655722352609263, "learning_rate": 2.4851778656126484e-05, "loss": 1.116, "step": 503 }, { "epoch": 0.05976520811099253, "grad_norm": 2.264160588330383, "learning_rate": 2.490118577075099e-05, "loss": 1.3582, "step": 504 }, { "epoch": 0.059883789873117516, "grad_norm": 2.993580899931676, "learning_rate": 2.4950592885375496e-05, "loss": 1.2383, "step": 505 }, { "epoch": 0.060002371635242496, "grad_norm": 2.1615567728250693, "learning_rate": 2.5e-05, "loss": 1.3259, "step": 506 }, { "epoch": 0.060120953397367484, "grad_norm": 2.4218557620019143, "learning_rate": 2.5049407114624507e-05, "loss": 1.2555, "step": 507 }, { "epoch": 0.06023953515949247, "grad_norm": 2.601143530731072, "learning_rate": 2.5098814229249012e-05, "loss": 1.0902, "step": 508 }, { "epoch": 0.06035811692161745, "grad_norm": 2.4329648100540986, "learning_rate": 2.5148221343873518e-05, "loss": 1.4724, "step": 509 }, { "epoch": 0.06047669868374244, "grad_norm": 2.299271869439856, "learning_rate": 2.5197628458498024e-05, "loss": 1.3814, "step": 510 }, { "epoch": 0.06059528044586743, "grad_norm": 2.4631391845178703, "learning_rate": 2.524703557312253e-05, "loss": 1.3259, "step": 511 }, { "epoch": 0.06071386220799241, "grad_norm": 2.394392906763372, "learning_rate": 2.5296442687747035e-05, "loss": 1.2841, "step": 512 }, { "epoch": 0.060832443970117396, "grad_norm": 2.340144421472407, "learning_rate": 2.534584980237154e-05, "loss": 1.1505, "step": 513 }, { "epoch": 0.060951025732242384, "grad_norm": 2.3297309759024047, "learning_rate": 2.5395256916996046e-05, "loss": 1.2812, "step": 514 }, { "epoch": 0.061069607494367364, "grad_norm": 2.270780261398767, "learning_rate": 2.5444664031620552e-05, "loss": 1.1226, "step": 515 }, { "epoch": 0.06118818925649235, "grad_norm": 2.2020257546475297, "learning_rate": 2.5494071146245064e-05, "loss": 1.1403, "step": 516 }, { "epoch": 0.06130677101861734, "grad_norm": 2.441677977405497, "learning_rate": 2.554347826086957e-05, "loss": 1.1804, "step": 517 }, { "epoch": 0.06142535278074232, "grad_norm": 2.4882519208622336, "learning_rate": 2.5592885375494075e-05, "loss": 1.1411, "step": 518 }, { "epoch": 0.06154393454286731, "grad_norm": 2.0790833616196047, "learning_rate": 2.564229249011858e-05, "loss": 1.3033, "step": 519 }, { "epoch": 0.06166251630499229, "grad_norm": 2.0819502918020913, "learning_rate": 2.5691699604743087e-05, "loss": 1.273, "step": 520 }, { "epoch": 0.06178109806711728, "grad_norm": 2.2168657280508373, "learning_rate": 2.5741106719367592e-05, "loss": 1.2951, "step": 521 }, { "epoch": 0.061899679829242264, "grad_norm": 2.4205586808550716, "learning_rate": 2.5790513833992098e-05, "loss": 1.3411, "step": 522 }, { "epoch": 0.062018261591367245, "grad_norm": 2.0972680213728694, "learning_rate": 2.5839920948616603e-05, "loss": 1.2263, "step": 523 }, { "epoch": 0.06213684335349223, "grad_norm": 2.365153783525565, "learning_rate": 2.588932806324111e-05, "loss": 1.0177, "step": 524 }, { "epoch": 0.06225542511561722, "grad_norm": 2.052962479136888, "learning_rate": 2.5938735177865615e-05, "loss": 1.3229, "step": 525 }, { "epoch": 0.0623740068777422, "grad_norm": 2.561629781307026, "learning_rate": 2.598814229249012e-05, "loss": 1.4536, "step": 526 }, { "epoch": 0.06249258863986719, "grad_norm": 2.2803189711579734, "learning_rate": 2.6037549407114626e-05, "loss": 1.4335, "step": 527 }, { "epoch": 0.06261117040199217, "grad_norm": 2.347795278790725, "learning_rate": 2.608695652173913e-05, "loss": 1.5287, "step": 528 }, { "epoch": 0.06272975216411716, "grad_norm": 2.138694960674397, "learning_rate": 2.6136363636363637e-05, "loss": 1.54, "step": 529 }, { "epoch": 0.06284833392624214, "grad_norm": 2.2938027333713453, "learning_rate": 2.6185770750988143e-05, "loss": 1.2111, "step": 530 }, { "epoch": 0.06296691568836713, "grad_norm": 2.4954802735517307, "learning_rate": 2.623517786561265e-05, "loss": 1.1311, "step": 531 }, { "epoch": 0.06308549745049212, "grad_norm": 2.5123695054399477, "learning_rate": 2.6284584980237154e-05, "loss": 1.1936, "step": 532 }, { "epoch": 0.0632040792126171, "grad_norm": 2.240803724579135, "learning_rate": 2.633399209486166e-05, "loss": 1.0283, "step": 533 }, { "epoch": 0.06332266097474208, "grad_norm": 2.5397508254841075, "learning_rate": 2.6383399209486165e-05, "loss": 1.072, "step": 534 }, { "epoch": 0.06344124273686708, "grad_norm": 2.1343889089665016, "learning_rate": 2.643280632411067e-05, "loss": 1.5775, "step": 535 }, { "epoch": 0.06355982449899206, "grad_norm": 2.1018512408543115, "learning_rate": 2.6482213438735183e-05, "loss": 1.0445, "step": 536 }, { "epoch": 0.06367840626111704, "grad_norm": 2.262576385937686, "learning_rate": 2.653162055335969e-05, "loss": 1.3332, "step": 537 }, { "epoch": 0.06379698802324203, "grad_norm": 2.595962190337002, "learning_rate": 2.6581027667984194e-05, "loss": 1.2729, "step": 538 }, { "epoch": 0.06391556978536701, "grad_norm": 2.3214648614472257, "learning_rate": 2.66304347826087e-05, "loss": 1.3461, "step": 539 }, { "epoch": 0.064034151547492, "grad_norm": 2.33212808719731, "learning_rate": 2.6679841897233206e-05, "loss": 1.4264, "step": 540 }, { "epoch": 0.06415273330961697, "grad_norm": 2.551232093868217, "learning_rate": 2.672924901185771e-05, "loss": 1.2352, "step": 541 }, { "epoch": 0.06427131507174197, "grad_norm": 2.126277756022061, "learning_rate": 2.6778656126482217e-05, "loss": 1.3146, "step": 542 }, { "epoch": 0.06438989683386695, "grad_norm": 2.6343895844534204, "learning_rate": 2.6828063241106723e-05, "loss": 1.2963, "step": 543 }, { "epoch": 0.06450847859599193, "grad_norm": 3.1269497889316864, "learning_rate": 2.6877470355731228e-05, "loss": 1.4217, "step": 544 }, { "epoch": 0.06462706035811693, "grad_norm": 2.209949180971256, "learning_rate": 2.6926877470355734e-05, "loss": 1.2183, "step": 545 }, { "epoch": 0.0647456421202419, "grad_norm": 2.2784093704226214, "learning_rate": 2.697628458498024e-05, "loss": 1.2003, "step": 546 }, { "epoch": 0.06486422388236689, "grad_norm": 2.0827375183550543, "learning_rate": 2.7025691699604745e-05, "loss": 1.1671, "step": 547 }, { "epoch": 0.06498280564449188, "grad_norm": 2.0615295255055517, "learning_rate": 2.707509881422925e-05, "loss": 1.388, "step": 548 }, { "epoch": 0.06510138740661686, "grad_norm": 2.158957186779704, "learning_rate": 2.7124505928853756e-05, "loss": 1.149, "step": 549 }, { "epoch": 0.06521996916874184, "grad_norm": 2.3494861863352487, "learning_rate": 2.7173913043478262e-05, "loss": 1.3205, "step": 550 }, { "epoch": 0.06533855093086684, "grad_norm": 2.338850055804069, "learning_rate": 2.7223320158102767e-05, "loss": 1.1631, "step": 551 }, { "epoch": 0.06545713269299182, "grad_norm": 2.200781494549883, "learning_rate": 2.7272727272727273e-05, "loss": 1.338, "step": 552 }, { "epoch": 0.0655757144551168, "grad_norm": 2.3502168701130386, "learning_rate": 2.732213438735178e-05, "loss": 1.2998, "step": 553 }, { "epoch": 0.0656942962172418, "grad_norm": 2.4202855750037924, "learning_rate": 2.7371541501976284e-05, "loss": 1.2907, "step": 554 }, { "epoch": 0.06581287797936677, "grad_norm": 2.3234784157195643, "learning_rate": 2.7420948616600793e-05, "loss": 1.278, "step": 555 }, { "epoch": 0.06593145974149175, "grad_norm": 2.4446211192847582, "learning_rate": 2.74703557312253e-05, "loss": 1.3567, "step": 556 }, { "epoch": 0.06605004150361675, "grad_norm": 2.2866458271720886, "learning_rate": 2.7519762845849805e-05, "loss": 0.9767, "step": 557 }, { "epoch": 0.06616862326574173, "grad_norm": 2.609262746765853, "learning_rate": 2.756916996047431e-05, "loss": 1.443, "step": 558 }, { "epoch": 0.06628720502786671, "grad_norm": 2.3740859707682826, "learning_rate": 2.7618577075098816e-05, "loss": 1.4475, "step": 559 }, { "epoch": 0.0664057867899917, "grad_norm": 2.5762621742943557, "learning_rate": 2.766798418972332e-05, "loss": 1.3917, "step": 560 }, { "epoch": 0.06652436855211669, "grad_norm": 2.1722378179101027, "learning_rate": 2.7717391304347827e-05, "loss": 1.0666, "step": 561 }, { "epoch": 0.06664295031424167, "grad_norm": 2.2609664887850993, "learning_rate": 2.7766798418972333e-05, "loss": 1.332, "step": 562 }, { "epoch": 0.06676153207636666, "grad_norm": 2.190577586592334, "learning_rate": 2.7816205533596838e-05, "loss": 1.165, "step": 563 }, { "epoch": 0.06688011383849164, "grad_norm": 2.3310695181091603, "learning_rate": 2.7865612648221344e-05, "loss": 1.296, "step": 564 }, { "epoch": 0.06699869560061662, "grad_norm": 2.913174220423938, "learning_rate": 2.791501976284585e-05, "loss": 1.3252, "step": 565 }, { "epoch": 0.0671172773627416, "grad_norm": 2.3723399662015074, "learning_rate": 2.7964426877470355e-05, "loss": 1.1765, "step": 566 }, { "epoch": 0.0672358591248666, "grad_norm": 2.2321120851721448, "learning_rate": 2.801383399209486e-05, "loss": 1.2669, "step": 567 }, { "epoch": 0.06735444088699158, "grad_norm": 2.348018589720416, "learning_rate": 2.8063241106719366e-05, "loss": 1.3448, "step": 568 }, { "epoch": 0.06747302264911656, "grad_norm": 2.2628298531037547, "learning_rate": 2.8112648221343872e-05, "loss": 1.175, "step": 569 }, { "epoch": 0.06759160441124155, "grad_norm": 2.9745657121416373, "learning_rate": 2.8162055335968378e-05, "loss": 1.4164, "step": 570 }, { "epoch": 0.06771018617336654, "grad_norm": 2.1799016155552278, "learning_rate": 2.8211462450592883e-05, "loss": 1.3606, "step": 571 }, { "epoch": 0.06782876793549152, "grad_norm": 2.295860594362138, "learning_rate": 2.826086956521739e-05, "loss": 1.109, "step": 572 }, { "epoch": 0.06794734969761651, "grad_norm": 2.3734313084286085, "learning_rate": 2.8310276679841894e-05, "loss": 1.1759, "step": 573 }, { "epoch": 0.06806593145974149, "grad_norm": 2.550689100853419, "learning_rate": 2.8359683794466403e-05, "loss": 1.2211, "step": 574 }, { "epoch": 0.06818451322186647, "grad_norm": 2.2278869913563395, "learning_rate": 2.8409090909090912e-05, "loss": 1.1187, "step": 575 }, { "epoch": 0.06830309498399147, "grad_norm": 2.1600829534343617, "learning_rate": 2.8458498023715418e-05, "loss": 1.047, "step": 576 }, { "epoch": 0.06842167674611645, "grad_norm": 2.2318769989219014, "learning_rate": 2.8507905138339924e-05, "loss": 1.1978, "step": 577 }, { "epoch": 0.06854025850824143, "grad_norm": 2.2365189898352846, "learning_rate": 2.855731225296443e-05, "loss": 1.0909, "step": 578 }, { "epoch": 0.06865884027036642, "grad_norm": 2.386323796617713, "learning_rate": 2.8606719367588935e-05, "loss": 1.1331, "step": 579 }, { "epoch": 0.0687774220324914, "grad_norm": 2.3502834224507154, "learning_rate": 2.865612648221344e-05, "loss": 1.4039, "step": 580 }, { "epoch": 0.06889600379461638, "grad_norm": 1.9468222794647423, "learning_rate": 2.8705533596837946e-05, "loss": 1.2398, "step": 581 }, { "epoch": 0.06901458555674138, "grad_norm": 2.1965793446547215, "learning_rate": 2.8754940711462452e-05, "loss": 1.1226, "step": 582 }, { "epoch": 0.06913316731886636, "grad_norm": 2.1468281290600175, "learning_rate": 2.8804347826086957e-05, "loss": 1.2343, "step": 583 }, { "epoch": 0.06925174908099134, "grad_norm": 2.1819282632068484, "learning_rate": 2.8853754940711463e-05, "loss": 1.0693, "step": 584 }, { "epoch": 0.06937033084311633, "grad_norm": 2.5568229728407696, "learning_rate": 2.890316205533597e-05, "loss": 1.2834, "step": 585 }, { "epoch": 0.06948891260524132, "grad_norm": 2.2426630336916493, "learning_rate": 2.8952569169960474e-05, "loss": 1.3073, "step": 586 }, { "epoch": 0.0696074943673663, "grad_norm": 2.184080807208477, "learning_rate": 2.900197628458498e-05, "loss": 1.3569, "step": 587 }, { "epoch": 0.06972607612949129, "grad_norm": 2.153072527328173, "learning_rate": 2.9051383399209485e-05, "loss": 1.316, "step": 588 }, { "epoch": 0.06984465789161627, "grad_norm": 2.20119147513914, "learning_rate": 2.910079051383399e-05, "loss": 1.353, "step": 589 }, { "epoch": 0.06996323965374125, "grad_norm": 2.1995812868490416, "learning_rate": 2.9150197628458497e-05, "loss": 0.8626, "step": 590 }, { "epoch": 0.07008182141586625, "grad_norm": 2.63430958551922, "learning_rate": 2.9199604743083002e-05, "loss": 1.1934, "step": 591 }, { "epoch": 0.07020040317799123, "grad_norm": 2.1543968578695405, "learning_rate": 2.9249011857707508e-05, "loss": 0.9758, "step": 592 }, { "epoch": 0.07031898494011621, "grad_norm": 2.577090617768706, "learning_rate": 2.9298418972332014e-05, "loss": 1.2178, "step": 593 }, { "epoch": 0.07043756670224119, "grad_norm": 2.1960636756491163, "learning_rate": 2.9347826086956526e-05, "loss": 1.3568, "step": 594 }, { "epoch": 0.07055614846436618, "grad_norm": 2.5635171843913875, "learning_rate": 2.939723320158103e-05, "loss": 1.162, "step": 595 }, { "epoch": 0.07067473022649116, "grad_norm": 2.258402073980407, "learning_rate": 2.9446640316205537e-05, "loss": 1.1793, "step": 596 }, { "epoch": 0.07079331198861614, "grad_norm": 2.339126866973791, "learning_rate": 2.9496047430830043e-05, "loss": 1.3893, "step": 597 }, { "epoch": 0.07091189375074114, "grad_norm": 2.3165493505539008, "learning_rate": 2.954545454545455e-05, "loss": 1.1491, "step": 598 }, { "epoch": 0.07103047551286612, "grad_norm": 2.080024361942203, "learning_rate": 2.9594861660079054e-05, "loss": 0.832, "step": 599 }, { "epoch": 0.0711490572749911, "grad_norm": 2.059472479536438, "learning_rate": 2.964426877470356e-05, "loss": 1.2426, "step": 600 }, { "epoch": 0.0712676390371161, "grad_norm": 2.5679202315236536, "learning_rate": 2.9693675889328065e-05, "loss": 1.2791, "step": 601 }, { "epoch": 0.07138622079924108, "grad_norm": 1.9662487086267828, "learning_rate": 2.974308300395257e-05, "loss": 1.2761, "step": 602 }, { "epoch": 0.07150480256136606, "grad_norm": 2.0401467865885805, "learning_rate": 2.9792490118577076e-05, "loss": 1.347, "step": 603 }, { "epoch": 0.07162338432349105, "grad_norm": 2.1185551056585896, "learning_rate": 2.9841897233201582e-05, "loss": 0.6096, "step": 604 }, { "epoch": 0.07174196608561603, "grad_norm": 2.241714332501538, "learning_rate": 2.9891304347826088e-05, "loss": 1.4908, "step": 605 }, { "epoch": 0.07186054784774101, "grad_norm": 2.4785543126672924, "learning_rate": 2.9940711462450593e-05, "loss": 1.336, "step": 606 }, { "epoch": 0.07197912960986601, "grad_norm": 2.3094742794243146, "learning_rate": 2.99901185770751e-05, "loss": 1.3999, "step": 607 }, { "epoch": 0.07209771137199099, "grad_norm": 2.2997951375743213, "learning_rate": 3.0039525691699605e-05, "loss": 1.4017, "step": 608 }, { "epoch": 0.07221629313411597, "grad_norm": 2.4009311508809272, "learning_rate": 3.008893280632411e-05, "loss": 1.4592, "step": 609 }, { "epoch": 0.07233487489624096, "grad_norm": 2.1131382544040815, "learning_rate": 3.0138339920948616e-05, "loss": 1.2334, "step": 610 }, { "epoch": 0.07245345665836594, "grad_norm": 2.2017485441669047, "learning_rate": 3.018774703557312e-05, "loss": 1.2462, "step": 611 }, { "epoch": 0.07257203842049093, "grad_norm": 2.0918687069268267, "learning_rate": 3.0237154150197627e-05, "loss": 1.3212, "step": 612 }, { "epoch": 0.07269062018261592, "grad_norm": 2.2171748388548305, "learning_rate": 3.0286561264822133e-05, "loss": 1.0972, "step": 613 }, { "epoch": 0.0728092019447409, "grad_norm": 2.185411100980378, "learning_rate": 3.0335968379446645e-05, "loss": 1.2474, "step": 614 }, { "epoch": 0.07292778370686588, "grad_norm": 2.112212795996242, "learning_rate": 3.038537549407115e-05, "loss": 1.4223, "step": 615 }, { "epoch": 0.07304636546899088, "grad_norm": 2.4900141329748506, "learning_rate": 3.0434782608695656e-05, "loss": 1.1955, "step": 616 }, { "epoch": 0.07316494723111586, "grad_norm": 2.1863466934201585, "learning_rate": 3.0484189723320162e-05, "loss": 1.1771, "step": 617 }, { "epoch": 0.07328352899324084, "grad_norm": 2.055606299634549, "learning_rate": 3.053359683794467e-05, "loss": 1.1907, "step": 618 }, { "epoch": 0.07340211075536582, "grad_norm": 2.351500214968349, "learning_rate": 3.058300395256917e-05, "loss": 1.2593, "step": 619 }, { "epoch": 0.07352069251749081, "grad_norm": 2.1343037304166397, "learning_rate": 3.063241106719368e-05, "loss": 1.3491, "step": 620 }, { "epoch": 0.0736392742796158, "grad_norm": 2.178001910206558, "learning_rate": 3.068181818181818e-05, "loss": 1.0337, "step": 621 }, { "epoch": 0.07375785604174077, "grad_norm": 2.513016764974934, "learning_rate": 3.073122529644269e-05, "loss": 1.3247, "step": 622 }, { "epoch": 0.07387643780386577, "grad_norm": 2.430546202910741, "learning_rate": 3.078063241106719e-05, "loss": 1.2511, "step": 623 }, { "epoch": 0.07399501956599075, "grad_norm": 2.2610743212604643, "learning_rate": 3.08300395256917e-05, "loss": 1.0712, "step": 624 }, { "epoch": 0.07411360132811573, "grad_norm": 2.086946857646669, "learning_rate": 3.0879446640316203e-05, "loss": 1.4171, "step": 625 }, { "epoch": 0.07423218309024072, "grad_norm": 2.4141481186267706, "learning_rate": 3.092885375494071e-05, "loss": 1.2962, "step": 626 }, { "epoch": 0.0743507648523657, "grad_norm": 2.020708114178066, "learning_rate": 3.0978260869565215e-05, "loss": 1.2542, "step": 627 }, { "epoch": 0.07446934661449069, "grad_norm": 2.1868188417972365, "learning_rate": 3.1027667984189724e-05, "loss": 1.1663, "step": 628 }, { "epoch": 0.07458792837661568, "grad_norm": 2.2765309833984024, "learning_rate": 3.1077075098814226e-05, "loss": 1.4439, "step": 629 }, { "epoch": 0.07470651013874066, "grad_norm": 2.1002806417238076, "learning_rate": 3.1126482213438735e-05, "loss": 1.2488, "step": 630 }, { "epoch": 0.07482509190086564, "grad_norm": 2.4328411398703973, "learning_rate": 3.117588932806324e-05, "loss": 1.1749, "step": 631 }, { "epoch": 0.07494367366299064, "grad_norm": 2.1633496603266926, "learning_rate": 3.1225296442687746e-05, "loss": 1.3822, "step": 632 }, { "epoch": 0.07506225542511562, "grad_norm": 2.394648861031172, "learning_rate": 3.1274703557312255e-05, "loss": 1.1556, "step": 633 }, { "epoch": 0.0751808371872406, "grad_norm": 2.462460018255017, "learning_rate": 3.1324110671936764e-05, "loss": 1.2045, "step": 634 }, { "epoch": 0.07529941894936559, "grad_norm": 2.1592438434065846, "learning_rate": 3.1373517786561266e-05, "loss": 1.295, "step": 635 }, { "epoch": 0.07541800071149057, "grad_norm": 2.2949609954914205, "learning_rate": 3.1422924901185775e-05, "loss": 0.8925, "step": 636 }, { "epoch": 0.07553658247361555, "grad_norm": 2.2454320674029637, "learning_rate": 3.147233201581028e-05, "loss": 1.3552, "step": 637 }, { "epoch": 0.07565516423574055, "grad_norm": 2.2503175616551663, "learning_rate": 3.152173913043479e-05, "loss": 1.3242, "step": 638 }, { "epoch": 0.07577374599786553, "grad_norm": 1.9513395050231432, "learning_rate": 3.157114624505929e-05, "loss": 1.2112, "step": 639 }, { "epoch": 0.07589232775999051, "grad_norm": 2.493416694991229, "learning_rate": 3.16205533596838e-05, "loss": 1.1913, "step": 640 }, { "epoch": 0.0760109095221155, "grad_norm": 2.403724854397969, "learning_rate": 3.16699604743083e-05, "loss": 1.4156, "step": 641 }, { "epoch": 0.07612949128424049, "grad_norm": 2.5461074275512305, "learning_rate": 3.171936758893281e-05, "loss": 1.2197, "step": 642 }, { "epoch": 0.07624807304636547, "grad_norm": 2.3293402994235373, "learning_rate": 3.176877470355731e-05, "loss": 1.1746, "step": 643 }, { "epoch": 0.07636665480849045, "grad_norm": 2.0781989564578662, "learning_rate": 3.181818181818182e-05, "loss": 1.3666, "step": 644 }, { "epoch": 0.07648523657061544, "grad_norm": 2.2313907677331697, "learning_rate": 3.186758893280632e-05, "loss": 1.5331, "step": 645 }, { "epoch": 0.07660381833274042, "grad_norm": 2.3431465246335086, "learning_rate": 3.191699604743083e-05, "loss": 1.284, "step": 646 }, { "epoch": 0.0767224000948654, "grad_norm": 2.1346601541832686, "learning_rate": 3.1966403162055334e-05, "loss": 1.0286, "step": 647 }, { "epoch": 0.0768409818569904, "grad_norm": 2.155180433637423, "learning_rate": 3.201581027667984e-05, "loss": 1.2454, "step": 648 }, { "epoch": 0.07695956361911538, "grad_norm": 1.8240945560814488, "learning_rate": 3.2065217391304345e-05, "loss": 1.3571, "step": 649 }, { "epoch": 0.07707814538124036, "grad_norm": 2.686597292158509, "learning_rate": 3.2114624505928854e-05, "loss": 1.0689, "step": 650 }, { "epoch": 0.07719672714336535, "grad_norm": 2.131642033188979, "learning_rate": 3.2164031620553356e-05, "loss": 1.2287, "step": 651 }, { "epoch": 0.07731530890549033, "grad_norm": 2.047945977270479, "learning_rate": 3.221343873517787e-05, "loss": 1.2977, "step": 652 }, { "epoch": 0.07743389066761532, "grad_norm": 2.0922079462528087, "learning_rate": 3.2262845849802374e-05, "loss": 1.3369, "step": 653 }, { "epoch": 0.07755247242974031, "grad_norm": 2.0071577271544836, "learning_rate": 3.231225296442688e-05, "loss": 0.9622, "step": 654 }, { "epoch": 0.07767105419186529, "grad_norm": 1.8106085019867801, "learning_rate": 3.2361660079051385e-05, "loss": 1.3767, "step": 655 }, { "epoch": 0.07778963595399027, "grad_norm": 2.024519122811612, "learning_rate": 3.2411067193675894e-05, "loss": 1.1097, "step": 656 }, { "epoch": 0.07790821771611527, "grad_norm": 2.2896010776180638, "learning_rate": 3.24604743083004e-05, "loss": 1.1981, "step": 657 }, { "epoch": 0.07802679947824025, "grad_norm": 2.4414534453185883, "learning_rate": 3.2509881422924906e-05, "loss": 1.1189, "step": 658 }, { "epoch": 0.07814538124036523, "grad_norm": 2.493767082887729, "learning_rate": 3.255928853754941e-05, "loss": 1.1435, "step": 659 }, { "epoch": 0.07826396300249022, "grad_norm": 2.3659242083809526, "learning_rate": 3.260869565217392e-05, "loss": 1.3759, "step": 660 }, { "epoch": 0.0783825447646152, "grad_norm": 2.0617954455206298, "learning_rate": 3.265810276679842e-05, "loss": 1.2278, "step": 661 }, { "epoch": 0.07850112652674018, "grad_norm": 2.3859410364101024, "learning_rate": 3.270750988142293e-05, "loss": 1.0266, "step": 662 }, { "epoch": 0.07861970828886518, "grad_norm": 2.453665435105549, "learning_rate": 3.275691699604743e-05, "loss": 1.3979, "step": 663 }, { "epoch": 0.07873829005099016, "grad_norm": 2.40411250015987, "learning_rate": 3.280632411067194e-05, "loss": 1.1569, "step": 664 }, { "epoch": 0.07885687181311514, "grad_norm": 2.5377065367314877, "learning_rate": 3.285573122529644e-05, "loss": 1.208, "step": 665 }, { "epoch": 0.07897545357524013, "grad_norm": 2.1587067174195322, "learning_rate": 3.290513833992095e-05, "loss": 1.1847, "step": 666 }, { "epoch": 0.07909403533736512, "grad_norm": 2.0266377451748063, "learning_rate": 3.295454545454545e-05, "loss": 1.1628, "step": 667 }, { "epoch": 0.0792126170994901, "grad_norm": 1.9103375000981677, "learning_rate": 3.300395256916996e-05, "loss": 1.1041, "step": 668 }, { "epoch": 0.07933119886161509, "grad_norm": 2.118907982214127, "learning_rate": 3.3053359683794464e-05, "loss": 1.4679, "step": 669 }, { "epoch": 0.07944978062374007, "grad_norm": 2.374562165583004, "learning_rate": 3.310276679841897e-05, "loss": 1.3944, "step": 670 }, { "epoch": 0.07956836238586505, "grad_norm": 2.017225265432069, "learning_rate": 3.3152173913043475e-05, "loss": 1.1328, "step": 671 }, { "epoch": 0.07968694414799003, "grad_norm": 2.2327847507495133, "learning_rate": 3.320158102766799e-05, "loss": 1.1816, "step": 672 }, { "epoch": 0.07980552591011503, "grad_norm": 2.334767423555494, "learning_rate": 3.325098814229249e-05, "loss": 1.32, "step": 673 }, { "epoch": 0.07992410767224001, "grad_norm": 2.095355611749733, "learning_rate": 3.3300395256917e-05, "loss": 1.0858, "step": 674 }, { "epoch": 0.08004268943436499, "grad_norm": 2.633494345017161, "learning_rate": 3.3349802371541505e-05, "loss": 1.2824, "step": 675 }, { "epoch": 0.08016127119648998, "grad_norm": 2.1373584515263873, "learning_rate": 3.3399209486166014e-05, "loss": 1.2237, "step": 676 }, { "epoch": 0.08027985295861496, "grad_norm": 2.343213875301192, "learning_rate": 3.3448616600790516e-05, "loss": 1.0752, "step": 677 }, { "epoch": 0.08039843472073994, "grad_norm": 2.115296502138324, "learning_rate": 3.3498023715415025e-05, "loss": 1.3571, "step": 678 }, { "epoch": 0.08051701648286494, "grad_norm": 2.1214780677244547, "learning_rate": 3.354743083003953e-05, "loss": 1.0668, "step": 679 }, { "epoch": 0.08063559824498992, "grad_norm": 2.1907827608732555, "learning_rate": 3.3596837944664036e-05, "loss": 1.269, "step": 680 }, { "epoch": 0.0807541800071149, "grad_norm": 1.9420783532672319, "learning_rate": 3.364624505928854e-05, "loss": 1.1561, "step": 681 }, { "epoch": 0.0808727617692399, "grad_norm": 2.2031239730886836, "learning_rate": 3.369565217391305e-05, "loss": 1.5454, "step": 682 }, { "epoch": 0.08099134353136488, "grad_norm": 2.2594219734341245, "learning_rate": 3.374505928853755e-05, "loss": 1.2732, "step": 683 }, { "epoch": 0.08110992529348986, "grad_norm": 2.1544634619454537, "learning_rate": 3.379446640316206e-05, "loss": 1.4987, "step": 684 }, { "epoch": 0.08122850705561485, "grad_norm": 2.6484524379602132, "learning_rate": 3.384387351778656e-05, "loss": 1.188, "step": 685 }, { "epoch": 0.08134708881773983, "grad_norm": 1.8930749999761514, "learning_rate": 3.389328063241107e-05, "loss": 1.0723, "step": 686 }, { "epoch": 0.08146567057986481, "grad_norm": 2.1041458788958898, "learning_rate": 3.394268774703557e-05, "loss": 1.292, "step": 687 }, { "epoch": 0.08158425234198981, "grad_norm": 2.3393629881270877, "learning_rate": 3.399209486166008e-05, "loss": 1.2795, "step": 688 }, { "epoch": 0.08170283410411479, "grad_norm": 2.246018686556954, "learning_rate": 3.404150197628458e-05, "loss": 1.2298, "step": 689 }, { "epoch": 0.08182141586623977, "grad_norm": 2.0415236458011465, "learning_rate": 3.409090909090909e-05, "loss": 1.3815, "step": 690 }, { "epoch": 0.08193999762836476, "grad_norm": 2.0050226172145584, "learning_rate": 3.41403162055336e-05, "loss": 0.827, "step": 691 }, { "epoch": 0.08205857939048974, "grad_norm": 2.0528823131532357, "learning_rate": 3.418972332015811e-05, "loss": 1.2963, "step": 692 }, { "epoch": 0.08217716115261472, "grad_norm": 2.0532336654751235, "learning_rate": 3.423913043478261e-05, "loss": 1.4726, "step": 693 }, { "epoch": 0.08229574291473972, "grad_norm": 2.013861324264989, "learning_rate": 3.428853754940712e-05, "loss": 0.9586, "step": 694 }, { "epoch": 0.0824143246768647, "grad_norm": 2.1109504634949308, "learning_rate": 3.4337944664031624e-05, "loss": 1.3651, "step": 695 }, { "epoch": 0.08253290643898968, "grad_norm": 2.355524109905185, "learning_rate": 3.438735177865613e-05, "loss": 1.3987, "step": 696 }, { "epoch": 0.08265148820111466, "grad_norm": 2.005912665626445, "learning_rate": 3.4436758893280635e-05, "loss": 1.1214, "step": 697 }, { "epoch": 0.08277006996323966, "grad_norm": 1.8824213311759646, "learning_rate": 3.4486166007905144e-05, "loss": 1.2458, "step": 698 }, { "epoch": 0.08288865172536464, "grad_norm": 2.3442890017539333, "learning_rate": 3.4535573122529646e-05, "loss": 1.5919, "step": 699 }, { "epoch": 0.08300723348748962, "grad_norm": 2.0363567825099587, "learning_rate": 3.4584980237154155e-05, "loss": 1.2358, "step": 700 }, { "epoch": 0.08312581524961461, "grad_norm": 1.908902725924768, "learning_rate": 3.463438735177866e-05, "loss": 1.3315, "step": 701 }, { "epoch": 0.0832443970117396, "grad_norm": 1.937704617549985, "learning_rate": 3.4683794466403166e-05, "loss": 1.2119, "step": 702 }, { "epoch": 0.08336297877386457, "grad_norm": 2.0252018575771142, "learning_rate": 3.473320158102767e-05, "loss": 1.1829, "step": 703 }, { "epoch": 0.08348156053598957, "grad_norm": 2.200314530837662, "learning_rate": 3.478260869565218e-05, "loss": 1.4052, "step": 704 }, { "epoch": 0.08360014229811455, "grad_norm": 1.8319711337049067, "learning_rate": 3.483201581027668e-05, "loss": 0.8856, "step": 705 }, { "epoch": 0.08371872406023953, "grad_norm": 1.8916274781789295, "learning_rate": 3.488142292490119e-05, "loss": 1.401, "step": 706 }, { "epoch": 0.08383730582236452, "grad_norm": 2.0049433218094683, "learning_rate": 3.493083003952569e-05, "loss": 1.333, "step": 707 }, { "epoch": 0.0839558875844895, "grad_norm": 2.157756764032009, "learning_rate": 3.49802371541502e-05, "loss": 1.2478, "step": 708 }, { "epoch": 0.08407446934661449, "grad_norm": 1.8445024831246304, "learning_rate": 3.50296442687747e-05, "loss": 1.1563, "step": 709 }, { "epoch": 0.08419305110873948, "grad_norm": 2.0844870894129737, "learning_rate": 3.507905138339921e-05, "loss": 0.9649, "step": 710 }, { "epoch": 0.08431163287086446, "grad_norm": 1.8975178582598577, "learning_rate": 3.512845849802372e-05, "loss": 1.0537, "step": 711 }, { "epoch": 0.08443021463298944, "grad_norm": 1.9850209175482596, "learning_rate": 3.517786561264822e-05, "loss": 1.3354, "step": 712 }, { "epoch": 0.08454879639511444, "grad_norm": 2.3859087686276848, "learning_rate": 3.522727272727273e-05, "loss": 1.3569, "step": 713 }, { "epoch": 0.08466737815723942, "grad_norm": 2.0144627936459827, "learning_rate": 3.5276679841897234e-05, "loss": 1.4431, "step": 714 }, { "epoch": 0.0847859599193644, "grad_norm": 2.2707148228503495, "learning_rate": 3.532608695652174e-05, "loss": 1.196, "step": 715 }, { "epoch": 0.08490454168148939, "grad_norm": 1.995004379337582, "learning_rate": 3.5375494071146245e-05, "loss": 1.1068, "step": 716 }, { "epoch": 0.08502312344361437, "grad_norm": 2.2688964225269035, "learning_rate": 3.5424901185770754e-05, "loss": 1.0812, "step": 717 }, { "epoch": 0.08514170520573935, "grad_norm": 2.151511985849467, "learning_rate": 3.5474308300395256e-05, "loss": 1.524, "step": 718 }, { "epoch": 0.08526028696786435, "grad_norm": 1.867339507248027, "learning_rate": 3.5523715415019765e-05, "loss": 1.0814, "step": 719 }, { "epoch": 0.08537886872998933, "grad_norm": 1.950013348043195, "learning_rate": 3.557312252964427e-05, "loss": 1.2955, "step": 720 }, { "epoch": 0.08549745049211431, "grad_norm": 2.1881143430762853, "learning_rate": 3.5622529644268777e-05, "loss": 1.0316, "step": 721 }, { "epoch": 0.0856160322542393, "grad_norm": 2.074620560669965, "learning_rate": 3.567193675889328e-05, "loss": 1.4001, "step": 722 }, { "epoch": 0.08573461401636429, "grad_norm": 2.2477059414773075, "learning_rate": 3.572134387351779e-05, "loss": 1.1642, "step": 723 }, { "epoch": 0.08585319577848927, "grad_norm": 2.1598794607792127, "learning_rate": 3.577075098814229e-05, "loss": 1.1704, "step": 724 }, { "epoch": 0.08597177754061425, "grad_norm": 2.0054288575265873, "learning_rate": 3.58201581027668e-05, "loss": 1.2874, "step": 725 }, { "epoch": 0.08609035930273924, "grad_norm": 2.018201412986407, "learning_rate": 3.58695652173913e-05, "loss": 1.2215, "step": 726 }, { "epoch": 0.08620894106486422, "grad_norm": 2.112750670485053, "learning_rate": 3.591897233201581e-05, "loss": 1.2149, "step": 727 }, { "epoch": 0.0863275228269892, "grad_norm": 2.230391385910561, "learning_rate": 3.596837944664031e-05, "loss": 1.2555, "step": 728 }, { "epoch": 0.0864461045891142, "grad_norm": 2.147636108655856, "learning_rate": 3.601778656126482e-05, "loss": 1.2996, "step": 729 }, { "epoch": 0.08656468635123918, "grad_norm": 2.1142289499374405, "learning_rate": 3.606719367588933e-05, "loss": 1.2136, "step": 730 }, { "epoch": 0.08668326811336416, "grad_norm": 2.148582425413884, "learning_rate": 3.611660079051384e-05, "loss": 1.1621, "step": 731 }, { "epoch": 0.08680184987548915, "grad_norm": 1.9720848682342424, "learning_rate": 3.616600790513834e-05, "loss": 1.2137, "step": 732 }, { "epoch": 0.08692043163761413, "grad_norm": 1.911307435494401, "learning_rate": 3.621541501976285e-05, "loss": 0.9674, "step": 733 }, { "epoch": 0.08703901339973912, "grad_norm": 2.266432678082131, "learning_rate": 3.626482213438735e-05, "loss": 1.2808, "step": 734 }, { "epoch": 0.08715759516186411, "grad_norm": 1.8934794972549702, "learning_rate": 3.631422924901186e-05, "loss": 1.2665, "step": 735 }, { "epoch": 0.08727617692398909, "grad_norm": 1.7963249591338462, "learning_rate": 3.6363636363636364e-05, "loss": 1.0023, "step": 736 }, { "epoch": 0.08739475868611407, "grad_norm": 1.9429790773681141, "learning_rate": 3.641304347826087e-05, "loss": 1.1188, "step": 737 }, { "epoch": 0.08751334044823907, "grad_norm": 2.1208827102753394, "learning_rate": 3.6462450592885375e-05, "loss": 1.3529, "step": 738 }, { "epoch": 0.08763192221036405, "grad_norm": 2.1491179885176637, "learning_rate": 3.6511857707509884e-05, "loss": 1.2563, "step": 739 }, { "epoch": 0.08775050397248903, "grad_norm": 1.9682896204386802, "learning_rate": 3.656126482213439e-05, "loss": 1.1543, "step": 740 }, { "epoch": 0.08786908573461402, "grad_norm": 2.6134864341662727, "learning_rate": 3.6610671936758896e-05, "loss": 1.3361, "step": 741 }, { "epoch": 0.087987667496739, "grad_norm": 2.84463656121695, "learning_rate": 3.66600790513834e-05, "loss": 1.5155, "step": 742 }, { "epoch": 0.08810624925886398, "grad_norm": 2.1573916437470193, "learning_rate": 3.670948616600791e-05, "loss": 1.3103, "step": 743 }, { "epoch": 0.08822483102098898, "grad_norm": 1.9971857757828326, "learning_rate": 3.675889328063241e-05, "loss": 1.3001, "step": 744 }, { "epoch": 0.08834341278311396, "grad_norm": 2.088134644103535, "learning_rate": 3.680830039525692e-05, "loss": 1.1391, "step": 745 }, { "epoch": 0.08846199454523894, "grad_norm": 1.9048938409476335, "learning_rate": 3.685770750988142e-05, "loss": 1.2182, "step": 746 }, { "epoch": 0.08858057630736393, "grad_norm": 2.140521370998425, "learning_rate": 3.690711462450593e-05, "loss": 1.0491, "step": 747 }, { "epoch": 0.08869915806948891, "grad_norm": 1.7979602980447402, "learning_rate": 3.695652173913043e-05, "loss": 1.1915, "step": 748 }, { "epoch": 0.0888177398316139, "grad_norm": 1.9023154713918125, "learning_rate": 3.700592885375494e-05, "loss": 1.173, "step": 749 }, { "epoch": 0.08893632159373888, "grad_norm": 1.9874490983714652, "learning_rate": 3.705533596837945e-05, "loss": 1.3811, "step": 750 }, { "epoch": 0.08905490335586387, "grad_norm": 2.066170615101303, "learning_rate": 3.710474308300396e-05, "loss": 1.3331, "step": 751 }, { "epoch": 0.08917348511798885, "grad_norm": 2.106846147766279, "learning_rate": 3.715415019762846e-05, "loss": 1.1641, "step": 752 }, { "epoch": 0.08929206688011383, "grad_norm": 2.010016021320213, "learning_rate": 3.720355731225297e-05, "loss": 1.2018, "step": 753 }, { "epoch": 0.08941064864223883, "grad_norm": 2.269361084684674, "learning_rate": 3.725296442687747e-05, "loss": 1.2466, "step": 754 }, { "epoch": 0.08952923040436381, "grad_norm": 2.200944492960789, "learning_rate": 3.730237154150198e-05, "loss": 1.3754, "step": 755 }, { "epoch": 0.08964781216648879, "grad_norm": 1.9588635828677123, "learning_rate": 3.735177865612648e-05, "loss": 1.2139, "step": 756 }, { "epoch": 0.08976639392861378, "grad_norm": 2.336764382832611, "learning_rate": 3.740118577075099e-05, "loss": 1.2051, "step": 757 }, { "epoch": 0.08988497569073876, "grad_norm": 2.0604024003027432, "learning_rate": 3.7450592885375494e-05, "loss": 1.5451, "step": 758 }, { "epoch": 0.09000355745286374, "grad_norm": 1.9087552143741133, "learning_rate": 3.7500000000000003e-05, "loss": 1.0638, "step": 759 }, { "epoch": 0.09012213921498874, "grad_norm": 2.1629937244115167, "learning_rate": 3.7549407114624506e-05, "loss": 1.3252, "step": 760 }, { "epoch": 0.09024072097711372, "grad_norm": 1.9272862447690944, "learning_rate": 3.7598814229249015e-05, "loss": 1.3971, "step": 761 }, { "epoch": 0.0903593027392387, "grad_norm": 1.9707413014850337, "learning_rate": 3.764822134387352e-05, "loss": 1.3212, "step": 762 }, { "epoch": 0.0904778845013637, "grad_norm": 1.9855428981452112, "learning_rate": 3.7697628458498026e-05, "loss": 1.3926, "step": 763 }, { "epoch": 0.09059646626348868, "grad_norm": 2.0081166667511017, "learning_rate": 3.774703557312253e-05, "loss": 1.1415, "step": 764 }, { "epoch": 0.09071504802561366, "grad_norm": 2.132496779620122, "learning_rate": 3.779644268774704e-05, "loss": 1.0158, "step": 765 }, { "epoch": 0.09083362978773865, "grad_norm": 1.8882261360932378, "learning_rate": 3.784584980237154e-05, "loss": 1.2197, "step": 766 }, { "epoch": 0.09095221154986363, "grad_norm": 2.072963891619346, "learning_rate": 3.789525691699605e-05, "loss": 1.0694, "step": 767 }, { "epoch": 0.09107079331198861, "grad_norm": 1.9686449742403433, "learning_rate": 3.794466403162055e-05, "loss": 1.1653, "step": 768 }, { "epoch": 0.09118937507411361, "grad_norm": 2.394420033356521, "learning_rate": 3.7994071146245066e-05, "loss": 1.2517, "step": 769 }, { "epoch": 0.09130795683623859, "grad_norm": 1.79923002967305, "learning_rate": 3.804347826086957e-05, "loss": 1.4045, "step": 770 }, { "epoch": 0.09142653859836357, "grad_norm": 2.45011876921403, "learning_rate": 3.809288537549408e-05, "loss": 1.1309, "step": 771 }, { "epoch": 0.09154512036048856, "grad_norm": 2.039062136245424, "learning_rate": 3.814229249011858e-05, "loss": 1.176, "step": 772 }, { "epoch": 0.09166370212261354, "grad_norm": 1.8993568309514166, "learning_rate": 3.819169960474309e-05, "loss": 1.4058, "step": 773 }, { "epoch": 0.09178228388473852, "grad_norm": 1.987374335073063, "learning_rate": 3.824110671936759e-05, "loss": 1.2478, "step": 774 }, { "epoch": 0.0919008656468635, "grad_norm": 1.9683668826973582, "learning_rate": 3.82905138339921e-05, "loss": 1.0807, "step": 775 }, { "epoch": 0.0920194474089885, "grad_norm": 1.726234548030342, "learning_rate": 3.83399209486166e-05, "loss": 1.1416, "step": 776 }, { "epoch": 0.09213802917111348, "grad_norm": 2.056944594881047, "learning_rate": 3.838932806324111e-05, "loss": 1.0965, "step": 777 }, { "epoch": 0.09225661093323846, "grad_norm": 2.03884930625359, "learning_rate": 3.8438735177865614e-05, "loss": 1.0829, "step": 778 }, { "epoch": 0.09237519269536346, "grad_norm": 1.9918481277966453, "learning_rate": 3.848814229249012e-05, "loss": 1.2253, "step": 779 }, { "epoch": 0.09249377445748844, "grad_norm": 1.8589113310217127, "learning_rate": 3.8537549407114625e-05, "loss": 1.4867, "step": 780 }, { "epoch": 0.09261235621961342, "grad_norm": 1.7108111605808969, "learning_rate": 3.8586956521739134e-05, "loss": 0.9607, "step": 781 }, { "epoch": 0.09273093798173841, "grad_norm": 1.9579371064233615, "learning_rate": 3.8636363636363636e-05, "loss": 1.0628, "step": 782 }, { "epoch": 0.09284951974386339, "grad_norm": 2.3083962779000644, "learning_rate": 3.8685770750988145e-05, "loss": 1.0626, "step": 783 }, { "epoch": 0.09296810150598837, "grad_norm": 2.0871321095522672, "learning_rate": 3.873517786561265e-05, "loss": 1.2407, "step": 784 }, { "epoch": 0.09308668326811337, "grad_norm": 1.830627626149258, "learning_rate": 3.8784584980237156e-05, "loss": 1.244, "step": 785 }, { "epoch": 0.09320526503023835, "grad_norm": 1.9163261155014395, "learning_rate": 3.883399209486166e-05, "loss": 1.1841, "step": 786 }, { "epoch": 0.09332384679236333, "grad_norm": 2.1374368197020917, "learning_rate": 3.888339920948617e-05, "loss": 1.1872, "step": 787 }, { "epoch": 0.09344242855448832, "grad_norm": 2.2879414968759813, "learning_rate": 3.893280632411067e-05, "loss": 1.2956, "step": 788 }, { "epoch": 0.0935610103166133, "grad_norm": 1.919613605997733, "learning_rate": 3.8982213438735186e-05, "loss": 1.0762, "step": 789 }, { "epoch": 0.09367959207873829, "grad_norm": 1.9117486478745016, "learning_rate": 3.903162055335969e-05, "loss": 1.1087, "step": 790 }, { "epoch": 0.09379817384086328, "grad_norm": 2.1692338885653166, "learning_rate": 3.90810276679842e-05, "loss": 1.3508, "step": 791 }, { "epoch": 0.09391675560298826, "grad_norm": 1.9707297484103659, "learning_rate": 3.91304347826087e-05, "loss": 1.1895, "step": 792 }, { "epoch": 0.09403533736511324, "grad_norm": 2.080641285140318, "learning_rate": 3.917984189723321e-05, "loss": 1.2609, "step": 793 }, { "epoch": 0.09415391912723824, "grad_norm": 2.0771137201593297, "learning_rate": 3.922924901185771e-05, "loss": 1.3348, "step": 794 }, { "epoch": 0.09427250088936322, "grad_norm": 2.082356002136901, "learning_rate": 3.927865612648222e-05, "loss": 1.1172, "step": 795 }, { "epoch": 0.0943910826514882, "grad_norm": 2.0592410333350677, "learning_rate": 3.932806324110672e-05, "loss": 1.2573, "step": 796 }, { "epoch": 0.09450966441361319, "grad_norm": 2.0423949289939287, "learning_rate": 3.937747035573123e-05, "loss": 1.5129, "step": 797 }, { "epoch": 0.09462824617573817, "grad_norm": 1.8842003872612643, "learning_rate": 3.942687747035573e-05, "loss": 1.2048, "step": 798 }, { "epoch": 0.09474682793786315, "grad_norm": 2.1131952328169556, "learning_rate": 3.947628458498024e-05, "loss": 1.3676, "step": 799 }, { "epoch": 0.09486540969998815, "grad_norm": 2.182430677382619, "learning_rate": 3.9525691699604744e-05, "loss": 1.1955, "step": 800 }, { "epoch": 0.09498399146211313, "grad_norm": 2.059213024896555, "learning_rate": 3.957509881422925e-05, "loss": 1.0985, "step": 801 }, { "epoch": 0.09510257322423811, "grad_norm": 1.6369766530788656, "learning_rate": 3.9624505928853755e-05, "loss": 1.1896, "step": 802 }, { "epoch": 0.09522115498636309, "grad_norm": 1.8255258387813063, "learning_rate": 3.9673913043478264e-05, "loss": 1.2477, "step": 803 }, { "epoch": 0.09533973674848809, "grad_norm": 2.162579954024575, "learning_rate": 3.9723320158102766e-05, "loss": 1.1176, "step": 804 }, { "epoch": 0.09545831851061307, "grad_norm": 2.004376525231961, "learning_rate": 3.9772727272727275e-05, "loss": 1.215, "step": 805 }, { "epoch": 0.09557690027273805, "grad_norm": 1.6962724045968693, "learning_rate": 3.982213438735178e-05, "loss": 1.1027, "step": 806 }, { "epoch": 0.09569548203486304, "grad_norm": 1.846328983639688, "learning_rate": 3.987154150197629e-05, "loss": 1.2272, "step": 807 }, { "epoch": 0.09581406379698802, "grad_norm": 2.2400765181151345, "learning_rate": 3.9920948616600796e-05, "loss": 1.3806, "step": 808 }, { "epoch": 0.095932645559113, "grad_norm": 2.3032432860820515, "learning_rate": 3.99703557312253e-05, "loss": 1.3515, "step": 809 }, { "epoch": 0.096051227321238, "grad_norm": 1.8562954221125572, "learning_rate": 4.001976284584981e-05, "loss": 1.2801, "step": 810 }, { "epoch": 0.09616980908336298, "grad_norm": 1.853668007566561, "learning_rate": 4.006916996047431e-05, "loss": 1.3078, "step": 811 }, { "epoch": 0.09628839084548796, "grad_norm": 2.1704001324965327, "learning_rate": 4.011857707509882e-05, "loss": 1.1313, "step": 812 }, { "epoch": 0.09640697260761295, "grad_norm": 2.006749582829935, "learning_rate": 4.016798418972332e-05, "loss": 1.1217, "step": 813 }, { "epoch": 0.09652555436973793, "grad_norm": 1.9572131864352607, "learning_rate": 4.021739130434783e-05, "loss": 1.2878, "step": 814 }, { "epoch": 0.09664413613186291, "grad_norm": 1.9897941710919402, "learning_rate": 4.026679841897233e-05, "loss": 1.1713, "step": 815 }, { "epoch": 0.09676271789398791, "grad_norm": 2.0169852356240012, "learning_rate": 4.031620553359684e-05, "loss": 1.304, "step": 816 }, { "epoch": 0.09688129965611289, "grad_norm": 1.8101163459236398, "learning_rate": 4.036561264822134e-05, "loss": 1.2212, "step": 817 }, { "epoch": 0.09699988141823787, "grad_norm": 2.045954558587558, "learning_rate": 4.041501976284585e-05, "loss": 1.3892, "step": 818 }, { "epoch": 0.09711846318036287, "grad_norm": 2.0244257526296154, "learning_rate": 4.0464426877470354e-05, "loss": 1.1943, "step": 819 }, { "epoch": 0.09723704494248785, "grad_norm": 1.8140753757579653, "learning_rate": 4.051383399209486e-05, "loss": 1.3084, "step": 820 }, { "epoch": 0.09735562670461283, "grad_norm": 1.8014643051122026, "learning_rate": 4.0563241106719365e-05, "loss": 1.0362, "step": 821 }, { "epoch": 0.09747420846673782, "grad_norm": 2.0596177893138563, "learning_rate": 4.0612648221343874e-05, "loss": 1.5499, "step": 822 }, { "epoch": 0.0975927902288628, "grad_norm": 1.818569622680358, "learning_rate": 4.0662055335968377e-05, "loss": 1.249, "step": 823 }, { "epoch": 0.09771137199098778, "grad_norm": 2.048847924967824, "learning_rate": 4.0711462450592886e-05, "loss": 1.4027, "step": 824 }, { "epoch": 0.09782995375311278, "grad_norm": 2.0016751634662118, "learning_rate": 4.076086956521739e-05, "loss": 1.2888, "step": 825 }, { "epoch": 0.09794853551523776, "grad_norm": 2.0902610110569513, "learning_rate": 4.08102766798419e-05, "loss": 1.2238, "step": 826 }, { "epoch": 0.09806711727736274, "grad_norm": 2.118515128947569, "learning_rate": 4.0859683794466406e-05, "loss": 1.4035, "step": 827 }, { "epoch": 0.09818569903948772, "grad_norm": 1.9549421008196457, "learning_rate": 4.0909090909090915e-05, "loss": 1.0099, "step": 828 }, { "epoch": 0.09830428080161271, "grad_norm": 1.9369676130278435, "learning_rate": 4.095849802371542e-05, "loss": 1.1756, "step": 829 }, { "epoch": 0.0984228625637377, "grad_norm": 2.0467149459912326, "learning_rate": 4.1007905138339926e-05, "loss": 1.3032, "step": 830 }, { "epoch": 0.09854144432586268, "grad_norm": 2.1771320433376014, "learning_rate": 4.105731225296443e-05, "loss": 1.2864, "step": 831 }, { "epoch": 0.09866002608798767, "grad_norm": 1.8247310544606976, "learning_rate": 4.110671936758894e-05, "loss": 1.2725, "step": 832 }, { "epoch": 0.09877860785011265, "grad_norm": 1.7473377159219128, "learning_rate": 4.115612648221344e-05, "loss": 1.252, "step": 833 }, { "epoch": 0.09889718961223763, "grad_norm": 1.8955558776420427, "learning_rate": 4.120553359683795e-05, "loss": 1.2867, "step": 834 }, { "epoch": 0.09901577137436263, "grad_norm": 1.8694595682778619, "learning_rate": 4.125494071146245e-05, "loss": 1.074, "step": 835 }, { "epoch": 0.09913435313648761, "grad_norm": 1.7139564894242536, "learning_rate": 4.130434782608696e-05, "loss": 1.188, "step": 836 }, { "epoch": 0.09925293489861259, "grad_norm": 1.6488950592388731, "learning_rate": 4.135375494071146e-05, "loss": 1.3664, "step": 837 }, { "epoch": 0.09937151666073758, "grad_norm": 1.9726394321118084, "learning_rate": 4.140316205533597e-05, "loss": 1.0221, "step": 838 }, { "epoch": 0.09949009842286256, "grad_norm": 2.0623449352578387, "learning_rate": 4.145256916996047e-05, "loss": 1.1312, "step": 839 }, { "epoch": 0.09960868018498754, "grad_norm": 1.9312461884967698, "learning_rate": 4.150197628458498e-05, "loss": 1.3727, "step": 840 }, { "epoch": 0.09972726194711254, "grad_norm": 1.9786099830436268, "learning_rate": 4.1551383399209484e-05, "loss": 1.3299, "step": 841 }, { "epoch": 0.09984584370923752, "grad_norm": 2.4660285091151515, "learning_rate": 4.160079051383399e-05, "loss": 1.3515, "step": 842 }, { "epoch": 0.0999644254713625, "grad_norm": 1.9237543580154044, "learning_rate": 4.1650197628458496e-05, "loss": 1.4227, "step": 843 }, { "epoch": 0.1000830072334875, "grad_norm": 1.7351420748317194, "learning_rate": 4.1699604743083005e-05, "loss": 0.6643, "step": 844 }, { "epoch": 0.10020158899561248, "grad_norm": 2.0591602458797316, "learning_rate": 4.174901185770751e-05, "loss": 1.404, "step": 845 }, { "epoch": 0.10032017075773746, "grad_norm": 1.8192293948305147, "learning_rate": 4.1798418972332016e-05, "loss": 1.1474, "step": 846 }, { "epoch": 0.10043875251986245, "grad_norm": 2.0611499294257922, "learning_rate": 4.1847826086956525e-05, "loss": 1.2152, "step": 847 }, { "epoch": 0.10055733428198743, "grad_norm": 2.2534487177219074, "learning_rate": 4.1897233201581034e-05, "loss": 1.0678, "step": 848 }, { "epoch": 0.10067591604411241, "grad_norm": 1.968717669879219, "learning_rate": 4.1946640316205536e-05, "loss": 1.1626, "step": 849 }, { "epoch": 0.10079449780623741, "grad_norm": 2.156380333963278, "learning_rate": 4.1996047430830045e-05, "loss": 1.0764, "step": 850 }, { "epoch": 0.10091307956836239, "grad_norm": 1.9497695815682135, "learning_rate": 4.204545454545455e-05, "loss": 1.1463, "step": 851 }, { "epoch": 0.10103166133048737, "grad_norm": 1.9879877753040118, "learning_rate": 4.2094861660079056e-05, "loss": 1.0398, "step": 852 }, { "epoch": 0.10115024309261236, "grad_norm": 2.1932365865877395, "learning_rate": 4.214426877470356e-05, "loss": 1.3112, "step": 853 }, { "epoch": 0.10126882485473734, "grad_norm": 2.2291811291446226, "learning_rate": 4.219367588932807e-05, "loss": 1.2053, "step": 854 }, { "epoch": 0.10138740661686232, "grad_norm": 2.4204808458141835, "learning_rate": 4.224308300395257e-05, "loss": 0.9389, "step": 855 }, { "epoch": 0.1015059883789873, "grad_norm": 1.887568640982617, "learning_rate": 4.229249011857708e-05, "loss": 1.3766, "step": 856 }, { "epoch": 0.1016245701411123, "grad_norm": 2.0088931764450924, "learning_rate": 4.234189723320158e-05, "loss": 1.3968, "step": 857 }, { "epoch": 0.10174315190323728, "grad_norm": 1.8863378987932518, "learning_rate": 4.239130434782609e-05, "loss": 0.9547, "step": 858 }, { "epoch": 0.10186173366536226, "grad_norm": 2.166206132233298, "learning_rate": 4.244071146245059e-05, "loss": 1.0599, "step": 859 }, { "epoch": 0.10198031542748726, "grad_norm": 1.9023084798837424, "learning_rate": 4.24901185770751e-05, "loss": 1.3391, "step": 860 }, { "epoch": 0.10209889718961224, "grad_norm": 1.966553097866335, "learning_rate": 4.2539525691699603e-05, "loss": 1.2819, "step": 861 }, { "epoch": 0.10221747895173722, "grad_norm": 1.908856504444068, "learning_rate": 4.258893280632411e-05, "loss": 1.1397, "step": 862 }, { "epoch": 0.10233606071386221, "grad_norm": 2.0220728533214576, "learning_rate": 4.2638339920948615e-05, "loss": 1.3664, "step": 863 }, { "epoch": 0.10245464247598719, "grad_norm": 1.9501631201580372, "learning_rate": 4.2687747035573124e-05, "loss": 1.2686, "step": 864 }, { "epoch": 0.10257322423811217, "grad_norm": 1.769558256501275, "learning_rate": 4.2737154150197626e-05, "loss": 1.3436, "step": 865 }, { "epoch": 0.10269180600023717, "grad_norm": 1.8578844166907498, "learning_rate": 4.2786561264822135e-05, "loss": 1.2387, "step": 866 }, { "epoch": 0.10281038776236215, "grad_norm": 1.9678988334101155, "learning_rate": 4.2835968379446644e-05, "loss": 1.4244, "step": 867 }, { "epoch": 0.10292896952448713, "grad_norm": 2.191004380786469, "learning_rate": 4.288537549407115e-05, "loss": 1.1014, "step": 868 }, { "epoch": 0.10304755128661212, "grad_norm": 2.074482620996577, "learning_rate": 4.2934782608695655e-05, "loss": 1.5526, "step": 869 }, { "epoch": 0.1031661330487371, "grad_norm": 2.2228737816081243, "learning_rate": 4.2984189723320164e-05, "loss": 1.2706, "step": 870 }, { "epoch": 0.10328471481086209, "grad_norm": 1.745876903000705, "learning_rate": 4.3033596837944666e-05, "loss": 0.9514, "step": 871 }, { "epoch": 0.10340329657298708, "grad_norm": 2.0824681483484464, "learning_rate": 4.3083003952569175e-05, "loss": 1.2042, "step": 872 }, { "epoch": 0.10352187833511206, "grad_norm": 2.0967339509318044, "learning_rate": 4.313241106719368e-05, "loss": 0.8052, "step": 873 }, { "epoch": 0.10364046009723704, "grad_norm": 1.8949877611990555, "learning_rate": 4.318181818181819e-05, "loss": 1.1511, "step": 874 }, { "epoch": 0.10375904185936204, "grad_norm": 1.9043727390088339, "learning_rate": 4.323122529644269e-05, "loss": 1.2973, "step": 875 }, { "epoch": 0.10387762362148702, "grad_norm": 2.0952638816849505, "learning_rate": 4.32806324110672e-05, "loss": 1.1198, "step": 876 }, { "epoch": 0.103996205383612, "grad_norm": 2.4468528049292, "learning_rate": 4.33300395256917e-05, "loss": 1.1874, "step": 877 }, { "epoch": 0.10411478714573699, "grad_norm": 2.2346995754120704, "learning_rate": 4.337944664031621e-05, "loss": 1.2748, "step": 878 }, { "epoch": 0.10423336890786197, "grad_norm": 1.902601337232446, "learning_rate": 4.342885375494071e-05, "loss": 1.2307, "step": 879 }, { "epoch": 0.10435195066998695, "grad_norm": 1.9487531221239764, "learning_rate": 4.347826086956522e-05, "loss": 1.2994, "step": 880 }, { "epoch": 0.10447053243211193, "grad_norm": 1.7596427645246235, "learning_rate": 4.352766798418972e-05, "loss": 1.3672, "step": 881 }, { "epoch": 0.10458911419423693, "grad_norm": 1.7590861378320122, "learning_rate": 4.357707509881423e-05, "loss": 1.2722, "step": 882 }, { "epoch": 0.10470769595636191, "grad_norm": 2.044211916033582, "learning_rate": 4.3626482213438734e-05, "loss": 0.9985, "step": 883 }, { "epoch": 0.10482627771848689, "grad_norm": 1.6468563802673024, "learning_rate": 4.367588932806324e-05, "loss": 1.1106, "step": 884 }, { "epoch": 0.10494485948061189, "grad_norm": 1.801814450984469, "learning_rate": 4.3725296442687745e-05, "loss": 1.3193, "step": 885 }, { "epoch": 0.10506344124273687, "grad_norm": 1.9961628868622643, "learning_rate": 4.377470355731226e-05, "loss": 1.0353, "step": 886 }, { "epoch": 0.10518202300486185, "grad_norm": 1.8436219413080774, "learning_rate": 4.382411067193676e-05, "loss": 1.1249, "step": 887 }, { "epoch": 0.10530060476698684, "grad_norm": 1.9741144551584124, "learning_rate": 4.387351778656127e-05, "loss": 1.1672, "step": 888 }, { "epoch": 0.10541918652911182, "grad_norm": 1.6923283503935258, "learning_rate": 4.3922924901185774e-05, "loss": 1.2347, "step": 889 }, { "epoch": 0.1055377682912368, "grad_norm": 1.6409134952018314, "learning_rate": 4.397233201581028e-05, "loss": 0.996, "step": 890 }, { "epoch": 0.1056563500533618, "grad_norm": 1.7559903407107664, "learning_rate": 4.4021739130434786e-05, "loss": 1.3053, "step": 891 }, { "epoch": 0.10577493181548678, "grad_norm": 1.7279329119648792, "learning_rate": 4.4071146245059295e-05, "loss": 1.4076, "step": 892 }, { "epoch": 0.10589351357761176, "grad_norm": 2.1244467806105707, "learning_rate": 4.41205533596838e-05, "loss": 1.4878, "step": 893 }, { "epoch": 0.10601209533973675, "grad_norm": 2.3752813238702535, "learning_rate": 4.4169960474308306e-05, "loss": 1.2124, "step": 894 }, { "epoch": 0.10613067710186173, "grad_norm": 2.05779691288018, "learning_rate": 4.421936758893281e-05, "loss": 1.2139, "step": 895 }, { "epoch": 0.10624925886398671, "grad_norm": 1.9261420575362562, "learning_rate": 4.426877470355732e-05, "loss": 0.9466, "step": 896 }, { "epoch": 0.10636784062611171, "grad_norm": 1.7298709781494437, "learning_rate": 4.431818181818182e-05, "loss": 1.2658, "step": 897 }, { "epoch": 0.10648642238823669, "grad_norm": 1.658945611223593, "learning_rate": 4.436758893280633e-05, "loss": 1.2954, "step": 898 }, { "epoch": 0.10660500415036167, "grad_norm": 1.8339472691663428, "learning_rate": 4.441699604743083e-05, "loss": 1.1393, "step": 899 }, { "epoch": 0.10672358591248667, "grad_norm": 1.9285292451385885, "learning_rate": 4.446640316205534e-05, "loss": 1.3713, "step": 900 }, { "epoch": 0.10684216767461165, "grad_norm": 2.1134731542458156, "learning_rate": 4.451581027667984e-05, "loss": 1.1754, "step": 901 }, { "epoch": 0.10696074943673663, "grad_norm": 1.8981371458183007, "learning_rate": 4.456521739130435e-05, "loss": 1.2121, "step": 902 }, { "epoch": 0.10707933119886162, "grad_norm": 1.7672718156652993, "learning_rate": 4.461462450592885e-05, "loss": 1.2403, "step": 903 }, { "epoch": 0.1071979129609866, "grad_norm": 1.8165985090687495, "learning_rate": 4.466403162055336e-05, "loss": 1.2038, "step": 904 }, { "epoch": 0.10731649472311158, "grad_norm": 1.8549108104357437, "learning_rate": 4.471343873517787e-05, "loss": 1.3917, "step": 905 }, { "epoch": 0.10743507648523658, "grad_norm": 1.9023262164992225, "learning_rate": 4.476284584980237e-05, "loss": 1.2924, "step": 906 }, { "epoch": 0.10755365824736156, "grad_norm": 2.029546637131855, "learning_rate": 4.481225296442688e-05, "loss": 1.3235, "step": 907 }, { "epoch": 0.10767224000948654, "grad_norm": 1.7274872916585335, "learning_rate": 4.4861660079051384e-05, "loss": 1.2639, "step": 908 }, { "epoch": 0.10779082177161152, "grad_norm": 2.3181291421702213, "learning_rate": 4.4911067193675893e-05, "loss": 1.1122, "step": 909 }, { "epoch": 0.10790940353373651, "grad_norm": 1.6709200747355057, "learning_rate": 4.4960474308300396e-05, "loss": 0.8722, "step": 910 }, { "epoch": 0.1080279852958615, "grad_norm": 1.9262307751965115, "learning_rate": 4.5009881422924905e-05, "loss": 1.3221, "step": 911 }, { "epoch": 0.10814656705798648, "grad_norm": 1.7541556743157296, "learning_rate": 4.505928853754941e-05, "loss": 1.2547, "step": 912 }, { "epoch": 0.10826514882011147, "grad_norm": 2.0693202513228264, "learning_rate": 4.5108695652173916e-05, "loss": 1.1727, "step": 913 }, { "epoch": 0.10838373058223645, "grad_norm": 1.6748716546145963, "learning_rate": 4.515810276679842e-05, "loss": 1.0561, "step": 914 }, { "epoch": 0.10850231234436143, "grad_norm": 1.7669442762098209, "learning_rate": 4.520750988142293e-05, "loss": 1.0247, "step": 915 }, { "epoch": 0.10862089410648643, "grad_norm": 1.764858036082851, "learning_rate": 4.525691699604743e-05, "loss": 1.3064, "step": 916 }, { "epoch": 0.10873947586861141, "grad_norm": 1.8523355766215788, "learning_rate": 4.530632411067194e-05, "loss": 1.3436, "step": 917 }, { "epoch": 0.10885805763073639, "grad_norm": 1.7208198277303732, "learning_rate": 4.535573122529644e-05, "loss": 1.2577, "step": 918 }, { "epoch": 0.10897663939286138, "grad_norm": 1.8955864533123026, "learning_rate": 4.540513833992095e-05, "loss": 0.9976, "step": 919 }, { "epoch": 0.10909522115498636, "grad_norm": 1.9759289842880057, "learning_rate": 4.545454545454546e-05, "loss": 1.2256, "step": 920 }, { "epoch": 0.10921380291711134, "grad_norm": 1.8432663662277446, "learning_rate": 4.550395256916996e-05, "loss": 1.1955, "step": 921 }, { "epoch": 0.10933238467923634, "grad_norm": 1.9418810962425284, "learning_rate": 4.555335968379447e-05, "loss": 1.2502, "step": 922 }, { "epoch": 0.10945096644136132, "grad_norm": 2.1153215664389817, "learning_rate": 4.560276679841897e-05, "loss": 1.2582, "step": 923 }, { "epoch": 0.1095695482034863, "grad_norm": 2.1726670434809767, "learning_rate": 4.565217391304348e-05, "loss": 1.2586, "step": 924 }, { "epoch": 0.1096881299656113, "grad_norm": 1.9671307783727052, "learning_rate": 4.570158102766799e-05, "loss": 1.3821, "step": 925 }, { "epoch": 0.10980671172773628, "grad_norm": 2.0521507782723694, "learning_rate": 4.575098814229249e-05, "loss": 1.37, "step": 926 }, { "epoch": 0.10992529348986126, "grad_norm": 1.9181584692109688, "learning_rate": 4.5800395256917e-05, "loss": 0.9747, "step": 927 }, { "epoch": 0.11004387525198625, "grad_norm": 1.8722199623877365, "learning_rate": 4.5849802371541504e-05, "loss": 1.0937, "step": 928 }, { "epoch": 0.11016245701411123, "grad_norm": 1.7865272778702999, "learning_rate": 4.589920948616601e-05, "loss": 1.1477, "step": 929 }, { "epoch": 0.11028103877623621, "grad_norm": 1.7119662914166476, "learning_rate": 4.5948616600790515e-05, "loss": 1.2592, "step": 930 }, { "epoch": 0.1103996205383612, "grad_norm": 1.7885041358769678, "learning_rate": 4.5998023715415024e-05, "loss": 1.1396, "step": 931 }, { "epoch": 0.11051820230048619, "grad_norm": 2.0130227835378314, "learning_rate": 4.6047430830039526e-05, "loss": 1.0512, "step": 932 }, { "epoch": 0.11063678406261117, "grad_norm": 1.8315683633769348, "learning_rate": 4.6096837944664035e-05, "loss": 1.4377, "step": 933 }, { "epoch": 0.11075536582473615, "grad_norm": 1.8732028503154297, "learning_rate": 4.614624505928854e-05, "loss": 1.2662, "step": 934 }, { "epoch": 0.11087394758686114, "grad_norm": 1.9999463394591568, "learning_rate": 4.6195652173913046e-05, "loss": 1.2867, "step": 935 }, { "epoch": 0.11099252934898612, "grad_norm": 2.1162199150606886, "learning_rate": 4.624505928853755e-05, "loss": 1.232, "step": 936 }, { "epoch": 0.1111111111111111, "grad_norm": 1.896852560904737, "learning_rate": 4.629446640316206e-05, "loss": 0.9709, "step": 937 }, { "epoch": 0.1112296928732361, "grad_norm": 2.1124524990856726, "learning_rate": 4.634387351778656e-05, "loss": 1.3192, "step": 938 }, { "epoch": 0.11134827463536108, "grad_norm": 2.0247553109642897, "learning_rate": 4.639328063241107e-05, "loss": 1.0495, "step": 939 }, { "epoch": 0.11146685639748606, "grad_norm": 1.837032193157011, "learning_rate": 4.644268774703557e-05, "loss": 1.2016, "step": 940 }, { "epoch": 0.11158543815961106, "grad_norm": 1.7637329162392503, "learning_rate": 4.649209486166008e-05, "loss": 1.1527, "step": 941 }, { "epoch": 0.11170401992173604, "grad_norm": 1.7004539282959317, "learning_rate": 4.654150197628458e-05, "loss": 0.8982, "step": 942 }, { "epoch": 0.11182260168386102, "grad_norm": 1.6509845886045809, "learning_rate": 4.659090909090909e-05, "loss": 1.1349, "step": 943 }, { "epoch": 0.11194118344598601, "grad_norm": 2.01819605967656, "learning_rate": 4.66403162055336e-05, "loss": 1.2484, "step": 944 }, { "epoch": 0.11205976520811099, "grad_norm": 1.8885785050848634, "learning_rate": 4.668972332015811e-05, "loss": 1.2451, "step": 945 }, { "epoch": 0.11217834697023597, "grad_norm": 1.7149115286236059, "learning_rate": 4.673913043478261e-05, "loss": 0.9956, "step": 946 }, { "epoch": 0.11229692873236097, "grad_norm": 1.858290844796101, "learning_rate": 4.678853754940712e-05, "loss": 0.9594, "step": 947 }, { "epoch": 0.11241551049448595, "grad_norm": 1.6783958274539101, "learning_rate": 4.683794466403162e-05, "loss": 0.8649, "step": 948 }, { "epoch": 0.11253409225661093, "grad_norm": 1.8331216161311423, "learning_rate": 4.688735177865613e-05, "loss": 1.4045, "step": 949 }, { "epoch": 0.11265267401873592, "grad_norm": 2.153906467736849, "learning_rate": 4.6936758893280634e-05, "loss": 1.4198, "step": 950 }, { "epoch": 0.1127712557808609, "grad_norm": 2.235965827757821, "learning_rate": 4.698616600790514e-05, "loss": 1.3424, "step": 951 }, { "epoch": 0.11288983754298589, "grad_norm": 2.155332770523107, "learning_rate": 4.7035573122529645e-05, "loss": 1.3781, "step": 952 }, { "epoch": 0.11300841930511088, "grad_norm": 1.8623449872378037, "learning_rate": 4.7084980237154154e-05, "loss": 1.2494, "step": 953 }, { "epoch": 0.11312700106723586, "grad_norm": 2.087247285271247, "learning_rate": 4.7134387351778656e-05, "loss": 1.2624, "step": 954 }, { "epoch": 0.11324558282936084, "grad_norm": 1.813153002810931, "learning_rate": 4.7183794466403165e-05, "loss": 1.4037, "step": 955 }, { "epoch": 0.11336416459148584, "grad_norm": 1.7715822596606956, "learning_rate": 4.723320158102767e-05, "loss": 1.0358, "step": 956 }, { "epoch": 0.11348274635361082, "grad_norm": 1.783962441375639, "learning_rate": 4.7282608695652177e-05, "loss": 1.1431, "step": 957 }, { "epoch": 0.1136013281157358, "grad_norm": 1.8052430807935291, "learning_rate": 4.733201581027668e-05, "loss": 1.0908, "step": 958 }, { "epoch": 0.11371990987786078, "grad_norm": 1.8586355378281585, "learning_rate": 4.738142292490119e-05, "loss": 1.0749, "step": 959 }, { "epoch": 0.11383849163998577, "grad_norm": 2.0881727003165755, "learning_rate": 4.743083003952569e-05, "loss": 1.1952, "step": 960 }, { "epoch": 0.11395707340211075, "grad_norm": 1.6846664201538244, "learning_rate": 4.74802371541502e-05, "loss": 1.3378, "step": 961 }, { "epoch": 0.11407565516423573, "grad_norm": 2.005243313616091, "learning_rate": 4.75296442687747e-05, "loss": 1.2846, "step": 962 }, { "epoch": 0.11419423692636073, "grad_norm": 2.3306286589425995, "learning_rate": 4.757905138339921e-05, "loss": 1.2981, "step": 963 }, { "epoch": 0.11431281868848571, "grad_norm": 1.900634256089495, "learning_rate": 4.762845849802372e-05, "loss": 1.4166, "step": 964 }, { "epoch": 0.11443140045061069, "grad_norm": 2.0920531846538304, "learning_rate": 4.767786561264823e-05, "loss": 1.2489, "step": 965 }, { "epoch": 0.11454998221273568, "grad_norm": 1.96156413237464, "learning_rate": 4.772727272727273e-05, "loss": 1.2874, "step": 966 }, { "epoch": 0.11466856397486067, "grad_norm": 1.9617195638348448, "learning_rate": 4.777667984189724e-05, "loss": 1.0668, "step": 967 }, { "epoch": 0.11478714573698565, "grad_norm": 2.0481211742199674, "learning_rate": 4.782608695652174e-05, "loss": 1.1679, "step": 968 }, { "epoch": 0.11490572749911064, "grad_norm": 2.0941894832795107, "learning_rate": 4.787549407114625e-05, "loss": 1.1117, "step": 969 }, { "epoch": 0.11502430926123562, "grad_norm": 1.659940015126908, "learning_rate": 4.792490118577075e-05, "loss": 1.3317, "step": 970 }, { "epoch": 0.1151428910233606, "grad_norm": 1.7143135700833254, "learning_rate": 4.797430830039526e-05, "loss": 1.2139, "step": 971 }, { "epoch": 0.1152614727854856, "grad_norm": 1.8946769265830694, "learning_rate": 4.8023715415019764e-05, "loss": 1.1573, "step": 972 }, { "epoch": 0.11538005454761058, "grad_norm": 1.9568290156255086, "learning_rate": 4.807312252964427e-05, "loss": 1.1493, "step": 973 }, { "epoch": 0.11549863630973556, "grad_norm": 1.9682931863285784, "learning_rate": 4.8122529644268775e-05, "loss": 1.2079, "step": 974 }, { "epoch": 0.11561721807186055, "grad_norm": 2.242643603494696, "learning_rate": 4.8171936758893284e-05, "loss": 1.2469, "step": 975 }, { "epoch": 0.11573579983398553, "grad_norm": 1.881984303824199, "learning_rate": 4.822134387351779e-05, "loss": 1.2232, "step": 976 }, { "epoch": 0.11585438159611051, "grad_norm": 1.9788716852599484, "learning_rate": 4.8270750988142296e-05, "loss": 1.1689, "step": 977 }, { "epoch": 0.11597296335823551, "grad_norm": 1.852786357550817, "learning_rate": 4.83201581027668e-05, "loss": 1.1687, "step": 978 }, { "epoch": 0.11609154512036049, "grad_norm": 2.013911311112838, "learning_rate": 4.836956521739131e-05, "loss": 1.2123, "step": 979 }, { "epoch": 0.11621012688248547, "grad_norm": 1.979762269989753, "learning_rate": 4.841897233201581e-05, "loss": 1.0937, "step": 980 }, { "epoch": 0.11632870864461047, "grad_norm": 1.9586515160489768, "learning_rate": 4.846837944664032e-05, "loss": 1.1928, "step": 981 }, { "epoch": 0.11644729040673545, "grad_norm": 1.8787502593383414, "learning_rate": 4.851778656126482e-05, "loss": 1.2031, "step": 982 }, { "epoch": 0.11656587216886043, "grad_norm": 1.8925454821695789, "learning_rate": 4.8567193675889336e-05, "loss": 1.035, "step": 983 }, { "epoch": 0.11668445393098542, "grad_norm": 1.9625363754307843, "learning_rate": 4.861660079051384e-05, "loss": 1.0582, "step": 984 }, { "epoch": 0.1168030356931104, "grad_norm": 1.9065950092131323, "learning_rate": 4.866600790513835e-05, "loss": 1.3718, "step": 985 }, { "epoch": 0.11692161745523538, "grad_norm": 1.8046804656741444, "learning_rate": 4.871541501976285e-05, "loss": 1.1547, "step": 986 }, { "epoch": 0.11704019921736036, "grad_norm": 1.7622499727660677, "learning_rate": 4.876482213438736e-05, "loss": 0.8967, "step": 987 }, { "epoch": 0.11715878097948536, "grad_norm": 1.9781241882538605, "learning_rate": 4.881422924901186e-05, "loss": 1.3093, "step": 988 }, { "epoch": 0.11727736274161034, "grad_norm": 1.724237468843233, "learning_rate": 4.886363636363637e-05, "loss": 1.2335, "step": 989 }, { "epoch": 0.11739594450373532, "grad_norm": 1.8110047843615895, "learning_rate": 4.891304347826087e-05, "loss": 0.9987, "step": 990 }, { "epoch": 0.11751452626586031, "grad_norm": 2.051439912230871, "learning_rate": 4.896245059288538e-05, "loss": 1.2339, "step": 991 }, { "epoch": 0.1176331080279853, "grad_norm": 2.2060499732588257, "learning_rate": 4.901185770750988e-05, "loss": 1.1418, "step": 992 }, { "epoch": 0.11775168979011028, "grad_norm": 1.8898111790633472, "learning_rate": 4.906126482213439e-05, "loss": 0.9866, "step": 993 }, { "epoch": 0.11787027155223527, "grad_norm": 1.9105448729196526, "learning_rate": 4.9110671936758895e-05, "loss": 1.3148, "step": 994 }, { "epoch": 0.11798885331436025, "grad_norm": 1.7988818762938, "learning_rate": 4.9160079051383404e-05, "loss": 0.6929, "step": 995 }, { "epoch": 0.11810743507648523, "grad_norm": 1.6665733638139224, "learning_rate": 4.9209486166007906e-05, "loss": 0.9581, "step": 996 }, { "epoch": 0.11822601683861023, "grad_norm": 2.1392490113267217, "learning_rate": 4.9258893280632415e-05, "loss": 1.3025, "step": 997 }, { "epoch": 0.1183445986007352, "grad_norm": 1.6484191710586578, "learning_rate": 4.930830039525692e-05, "loss": 1.3707, "step": 998 }, { "epoch": 0.11846318036286019, "grad_norm": 1.948458496037207, "learning_rate": 4.9357707509881426e-05, "loss": 1.2303, "step": 999 }, { "epoch": 0.11858176212498518, "grad_norm": 1.8026660099959277, "learning_rate": 4.940711462450593e-05, "loss": 1.0804, "step": 1000 }, { "epoch": 0.11870034388711016, "grad_norm": 1.8026466537417387, "learning_rate": 4.945652173913044e-05, "loss": 1.354, "step": 1001 }, { "epoch": 0.11881892564923514, "grad_norm": 1.6044287487874351, "learning_rate": 4.950592885375494e-05, "loss": 1.3488, "step": 1002 }, { "epoch": 0.11893750741136014, "grad_norm": 1.7719903825353065, "learning_rate": 4.955533596837945e-05, "loss": 1.1391, "step": 1003 }, { "epoch": 0.11905608917348512, "grad_norm": 2.0330938724381307, "learning_rate": 4.960474308300396e-05, "loss": 1.0259, "step": 1004 }, { "epoch": 0.1191746709356101, "grad_norm": 1.5916787119813633, "learning_rate": 4.965415019762846e-05, "loss": 0.8591, "step": 1005 }, { "epoch": 0.1192932526977351, "grad_norm": 1.682357430479222, "learning_rate": 4.970355731225297e-05, "loss": 1.1286, "step": 1006 }, { "epoch": 0.11941183445986008, "grad_norm": 1.932868773806507, "learning_rate": 4.975296442687747e-05, "loss": 1.3098, "step": 1007 }, { "epoch": 0.11953041622198506, "grad_norm": 1.8164841750466307, "learning_rate": 4.980237154150198e-05, "loss": 1.2577, "step": 1008 }, { "epoch": 0.11964899798411005, "grad_norm": 1.9510900299128513, "learning_rate": 4.985177865612648e-05, "loss": 1.4262, "step": 1009 }, { "epoch": 0.11976757974623503, "grad_norm": 1.567392649076469, "learning_rate": 4.990118577075099e-05, "loss": 1.2252, "step": 1010 }, { "epoch": 0.11988616150836001, "grad_norm": 1.7364980371963226, "learning_rate": 4.9950592885375493e-05, "loss": 0.9976, "step": 1011 }, { "epoch": 0.12000474327048499, "grad_norm": 1.849831435461812, "learning_rate": 5e-05, "loss": 1.228, "step": 1012 }, { "epoch": 0.12012332503260999, "grad_norm": 1.9555085269155421, "learning_rate": 4.999999988476533e-05, "loss": 0.956, "step": 1013 }, { "epoch": 0.12024190679473497, "grad_norm": 1.9440927627835354, "learning_rate": 4.9999999539061315e-05, "loss": 1.1064, "step": 1014 }, { "epoch": 0.12036048855685995, "grad_norm": 1.84469394830904, "learning_rate": 4.999999896288796e-05, "loss": 1.2925, "step": 1015 }, { "epoch": 0.12047907031898494, "grad_norm": 1.9158110052876123, "learning_rate": 4.999999815624527e-05, "loss": 1.3134, "step": 1016 }, { "epoch": 0.12059765208110992, "grad_norm": 2.0117361645944984, "learning_rate": 4.999999711913327e-05, "loss": 1.1083, "step": 1017 }, { "epoch": 0.1207162338432349, "grad_norm": 1.844216080601768, "learning_rate": 4.999999585155194e-05, "loss": 1.2087, "step": 1018 }, { "epoch": 0.1208348156053599, "grad_norm": 1.730543135065885, "learning_rate": 4.999999435350131e-05, "loss": 1.1102, "step": 1019 }, { "epoch": 0.12095339736748488, "grad_norm": 1.9818506190570875, "learning_rate": 4.9999992624981384e-05, "loss": 1.3528, "step": 1020 }, { "epoch": 0.12107197912960986, "grad_norm": 1.8715099993860853, "learning_rate": 4.9999990665992186e-05, "loss": 1.0893, "step": 1021 }, { "epoch": 0.12119056089173486, "grad_norm": 1.716167027610077, "learning_rate": 4.999998847653373e-05, "loss": 1.1002, "step": 1022 }, { "epoch": 0.12130914265385984, "grad_norm": 1.790487121080667, "learning_rate": 4.9999986056606044e-05, "loss": 1.2409, "step": 1023 }, { "epoch": 0.12142772441598482, "grad_norm": 1.9883428146216597, "learning_rate": 4.9999983406209127e-05, "loss": 1.324, "step": 1024 }, { "epoch": 0.12154630617810981, "grad_norm": 1.7654449428420373, "learning_rate": 4.9999980525343035e-05, "loss": 1.0383, "step": 1025 }, { "epoch": 0.12166488794023479, "grad_norm": 1.950085632070345, "learning_rate": 4.999997741400777e-05, "loss": 1.102, "step": 1026 }, { "epoch": 0.12178346970235977, "grad_norm": 1.8529344004458457, "learning_rate": 4.9999974072203383e-05, "loss": 1.2229, "step": 1027 }, { "epoch": 0.12190205146448477, "grad_norm": 2.0505550638386665, "learning_rate": 4.9999970499929884e-05, "loss": 1.1214, "step": 1028 }, { "epoch": 0.12202063322660975, "grad_norm": 1.7682492794989868, "learning_rate": 4.999996669718731e-05, "loss": 0.9526, "step": 1029 }, { "epoch": 0.12213921498873473, "grad_norm": 1.7107854388329125, "learning_rate": 4.999996266397571e-05, "loss": 1.2079, "step": 1030 }, { "epoch": 0.12225779675085972, "grad_norm": 1.507303001129272, "learning_rate": 4.99999584002951e-05, "loss": 1.0681, "step": 1031 }, { "epoch": 0.1223763785129847, "grad_norm": 1.727884210491178, "learning_rate": 4.9999953906145534e-05, "loss": 1.2404, "step": 1032 }, { "epoch": 0.12249496027510968, "grad_norm": 2.1978094225061695, "learning_rate": 4.999994918152705e-05, "loss": 1.1827, "step": 1033 }, { "epoch": 0.12261354203723468, "grad_norm": 1.9662562674855946, "learning_rate": 4.99999442264397e-05, "loss": 1.1612, "step": 1034 }, { "epoch": 0.12273212379935966, "grad_norm": 1.7215098542107097, "learning_rate": 4.999993904088351e-05, "loss": 1.3064, "step": 1035 }, { "epoch": 0.12285070556148464, "grad_norm": 1.6821097285578939, "learning_rate": 4.999993362485855e-05, "loss": 1.0158, "step": 1036 }, { "epoch": 0.12296928732360964, "grad_norm": 1.8062673216588407, "learning_rate": 4.9999927978364846e-05, "loss": 1.1758, "step": 1037 }, { "epoch": 0.12308786908573462, "grad_norm": 1.8673197453023052, "learning_rate": 4.999992210140248e-05, "loss": 1.002, "step": 1038 }, { "epoch": 0.1232064508478596, "grad_norm": 1.8407668562551853, "learning_rate": 4.999991599397147e-05, "loss": 1.3068, "step": 1039 }, { "epoch": 0.12332503260998458, "grad_norm": 1.776021028710403, "learning_rate": 4.99999096560719e-05, "loss": 1.0876, "step": 1040 }, { "epoch": 0.12344361437210957, "grad_norm": 1.9118130429942444, "learning_rate": 4.999990308770382e-05, "loss": 1.1303, "step": 1041 }, { "epoch": 0.12356219613423455, "grad_norm": 1.9030062723811847, "learning_rate": 4.999989628886729e-05, "loss": 1.2033, "step": 1042 }, { "epoch": 0.12368077789635953, "grad_norm": 1.8020574134702663, "learning_rate": 4.999988925956237e-05, "loss": 1.3159, "step": 1043 }, { "epoch": 0.12379935965848453, "grad_norm": 1.833444467228498, "learning_rate": 4.9999881999789136e-05, "loss": 1.1946, "step": 1044 }, { "epoch": 0.12391794142060951, "grad_norm": 1.9243703033074129, "learning_rate": 4.999987450954764e-05, "loss": 1.1783, "step": 1045 }, { "epoch": 0.12403652318273449, "grad_norm": 1.8863534023748212, "learning_rate": 4.9999866788837964e-05, "loss": 1.3485, "step": 1046 }, { "epoch": 0.12415510494485948, "grad_norm": 1.7730614947739338, "learning_rate": 4.999985883766017e-05, "loss": 1.252, "step": 1047 }, { "epoch": 0.12427368670698447, "grad_norm": 2.011959995811757, "learning_rate": 4.999985065601433e-05, "loss": 1.1448, "step": 1048 }, { "epoch": 0.12439226846910945, "grad_norm": 1.644928149889462, "learning_rate": 4.999984224390053e-05, "loss": 1.2044, "step": 1049 }, { "epoch": 0.12451085023123444, "grad_norm": 1.7007102037002007, "learning_rate": 4.999983360131883e-05, "loss": 0.9127, "step": 1050 }, { "epoch": 0.12462943199335942, "grad_norm": 1.9475956461288497, "learning_rate": 4.999982472826934e-05, "loss": 1.3703, "step": 1051 }, { "epoch": 0.1247480137554844, "grad_norm": 1.6820796990762905, "learning_rate": 4.999981562475211e-05, "loss": 1.21, "step": 1052 }, { "epoch": 0.1248665955176094, "grad_norm": 1.9299529576017962, "learning_rate": 4.999980629076724e-05, "loss": 1.1906, "step": 1053 }, { "epoch": 0.12498517727973438, "grad_norm": 1.730668895714683, "learning_rate": 4.9999796726314805e-05, "loss": 1.0917, "step": 1054 }, { "epoch": 0.12510375904185936, "grad_norm": 1.6823271421774455, "learning_rate": 4.999978693139492e-05, "loss": 0.9525, "step": 1055 }, { "epoch": 0.12522234080398434, "grad_norm": 1.8355783680393534, "learning_rate": 4.999977690600764e-05, "loss": 1.1243, "step": 1056 }, { "epoch": 0.12534092256610932, "grad_norm": 1.6726440650668317, "learning_rate": 4.999976665015308e-05, "loss": 1.3405, "step": 1057 }, { "epoch": 0.12545950432823433, "grad_norm": 1.7524104511638119, "learning_rate": 4.9999756163831325e-05, "loss": 1.4106, "step": 1058 }, { "epoch": 0.1255780860903593, "grad_norm": 1.6975359408831927, "learning_rate": 4.999974544704247e-05, "loss": 1.1604, "step": 1059 }, { "epoch": 0.1256966678524843, "grad_norm": 1.9298643376479314, "learning_rate": 4.9999734499786635e-05, "loss": 1.2125, "step": 1060 }, { "epoch": 0.12581524961460927, "grad_norm": 1.7416592424032298, "learning_rate": 4.9999723322063886e-05, "loss": 1.0412, "step": 1061 }, { "epoch": 0.12593383137673425, "grad_norm": 2.031533783288666, "learning_rate": 4.9999711913874355e-05, "loss": 1.0601, "step": 1062 }, { "epoch": 0.12605241313885923, "grad_norm": 1.5042702179708785, "learning_rate": 4.999970027521814e-05, "loss": 1.0035, "step": 1063 }, { "epoch": 0.12617099490098424, "grad_norm": 1.743517812999746, "learning_rate": 4.999968840609533e-05, "loss": 1.2745, "step": 1064 }, { "epoch": 0.12628957666310922, "grad_norm": 1.5570688720184922, "learning_rate": 4.9999676306506064e-05, "loss": 0.8286, "step": 1065 }, { "epoch": 0.1264081584252342, "grad_norm": 1.5932407197526472, "learning_rate": 4.999966397645044e-05, "loss": 1.325, "step": 1066 }, { "epoch": 0.12652674018735918, "grad_norm": 2.016366838524765, "learning_rate": 4.9999651415928564e-05, "loss": 1.055, "step": 1067 }, { "epoch": 0.12664532194948416, "grad_norm": 1.927509589279619, "learning_rate": 4.999963862494056e-05, "loss": 1.2582, "step": 1068 }, { "epoch": 0.12676390371160914, "grad_norm": 1.897029217529347, "learning_rate": 4.999962560348654e-05, "loss": 1.0861, "step": 1069 }, { "epoch": 0.12688248547373415, "grad_norm": 1.8148911752814572, "learning_rate": 4.9999612351566637e-05, "loss": 1.2267, "step": 1070 }, { "epoch": 0.12700106723585913, "grad_norm": 1.769416009370227, "learning_rate": 4.999959886918096e-05, "loss": 1.231, "step": 1071 }, { "epoch": 0.1271196489979841, "grad_norm": 1.7238857957089018, "learning_rate": 4.9999585156329634e-05, "loss": 1.2082, "step": 1072 }, { "epoch": 0.1272382307601091, "grad_norm": 1.5468343508179117, "learning_rate": 4.9999571213012796e-05, "loss": 0.838, "step": 1073 }, { "epoch": 0.12735681252223408, "grad_norm": 1.6335036893171109, "learning_rate": 4.9999557039230565e-05, "loss": 1.2508, "step": 1074 }, { "epoch": 0.12747539428435906, "grad_norm": 1.972163041577415, "learning_rate": 4.9999542634983076e-05, "loss": 1.1852, "step": 1075 }, { "epoch": 0.12759397604648406, "grad_norm": 1.8169485001389083, "learning_rate": 4.999952800027046e-05, "loss": 1.1128, "step": 1076 }, { "epoch": 0.12771255780860905, "grad_norm": 1.6886683116720567, "learning_rate": 4.9999513135092855e-05, "loss": 1.1429, "step": 1077 }, { "epoch": 0.12783113957073403, "grad_norm": 2.101878852335351, "learning_rate": 4.9999498039450386e-05, "loss": 1.0705, "step": 1078 }, { "epoch": 0.127949721332859, "grad_norm": 1.7381293148988552, "learning_rate": 4.9999482713343215e-05, "loss": 0.9712, "step": 1079 }, { "epoch": 0.128068303094984, "grad_norm": 1.5493550093842379, "learning_rate": 4.9999467156771464e-05, "loss": 1.1666, "step": 1080 }, { "epoch": 0.12818688485710897, "grad_norm": 1.6573753726006677, "learning_rate": 4.9999451369735276e-05, "loss": 0.8108, "step": 1081 }, { "epoch": 0.12830546661923395, "grad_norm": 1.7568689030898086, "learning_rate": 4.9999435352234806e-05, "loss": 1.0184, "step": 1082 }, { "epoch": 0.12842404838135896, "grad_norm": 1.948309836515425, "learning_rate": 4.99994191042702e-05, "loss": 1.2576, "step": 1083 }, { "epoch": 0.12854263014348394, "grad_norm": 2.0497108527418577, "learning_rate": 4.999940262584161e-05, "loss": 0.9323, "step": 1084 }, { "epoch": 0.12866121190560892, "grad_norm": 1.8350101931266938, "learning_rate": 4.999938591694918e-05, "loss": 1.1348, "step": 1085 }, { "epoch": 0.1287797936677339, "grad_norm": 1.7046512984904234, "learning_rate": 4.999936897759306e-05, "loss": 1.2158, "step": 1086 }, { "epoch": 0.12889837542985888, "grad_norm": 2.0563344286444267, "learning_rate": 4.999935180777343e-05, "loss": 1.2374, "step": 1087 }, { "epoch": 0.12901695719198386, "grad_norm": 1.893878065008453, "learning_rate": 4.999933440749043e-05, "loss": 1.2286, "step": 1088 }, { "epoch": 0.12913553895410887, "grad_norm": 2.0464174044644508, "learning_rate": 4.9999316776744206e-05, "loss": 1.0264, "step": 1089 }, { "epoch": 0.12925412071623385, "grad_norm": 2.0049912179641884, "learning_rate": 4.999929891553495e-05, "loss": 0.8706, "step": 1090 }, { "epoch": 0.12937270247835883, "grad_norm": 2.326455363219795, "learning_rate": 4.999928082386282e-05, "loss": 1.2466, "step": 1091 }, { "epoch": 0.1294912842404838, "grad_norm": 2.177055176550707, "learning_rate": 4.999926250172797e-05, "loss": 1.0925, "step": 1092 }, { "epoch": 0.1296098660026088, "grad_norm": 2.139923066543896, "learning_rate": 4.9999243949130584e-05, "loss": 1.2492, "step": 1093 }, { "epoch": 0.12972844776473377, "grad_norm": 1.8650182102463062, "learning_rate": 4.999922516607081e-05, "loss": 1.4138, "step": 1094 }, { "epoch": 0.12984702952685878, "grad_norm": 1.727890729251297, "learning_rate": 4.999920615254884e-05, "loss": 1.3152, "step": 1095 }, { "epoch": 0.12996561128898376, "grad_norm": 2.1479397502814357, "learning_rate": 4.999918690856485e-05, "loss": 1.235, "step": 1096 }, { "epoch": 0.13008419305110874, "grad_norm": 1.613696030978353, "learning_rate": 4.999916743411901e-05, "loss": 1.2745, "step": 1097 }, { "epoch": 0.13020277481323372, "grad_norm": 1.7704666297831217, "learning_rate": 4.999914772921151e-05, "loss": 1.2599, "step": 1098 }, { "epoch": 0.1303213565753587, "grad_norm": 1.8629181856716268, "learning_rate": 4.999912779384252e-05, "loss": 1.2683, "step": 1099 }, { "epoch": 0.13043993833748369, "grad_norm": 1.8419651470267826, "learning_rate": 4.999910762801222e-05, "loss": 1.4847, "step": 1100 }, { "epoch": 0.1305585200996087, "grad_norm": 1.809145851451482, "learning_rate": 4.999908723172081e-05, "loss": 1.2067, "step": 1101 }, { "epoch": 0.13067710186173367, "grad_norm": 1.7503667199447281, "learning_rate": 4.999906660496847e-05, "loss": 1.1288, "step": 1102 }, { "epoch": 0.13079568362385866, "grad_norm": 1.717215861980404, "learning_rate": 4.999904574775539e-05, "loss": 1.2514, "step": 1103 }, { "epoch": 0.13091426538598364, "grad_norm": 1.640608828372075, "learning_rate": 4.999902466008177e-05, "loss": 1.1283, "step": 1104 }, { "epoch": 0.13103284714810862, "grad_norm": 1.8051514984999204, "learning_rate": 4.999900334194779e-05, "loss": 1.4241, "step": 1105 }, { "epoch": 0.1311514289102336, "grad_norm": 1.704628757020758, "learning_rate": 4.9998981793353666e-05, "loss": 1.1074, "step": 1106 }, { "epoch": 0.13127001067235858, "grad_norm": 1.8795192043366644, "learning_rate": 4.999896001429958e-05, "loss": 1.1803, "step": 1107 }, { "epoch": 0.1313885924344836, "grad_norm": 1.530375425014469, "learning_rate": 4.999893800478573e-05, "loss": 1.1084, "step": 1108 }, { "epoch": 0.13150717419660857, "grad_norm": 1.6456513960475965, "learning_rate": 4.999891576481234e-05, "loss": 1.1216, "step": 1109 }, { "epoch": 0.13162575595873355, "grad_norm": 1.8076841183456553, "learning_rate": 4.99988932943796e-05, "loss": 1.3404, "step": 1110 }, { "epoch": 0.13174433772085853, "grad_norm": 1.8679515896365269, "learning_rate": 4.999887059348772e-05, "loss": 1.2514, "step": 1111 }, { "epoch": 0.1318629194829835, "grad_norm": 1.6942154184592442, "learning_rate": 4.9998847662136905e-05, "loss": 1.1239, "step": 1112 }, { "epoch": 0.1319815012451085, "grad_norm": 1.8109324430726381, "learning_rate": 4.9998824500327365e-05, "loss": 1.1998, "step": 1113 }, { "epoch": 0.1321000830072335, "grad_norm": 1.7852251750571821, "learning_rate": 4.999880110805933e-05, "loss": 1.2698, "step": 1114 }, { "epoch": 0.13221866476935848, "grad_norm": 1.5281057517563001, "learning_rate": 4.9998777485332996e-05, "loss": 1.0754, "step": 1115 }, { "epoch": 0.13233724653148346, "grad_norm": 1.6715323301310816, "learning_rate": 4.99987536321486e-05, "loss": 0.9717, "step": 1116 }, { "epoch": 0.13245582829360844, "grad_norm": 1.5752741396364391, "learning_rate": 4.999872954850634e-05, "loss": 0.8794, "step": 1117 }, { "epoch": 0.13257441005573342, "grad_norm": 1.6534887738899051, "learning_rate": 4.9998705234406454e-05, "loss": 1.3386, "step": 1118 }, { "epoch": 0.1326929918178584, "grad_norm": 1.5776050571298075, "learning_rate": 4.9998680689849157e-05, "loss": 1.1703, "step": 1119 }, { "epoch": 0.1328115735799834, "grad_norm": 1.558972000338839, "learning_rate": 4.999865591483468e-05, "loss": 1.056, "step": 1120 }, { "epoch": 0.1329301553421084, "grad_norm": 1.652550377955272, "learning_rate": 4.999863090936325e-05, "loss": 0.9584, "step": 1121 }, { "epoch": 0.13304873710423337, "grad_norm": 1.8539743168789626, "learning_rate": 4.99986056734351e-05, "loss": 1.2429, "step": 1122 }, { "epoch": 0.13316731886635835, "grad_norm": 1.6568608777747698, "learning_rate": 4.9998580207050466e-05, "loss": 1.2244, "step": 1123 }, { "epoch": 0.13328590062848333, "grad_norm": 1.9346238827442102, "learning_rate": 4.999855451020957e-05, "loss": 1.3629, "step": 1124 }, { "epoch": 0.13340448239060831, "grad_norm": 1.8240837126904088, "learning_rate": 4.999852858291266e-05, "loss": 1.0605, "step": 1125 }, { "epoch": 0.13352306415273332, "grad_norm": 1.6109697129490912, "learning_rate": 4.999850242515998e-05, "loss": 1.0814, "step": 1126 }, { "epoch": 0.1336416459148583, "grad_norm": 1.5982312128969685, "learning_rate": 4.999847603695175e-05, "loss": 1.2116, "step": 1127 }, { "epoch": 0.13376022767698328, "grad_norm": 2.028970955865662, "learning_rate": 4.9998449418288234e-05, "loss": 1.412, "step": 1128 }, { "epoch": 0.13387880943910827, "grad_norm": 1.7530959227923946, "learning_rate": 4.999842256916967e-05, "loss": 0.9405, "step": 1129 }, { "epoch": 0.13399739120123325, "grad_norm": 2.102151242661334, "learning_rate": 4.99983954895963e-05, "loss": 1.2924, "step": 1130 }, { "epoch": 0.13411597296335823, "grad_norm": 1.7996441930889286, "learning_rate": 4.999836817956838e-05, "loss": 1.315, "step": 1131 }, { "epoch": 0.1342345547254832, "grad_norm": 1.6438776683665282, "learning_rate": 4.9998340639086164e-05, "loss": 1.2632, "step": 1132 }, { "epoch": 0.13435313648760822, "grad_norm": 1.514760449719494, "learning_rate": 4.99983128681499e-05, "loss": 0.8421, "step": 1133 }, { "epoch": 0.1344717182497332, "grad_norm": 1.9035040354587183, "learning_rate": 4.999828486675984e-05, "loss": 1.1838, "step": 1134 }, { "epoch": 0.13459030001185818, "grad_norm": 1.7344527449097376, "learning_rate": 4.999825663491625e-05, "loss": 1.2936, "step": 1135 }, { "epoch": 0.13470888177398316, "grad_norm": 1.419892547102826, "learning_rate": 4.99982281726194e-05, "loss": 1.028, "step": 1136 }, { "epoch": 0.13482746353610814, "grad_norm": 1.8473328333033185, "learning_rate": 4.999819947986954e-05, "loss": 1.1984, "step": 1137 }, { "epoch": 0.13494604529823312, "grad_norm": 1.783309595734706, "learning_rate": 4.999817055666692e-05, "loss": 1.1726, "step": 1138 }, { "epoch": 0.13506462706035813, "grad_norm": 1.476040860622353, "learning_rate": 4.9998141403011836e-05, "loss": 1.4328, "step": 1139 }, { "epoch": 0.1351832088224831, "grad_norm": 1.622774991952162, "learning_rate": 4.999811201890453e-05, "loss": 1.1468, "step": 1140 }, { "epoch": 0.1353017905846081, "grad_norm": 1.9952505668065472, "learning_rate": 4.9998082404345295e-05, "loss": 1.1575, "step": 1141 }, { "epoch": 0.13542037234673307, "grad_norm": 1.7312521317568663, "learning_rate": 4.99980525593344e-05, "loss": 1.2236, "step": 1142 }, { "epoch": 0.13553895410885805, "grad_norm": 1.7562832946281564, "learning_rate": 4.999802248387211e-05, "loss": 1.2104, "step": 1143 }, { "epoch": 0.13565753587098303, "grad_norm": 1.8064074588280818, "learning_rate": 4.9997992177958705e-05, "loss": 1.1883, "step": 1144 }, { "epoch": 0.13577611763310804, "grad_norm": 1.7512489356566345, "learning_rate": 4.999796164159446e-05, "loss": 1.2332, "step": 1145 }, { "epoch": 0.13589469939523302, "grad_norm": 1.9506302754602374, "learning_rate": 4.999793087477967e-05, "loss": 1.3006, "step": 1146 }, { "epoch": 0.136013281157358, "grad_norm": 1.7782049334377104, "learning_rate": 4.999789987751462e-05, "loss": 1.2128, "step": 1147 }, { "epoch": 0.13613186291948298, "grad_norm": 1.8294946717763059, "learning_rate": 4.999786864979958e-05, "loss": 1.3324, "step": 1148 }, { "epoch": 0.13625044468160796, "grad_norm": 1.5996806219328112, "learning_rate": 4.999783719163485e-05, "loss": 1.2727, "step": 1149 }, { "epoch": 0.13636902644373294, "grad_norm": 1.7814278002985118, "learning_rate": 4.999780550302071e-05, "loss": 1.1625, "step": 1150 }, { "epoch": 0.13648760820585795, "grad_norm": 1.644250913880291, "learning_rate": 4.999777358395746e-05, "loss": 1.2001, "step": 1151 }, { "epoch": 0.13660618996798293, "grad_norm": 1.759508106680521, "learning_rate": 4.999774143444539e-05, "loss": 1.087, "step": 1152 }, { "epoch": 0.1367247717301079, "grad_norm": 1.5933677812136826, "learning_rate": 4.99977090544848e-05, "loss": 1.0231, "step": 1153 }, { "epoch": 0.1368433534922329, "grad_norm": 1.4502302265609956, "learning_rate": 4.999767644407599e-05, "loss": 1.0564, "step": 1154 }, { "epoch": 0.13696193525435787, "grad_norm": 2.0823735946209987, "learning_rate": 4.9997643603219245e-05, "loss": 1.3283, "step": 1155 }, { "epoch": 0.13708051701648286, "grad_norm": 1.97240903344867, "learning_rate": 4.9997610531914896e-05, "loss": 1.2341, "step": 1156 }, { "epoch": 0.13719909877860784, "grad_norm": 1.8975780811776994, "learning_rate": 4.999757723016322e-05, "loss": 1.2824, "step": 1157 }, { "epoch": 0.13731768054073284, "grad_norm": 1.9099669591461903, "learning_rate": 4.999754369796454e-05, "loss": 0.9507, "step": 1158 }, { "epoch": 0.13743626230285783, "grad_norm": 1.9400843364465061, "learning_rate": 4.9997509935319166e-05, "loss": 1.1591, "step": 1159 }, { "epoch": 0.1375548440649828, "grad_norm": 1.6865899194570029, "learning_rate": 4.99974759422274e-05, "loss": 1.2238, "step": 1160 }, { "epoch": 0.1376734258271078, "grad_norm": 1.4965875438925413, "learning_rate": 4.9997441718689555e-05, "loss": 1.2548, "step": 1161 }, { "epoch": 0.13779200758923277, "grad_norm": 1.7167257090672152, "learning_rate": 4.9997407264705964e-05, "loss": 1.1094, "step": 1162 }, { "epoch": 0.13791058935135775, "grad_norm": 1.9984652261976052, "learning_rate": 4.999737258027692e-05, "loss": 1.1271, "step": 1163 }, { "epoch": 0.13802917111348276, "grad_norm": 1.7841749850405313, "learning_rate": 4.999733766540276e-05, "loss": 1.2, "step": 1164 }, { "epoch": 0.13814775287560774, "grad_norm": 1.762805640217121, "learning_rate": 4.99973025200838e-05, "loss": 1.3382, "step": 1165 }, { "epoch": 0.13826633463773272, "grad_norm": 1.7812156369967262, "learning_rate": 4.999726714432036e-05, "loss": 1.2499, "step": 1166 }, { "epoch": 0.1383849163998577, "grad_norm": 1.5159957600761718, "learning_rate": 4.9997231538112775e-05, "loss": 1.1019, "step": 1167 }, { "epoch": 0.13850349816198268, "grad_norm": 1.5730948609362534, "learning_rate": 4.9997195701461366e-05, "loss": 1.111, "step": 1168 }, { "epoch": 0.13862207992410766, "grad_norm": 1.671554365423232, "learning_rate": 4.999715963436647e-05, "loss": 0.9275, "step": 1169 }, { "epoch": 0.13874066168623267, "grad_norm": 1.5771847419188025, "learning_rate": 4.999712333682842e-05, "loss": 1.2024, "step": 1170 }, { "epoch": 0.13885924344835765, "grad_norm": 1.7518683705047189, "learning_rate": 4.9997086808847534e-05, "loss": 0.9171, "step": 1171 }, { "epoch": 0.13897782521048263, "grad_norm": 2.037762937444253, "learning_rate": 4.999705005042417e-05, "loss": 1.1708, "step": 1172 }, { "epoch": 0.1390964069726076, "grad_norm": 1.7094853046049336, "learning_rate": 4.999701306155866e-05, "loss": 0.9993, "step": 1173 }, { "epoch": 0.1392149887347326, "grad_norm": 1.7175331116889134, "learning_rate": 4.999697584225134e-05, "loss": 1.1446, "step": 1174 }, { "epoch": 0.13933357049685757, "grad_norm": 1.6984031425181925, "learning_rate": 4.9996938392502545e-05, "loss": 1.0772, "step": 1175 }, { "epoch": 0.13945215225898258, "grad_norm": 2.295462813306635, "learning_rate": 4.9996900712312644e-05, "loss": 1.3928, "step": 1176 }, { "epoch": 0.13957073402110756, "grad_norm": 2.017049743044323, "learning_rate": 4.999686280168197e-05, "loss": 1.4378, "step": 1177 }, { "epoch": 0.13968931578323254, "grad_norm": 1.7071582975539423, "learning_rate": 4.999682466061087e-05, "loss": 0.9676, "step": 1178 }, { "epoch": 0.13980789754535752, "grad_norm": 1.986355541613088, "learning_rate": 4.999678628909971e-05, "loss": 1.3657, "step": 1179 }, { "epoch": 0.1399264793074825, "grad_norm": 1.7591926645544043, "learning_rate": 4.9996747687148814e-05, "loss": 1.2089, "step": 1180 }, { "epoch": 0.14004506106960748, "grad_norm": 1.8527089057921522, "learning_rate": 4.9996708854758576e-05, "loss": 1.085, "step": 1181 }, { "epoch": 0.1401636428317325, "grad_norm": 1.7350098650979797, "learning_rate": 4.999666979192933e-05, "loss": 1.3144, "step": 1182 }, { "epoch": 0.14028222459385747, "grad_norm": 1.8868965019686643, "learning_rate": 4.999663049866143e-05, "loss": 1.2824, "step": 1183 }, { "epoch": 0.14040080635598245, "grad_norm": 1.7163461818046193, "learning_rate": 4.9996590974955266e-05, "loss": 1.1217, "step": 1184 }, { "epoch": 0.14051938811810744, "grad_norm": 1.576818259174689, "learning_rate": 4.9996551220811184e-05, "loss": 1.3079, "step": 1185 }, { "epoch": 0.14063796988023242, "grad_norm": 1.8157868535658144, "learning_rate": 4.9996511236229545e-05, "loss": 1.0443, "step": 1186 }, { "epoch": 0.1407565516423574, "grad_norm": 1.7552961924004054, "learning_rate": 4.9996471021210724e-05, "loss": 1.1268, "step": 1187 }, { "epoch": 0.14087513340448238, "grad_norm": 1.6686548181995204, "learning_rate": 4.99964305757551e-05, "loss": 1.1303, "step": 1188 }, { "epoch": 0.1409937151666074, "grad_norm": 1.6197950471484572, "learning_rate": 4.999638989986304e-05, "loss": 1.0167, "step": 1189 }, { "epoch": 0.14111229692873237, "grad_norm": 1.732361576161734, "learning_rate": 4.999634899353491e-05, "loss": 0.9718, "step": 1190 }, { "epoch": 0.14123087869085735, "grad_norm": 2.066262402635185, "learning_rate": 4.9996307856771096e-05, "loss": 1.2907, "step": 1191 }, { "epoch": 0.14134946045298233, "grad_norm": 1.793716343723841, "learning_rate": 4.999626648957198e-05, "loss": 1.0441, "step": 1192 }, { "epoch": 0.1414680422151073, "grad_norm": 1.8863311630968926, "learning_rate": 4.999622489193794e-05, "loss": 1.3592, "step": 1193 }, { "epoch": 0.1415866239772323, "grad_norm": 1.7735250385507957, "learning_rate": 4.9996183063869355e-05, "loss": 0.9285, "step": 1194 }, { "epoch": 0.1417052057393573, "grad_norm": 1.756861961412436, "learning_rate": 4.999614100536662e-05, "loss": 1.2972, "step": 1195 }, { "epoch": 0.14182378750148228, "grad_norm": 1.6064191847559832, "learning_rate": 4.9996098716430104e-05, "loss": 1.1341, "step": 1196 }, { "epoch": 0.14194236926360726, "grad_norm": 1.6956565278627764, "learning_rate": 4.999605619706022e-05, "loss": 0.9694, "step": 1197 }, { "epoch": 0.14206095102573224, "grad_norm": 1.6832988393402268, "learning_rate": 4.999601344725735e-05, "loss": 1.177, "step": 1198 }, { "epoch": 0.14217953278785722, "grad_norm": 1.8715433487081345, "learning_rate": 4.999597046702189e-05, "loss": 1.1384, "step": 1199 }, { "epoch": 0.1422981145499822, "grad_norm": 1.8804678595689712, "learning_rate": 4.999592725635424e-05, "loss": 1.202, "step": 1200 }, { "epoch": 0.1424166963121072, "grad_norm": 1.5527524736543963, "learning_rate": 4.999588381525478e-05, "loss": 0.7849, "step": 1201 }, { "epoch": 0.1425352780742322, "grad_norm": 1.6428617808559762, "learning_rate": 4.999584014372393e-05, "loss": 0.8942, "step": 1202 }, { "epoch": 0.14265385983635717, "grad_norm": 1.545120757575061, "learning_rate": 4.9995796241762084e-05, "loss": 1.1661, "step": 1203 }, { "epoch": 0.14277244159848215, "grad_norm": 1.7755186816207469, "learning_rate": 4.999575210936965e-05, "loss": 1.309, "step": 1204 }, { "epoch": 0.14289102336060713, "grad_norm": 1.5555418872843174, "learning_rate": 4.999570774654703e-05, "loss": 1.0607, "step": 1205 }, { "epoch": 0.14300960512273211, "grad_norm": 1.902952478040249, "learning_rate": 4.999566315329464e-05, "loss": 1.2914, "step": 1206 }, { "epoch": 0.14312818688485712, "grad_norm": 1.7056092733989878, "learning_rate": 4.999561832961288e-05, "loss": 1.1355, "step": 1207 }, { "epoch": 0.1432467686469821, "grad_norm": 1.6053042385025378, "learning_rate": 4.999557327550218e-05, "loss": 1.2679, "step": 1208 }, { "epoch": 0.14336535040910708, "grad_norm": 1.5607468083661529, "learning_rate": 4.999552799096294e-05, "loss": 1.1363, "step": 1209 }, { "epoch": 0.14348393217123206, "grad_norm": 1.9027257314605164, "learning_rate": 4.999548247599559e-05, "loss": 1.3458, "step": 1210 }, { "epoch": 0.14360251393335705, "grad_norm": 1.7321633169997943, "learning_rate": 4.9995436730600534e-05, "loss": 1.3314, "step": 1211 }, { "epoch": 0.14372109569548203, "grad_norm": 1.499493316242706, "learning_rate": 4.999539075477821e-05, "loss": 1.1102, "step": 1212 }, { "epoch": 0.143839677457607, "grad_norm": 1.6018622171252828, "learning_rate": 4.9995344548529036e-05, "loss": 1.243, "step": 1213 }, { "epoch": 0.14395825921973202, "grad_norm": 1.5331922748009625, "learning_rate": 4.9995298111853425e-05, "loss": 1.0878, "step": 1214 }, { "epoch": 0.144076840981857, "grad_norm": 1.490080635786967, "learning_rate": 4.9995251444751825e-05, "loss": 1.1486, "step": 1215 }, { "epoch": 0.14419542274398198, "grad_norm": 1.5612252033418892, "learning_rate": 4.999520454722466e-05, "loss": 0.9337, "step": 1216 }, { "epoch": 0.14431400450610696, "grad_norm": 1.8738043685946761, "learning_rate": 4.999515741927235e-05, "loss": 1.1505, "step": 1217 }, { "epoch": 0.14443258626823194, "grad_norm": 1.766246009871135, "learning_rate": 4.999511006089536e-05, "loss": 1.3653, "step": 1218 }, { "epoch": 0.14455116803035692, "grad_norm": 1.8767318977122809, "learning_rate": 4.999506247209409e-05, "loss": 1.4427, "step": 1219 }, { "epoch": 0.14466974979248193, "grad_norm": 1.593975328322896, "learning_rate": 4.9995014652869e-05, "loss": 1.0437, "step": 1220 }, { "epoch": 0.1447883315546069, "grad_norm": 1.6004199974583742, "learning_rate": 4.999496660322052e-05, "loss": 1.0681, "step": 1221 }, { "epoch": 0.1449069133167319, "grad_norm": 1.7266539291336964, "learning_rate": 4.99949183231491e-05, "loss": 0.9899, "step": 1222 }, { "epoch": 0.14502549507885687, "grad_norm": 1.861815187600703, "learning_rate": 4.999486981265519e-05, "loss": 1.2356, "step": 1223 }, { "epoch": 0.14514407684098185, "grad_norm": 1.4483260694883633, "learning_rate": 4.999482107173923e-05, "loss": 1.2101, "step": 1224 }, { "epoch": 0.14526265860310683, "grad_norm": 1.6195627653520417, "learning_rate": 4.999477210040166e-05, "loss": 1.1378, "step": 1225 }, { "epoch": 0.14538124036523184, "grad_norm": 1.8762253817057535, "learning_rate": 4.9994722898642954e-05, "loss": 1.0455, "step": 1226 }, { "epoch": 0.14549982212735682, "grad_norm": 1.6191483027767335, "learning_rate": 4.9994673466463555e-05, "loss": 1.0258, "step": 1227 }, { "epoch": 0.1456184038894818, "grad_norm": 1.875887775892771, "learning_rate": 4.9994623803863913e-05, "loss": 1.2244, "step": 1228 }, { "epoch": 0.14573698565160678, "grad_norm": 1.9528563844228284, "learning_rate": 4.999457391084449e-05, "loss": 1.2241, "step": 1229 }, { "epoch": 0.14585556741373176, "grad_norm": 2.454791627347521, "learning_rate": 4.999452378740574e-05, "loss": 1.3756, "step": 1230 }, { "epoch": 0.14597414917585674, "grad_norm": 1.685031826684907, "learning_rate": 4.999447343354814e-05, "loss": 1.3783, "step": 1231 }, { "epoch": 0.14609273093798175, "grad_norm": 1.7289974628520888, "learning_rate": 4.999442284927214e-05, "loss": 1.198, "step": 1232 }, { "epoch": 0.14621131270010673, "grad_norm": 1.667868317618341, "learning_rate": 4.999437203457821e-05, "loss": 1.1139, "step": 1233 }, { "epoch": 0.1463298944622317, "grad_norm": 1.4831022830691543, "learning_rate": 4.999432098946683e-05, "loss": 0.9182, "step": 1234 }, { "epoch": 0.1464484762243567, "grad_norm": 1.6292358001358807, "learning_rate": 4.999426971393845e-05, "loss": 1.1391, "step": 1235 }, { "epoch": 0.14656705798648167, "grad_norm": 1.8952586846307453, "learning_rate": 4.999421820799355e-05, "loss": 0.9573, "step": 1236 }, { "epoch": 0.14668563974860666, "grad_norm": 1.7177120734797628, "learning_rate": 4.999416647163262e-05, "loss": 1.1769, "step": 1237 }, { "epoch": 0.14680422151073164, "grad_norm": 1.71426271180194, "learning_rate": 4.9994114504856114e-05, "loss": 1.3694, "step": 1238 }, { "epoch": 0.14692280327285664, "grad_norm": 1.770585262166228, "learning_rate": 4.9994062307664524e-05, "loss": 1.0823, "step": 1239 }, { "epoch": 0.14704138503498163, "grad_norm": 1.4668295248851515, "learning_rate": 4.9994009880058336e-05, "loss": 1.1306, "step": 1240 }, { "epoch": 0.1471599667971066, "grad_norm": 1.8994295369008685, "learning_rate": 4.9993957222038024e-05, "loss": 1.153, "step": 1241 }, { "epoch": 0.1472785485592316, "grad_norm": 2.534146900128462, "learning_rate": 4.9993904333604075e-05, "loss": 1.287, "step": 1242 }, { "epoch": 0.14739713032135657, "grad_norm": 1.6395031609579849, "learning_rate": 4.999385121475698e-05, "loss": 1.1112, "step": 1243 }, { "epoch": 0.14751571208348155, "grad_norm": 1.6593852805239584, "learning_rate": 4.999379786549722e-05, "loss": 1.0031, "step": 1244 }, { "epoch": 0.14763429384560656, "grad_norm": 1.684307673154832, "learning_rate": 4.9993744285825294e-05, "loss": 0.6741, "step": 1245 }, { "epoch": 0.14775287560773154, "grad_norm": 1.8788236046229874, "learning_rate": 4.999369047574171e-05, "loss": 1.2566, "step": 1246 }, { "epoch": 0.14787145736985652, "grad_norm": 1.832074938470291, "learning_rate": 4.9993636435246925e-05, "loss": 0.9454, "step": 1247 }, { "epoch": 0.1479900391319815, "grad_norm": 1.9481679343587273, "learning_rate": 4.999358216434148e-05, "loss": 1.264, "step": 1248 }, { "epoch": 0.14810862089410648, "grad_norm": 1.5833200533708982, "learning_rate": 4.999352766302585e-05, "loss": 1.2313, "step": 1249 }, { "epoch": 0.14822720265623146, "grad_norm": 1.765278360808599, "learning_rate": 4.999347293130055e-05, "loss": 1.4331, "step": 1250 }, { "epoch": 0.14834578441835647, "grad_norm": 1.5132943499508853, "learning_rate": 4.999341796916607e-05, "loss": 1.1507, "step": 1251 }, { "epoch": 0.14846436618048145, "grad_norm": 1.638414784503229, "learning_rate": 4.999336277662292e-05, "loss": 1.1789, "step": 1252 }, { "epoch": 0.14858294794260643, "grad_norm": 1.5462375596262405, "learning_rate": 4.999330735367163e-05, "loss": 1.0959, "step": 1253 }, { "epoch": 0.1487015297047314, "grad_norm": 1.8943635083491603, "learning_rate": 4.999325170031268e-05, "loss": 1.3259, "step": 1254 }, { "epoch": 0.1488201114668564, "grad_norm": 1.5685533536017315, "learning_rate": 4.9993195816546606e-05, "loss": 1.4312, "step": 1255 }, { "epoch": 0.14893869322898137, "grad_norm": 1.7391159423665425, "learning_rate": 4.999313970237392e-05, "loss": 0.9545, "step": 1256 }, { "epoch": 0.14905727499110638, "grad_norm": 1.518565995609839, "learning_rate": 4.999308335779512e-05, "loss": 1.0849, "step": 1257 }, { "epoch": 0.14917585675323136, "grad_norm": 1.575538375423931, "learning_rate": 4.999302678281075e-05, "loss": 1.1261, "step": 1258 }, { "epoch": 0.14929443851535634, "grad_norm": 1.6499055699179195, "learning_rate": 4.9992969977421326e-05, "loss": 1.232, "step": 1259 }, { "epoch": 0.14941302027748132, "grad_norm": 1.8399268893507483, "learning_rate": 4.999291294162736e-05, "loss": 1.1835, "step": 1260 }, { "epoch": 0.1495316020396063, "grad_norm": 1.8902854118282284, "learning_rate": 4.999285567542938e-05, "loss": 1.2072, "step": 1261 }, { "epoch": 0.14965018380173128, "grad_norm": 1.5635735692501445, "learning_rate": 4.9992798178827935e-05, "loss": 1.1974, "step": 1262 }, { "epoch": 0.14976876556385627, "grad_norm": 1.4881035857644036, "learning_rate": 4.9992740451823525e-05, "loss": 1.0461, "step": 1263 }, { "epoch": 0.14988734732598127, "grad_norm": 1.619204907852576, "learning_rate": 4.99926824944167e-05, "loss": 1.123, "step": 1264 }, { "epoch": 0.15000592908810625, "grad_norm": 1.5595905844771718, "learning_rate": 4.9992624306607996e-05, "loss": 0.7769, "step": 1265 }, { "epoch": 0.15012451085023124, "grad_norm": 1.6648495040578173, "learning_rate": 4.9992565888397934e-05, "loss": 1.0967, "step": 1266 }, { "epoch": 0.15024309261235622, "grad_norm": 1.7292098187813407, "learning_rate": 4.999250723978707e-05, "loss": 0.9824, "step": 1267 }, { "epoch": 0.1503616743744812, "grad_norm": 1.6001301790444746, "learning_rate": 4.9992448360775934e-05, "loss": 1.0827, "step": 1268 }, { "epoch": 0.15048025613660618, "grad_norm": 2.329032038747233, "learning_rate": 4.999238925136507e-05, "loss": 1.0457, "step": 1269 }, { "epoch": 0.15059883789873119, "grad_norm": 2.121790223281565, "learning_rate": 4.999232991155503e-05, "loss": 0.9609, "step": 1270 }, { "epoch": 0.15071741966085617, "grad_norm": 2.1223927317500637, "learning_rate": 4.999227034134635e-05, "loss": 1.0869, "step": 1271 }, { "epoch": 0.15083600142298115, "grad_norm": 1.4723309732232674, "learning_rate": 4.999221054073959e-05, "loss": 1.0952, "step": 1272 }, { "epoch": 0.15095458318510613, "grad_norm": 2.0059441102497986, "learning_rate": 4.9992150509735295e-05, "loss": 1.0492, "step": 1273 }, { "epoch": 0.1510731649472311, "grad_norm": 1.574135491265611, "learning_rate": 4.9992090248334025e-05, "loss": 1.0685, "step": 1274 }, { "epoch": 0.1511917467093561, "grad_norm": 1.5836557229593584, "learning_rate": 4.999202975653632e-05, "loss": 0.9973, "step": 1275 }, { "epoch": 0.1513103284714811, "grad_norm": 2.0161056400445423, "learning_rate": 4.9991969034342755e-05, "loss": 1.2552, "step": 1276 }, { "epoch": 0.15142891023360608, "grad_norm": 1.4780318902030296, "learning_rate": 4.999190808175388e-05, "loss": 1.1236, "step": 1277 }, { "epoch": 0.15154749199573106, "grad_norm": 2.085160808055643, "learning_rate": 4.999184689877026e-05, "loss": 1.2996, "step": 1278 }, { "epoch": 0.15166607375785604, "grad_norm": 1.4645509468166276, "learning_rate": 4.999178548539246e-05, "loss": 1.0603, "step": 1279 }, { "epoch": 0.15178465551998102, "grad_norm": 2.300259013177836, "learning_rate": 4.999172384162104e-05, "loss": 1.4209, "step": 1280 }, { "epoch": 0.151903237282106, "grad_norm": 1.7191836693698612, "learning_rate": 4.999166196745657e-05, "loss": 1.0689, "step": 1281 }, { "epoch": 0.152021819044231, "grad_norm": 1.8953768239030517, "learning_rate": 4.999159986289963e-05, "loss": 0.9715, "step": 1282 }, { "epoch": 0.152140400806356, "grad_norm": 1.5667141784888319, "learning_rate": 4.999153752795079e-05, "loss": 1.0007, "step": 1283 }, { "epoch": 0.15225898256848097, "grad_norm": 1.8265074488912159, "learning_rate": 4.999147496261062e-05, "loss": 1.0861, "step": 1284 }, { "epoch": 0.15237756433060595, "grad_norm": 1.610189090899638, "learning_rate": 4.999141216687969e-05, "loss": 1.1591, "step": 1285 }, { "epoch": 0.15249614609273093, "grad_norm": 1.547954513498484, "learning_rate": 4.999134914075859e-05, "loss": 1.1521, "step": 1286 }, { "epoch": 0.1526147278548559, "grad_norm": 1.7512618319061934, "learning_rate": 4.99912858842479e-05, "loss": 1.1999, "step": 1287 }, { "epoch": 0.1527333096169809, "grad_norm": 1.582230793080285, "learning_rate": 4.9991222397348194e-05, "loss": 1.0295, "step": 1288 }, { "epoch": 0.1528518913791059, "grad_norm": 1.7510589960283165, "learning_rate": 4.999115868006007e-05, "loss": 1.1902, "step": 1289 }, { "epoch": 0.15297047314123088, "grad_norm": 1.8049958783819773, "learning_rate": 4.9991094732384104e-05, "loss": 0.9099, "step": 1290 }, { "epoch": 0.15308905490335586, "grad_norm": 1.5267877574714108, "learning_rate": 4.99910305543209e-05, "loss": 0.8774, "step": 1291 }, { "epoch": 0.15320763666548085, "grad_norm": 1.6976143638780485, "learning_rate": 4.9990966145871023e-05, "loss": 1.3123, "step": 1292 }, { "epoch": 0.15332621842760583, "grad_norm": 1.792778435355543, "learning_rate": 4.99909015070351e-05, "loss": 1.1311, "step": 1293 }, { "epoch": 0.1534448001897308, "grad_norm": 1.5252738558389507, "learning_rate": 4.99908366378137e-05, "loss": 1.007, "step": 1294 }, { "epoch": 0.15356338195185582, "grad_norm": 1.5763695995571658, "learning_rate": 4.9990771538207435e-05, "loss": 1.081, "step": 1295 }, { "epoch": 0.1536819637139808, "grad_norm": 1.826054469333273, "learning_rate": 4.9990706208216906e-05, "loss": 0.925, "step": 1296 }, { "epoch": 0.15380054547610578, "grad_norm": 1.7731372862635013, "learning_rate": 4.999064064784271e-05, "loss": 1.3251, "step": 1297 }, { "epoch": 0.15391912723823076, "grad_norm": 1.9228675927892496, "learning_rate": 4.999057485708546e-05, "loss": 1.2426, "step": 1298 }, { "epoch": 0.15403770900035574, "grad_norm": 1.7422907778919667, "learning_rate": 4.999050883594575e-05, "loss": 1.0248, "step": 1299 }, { "epoch": 0.15415629076248072, "grad_norm": 1.5990375363783687, "learning_rate": 4.999044258442419e-05, "loss": 1.2214, "step": 1300 }, { "epoch": 0.15427487252460573, "grad_norm": 1.6756229929557358, "learning_rate": 4.9990376102521394e-05, "loss": 1.0051, "step": 1301 }, { "epoch": 0.1543934542867307, "grad_norm": 1.4674982656206077, "learning_rate": 4.999030939023798e-05, "loss": 1.0562, "step": 1302 }, { "epoch": 0.1545120360488557, "grad_norm": 1.4418958174552292, "learning_rate": 4.999024244757456e-05, "loss": 1.193, "step": 1303 }, { "epoch": 0.15463061781098067, "grad_norm": 1.8164264072967786, "learning_rate": 4.999017527453174e-05, "loss": 1.2013, "step": 1304 }, { "epoch": 0.15474919957310565, "grad_norm": 1.5694835313937379, "learning_rate": 4.999010787111016e-05, "loss": 0.9759, "step": 1305 }, { "epoch": 0.15486778133523063, "grad_norm": 1.54852895811122, "learning_rate": 4.999004023731042e-05, "loss": 1.3139, "step": 1306 }, { "epoch": 0.15498636309735564, "grad_norm": 1.4948946221920836, "learning_rate": 4.998997237313316e-05, "loss": 1.2137, "step": 1307 }, { "epoch": 0.15510494485948062, "grad_norm": 1.8472444111636044, "learning_rate": 4.9989904278579004e-05, "loss": 0.9788, "step": 1308 }, { "epoch": 0.1552235266216056, "grad_norm": 1.672584990814677, "learning_rate": 4.998983595364857e-05, "loss": 1.1889, "step": 1309 }, { "epoch": 0.15534210838373058, "grad_norm": 1.6549177998243312, "learning_rate": 4.998976739834249e-05, "loss": 0.8923, "step": 1310 }, { "epoch": 0.15546069014585556, "grad_norm": 1.46146487362686, "learning_rate": 4.9989698612661405e-05, "loss": 0.7893, "step": 1311 }, { "epoch": 0.15557927190798054, "grad_norm": 1.7377509622463765, "learning_rate": 4.998962959660594e-05, "loss": 1.2051, "step": 1312 }, { "epoch": 0.15569785367010555, "grad_norm": 1.7403031131102804, "learning_rate": 4.998956035017673e-05, "loss": 0.7649, "step": 1313 }, { "epoch": 0.15581643543223053, "grad_norm": 2.0088239292121397, "learning_rate": 4.998949087337443e-05, "loss": 1.3126, "step": 1314 }, { "epoch": 0.1559350171943555, "grad_norm": 1.6978563988062945, "learning_rate": 4.998942116619966e-05, "loss": 0.898, "step": 1315 }, { "epoch": 0.1560535989564805, "grad_norm": 1.6525233218943678, "learning_rate": 4.998935122865307e-05, "loss": 0.8661, "step": 1316 }, { "epoch": 0.15617218071860547, "grad_norm": 1.5736867371926349, "learning_rate": 4.998928106073531e-05, "loss": 1.1599, "step": 1317 }, { "epoch": 0.15629076248073046, "grad_norm": 1.776632946810059, "learning_rate": 4.998921066244702e-05, "loss": 1.2149, "step": 1318 }, { "epoch": 0.15640934424285544, "grad_norm": 1.7595684570916508, "learning_rate": 4.998914003378885e-05, "loss": 1.0396, "step": 1319 }, { "epoch": 0.15652792600498044, "grad_norm": 1.6960609255284567, "learning_rate": 4.998906917476146e-05, "loss": 1.0262, "step": 1320 }, { "epoch": 0.15664650776710543, "grad_norm": 1.4378377791277002, "learning_rate": 4.9988998085365485e-05, "loss": 1.1506, "step": 1321 }, { "epoch": 0.1567650895292304, "grad_norm": 1.758680227858358, "learning_rate": 4.9988926765601595e-05, "loss": 1.2901, "step": 1322 }, { "epoch": 0.1568836712913554, "grad_norm": 1.8502493514386316, "learning_rate": 4.9988855215470445e-05, "loss": 1.2058, "step": 1323 }, { "epoch": 0.15700225305348037, "grad_norm": 1.8381725506849718, "learning_rate": 4.9988783434972694e-05, "loss": 1.1684, "step": 1324 }, { "epoch": 0.15712083481560535, "grad_norm": 1.6551184548015327, "learning_rate": 4.9988711424109005e-05, "loss": 1.1479, "step": 1325 }, { "epoch": 0.15723941657773036, "grad_norm": 1.5375455606786526, "learning_rate": 4.9988639182880035e-05, "loss": 1.013, "step": 1326 }, { "epoch": 0.15735799833985534, "grad_norm": 1.8127577910221961, "learning_rate": 4.9988566711286456e-05, "loss": 1.0397, "step": 1327 }, { "epoch": 0.15747658010198032, "grad_norm": 1.7400170529434082, "learning_rate": 4.998849400932894e-05, "loss": 1.1312, "step": 1328 }, { "epoch": 0.1575951618641053, "grad_norm": 1.5720575633215734, "learning_rate": 4.9988421077008144e-05, "loss": 1.1339, "step": 1329 }, { "epoch": 0.15771374362623028, "grad_norm": 1.616725737387187, "learning_rate": 4.9988347914324754e-05, "loss": 1.0865, "step": 1330 }, { "epoch": 0.15783232538835526, "grad_norm": 1.846202621900896, "learning_rate": 4.998827452127944e-05, "loss": 0.9726, "step": 1331 }, { "epoch": 0.15795090715048027, "grad_norm": 1.4488651346871844, "learning_rate": 4.998820089787287e-05, "loss": 0.9041, "step": 1332 }, { "epoch": 0.15806948891260525, "grad_norm": 1.9464798400163108, "learning_rate": 4.9988127044105735e-05, "loss": 0.9932, "step": 1333 }, { "epoch": 0.15818807067473023, "grad_norm": 1.7024723527878545, "learning_rate": 4.998805295997872e-05, "loss": 1.0388, "step": 1334 }, { "epoch": 0.1583066524368552, "grad_norm": 1.701304856457065, "learning_rate": 4.9987978645492485e-05, "loss": 1.2282, "step": 1335 }, { "epoch": 0.1584252341989802, "grad_norm": 1.7499374448902882, "learning_rate": 4.998790410064773e-05, "loss": 1.2021, "step": 1336 }, { "epoch": 0.15854381596110517, "grad_norm": 1.606080755997141, "learning_rate": 4.998782932544515e-05, "loss": 1.2471, "step": 1337 }, { "epoch": 0.15866239772323018, "grad_norm": 1.9367341909970133, "learning_rate": 4.998775431988542e-05, "loss": 1.0381, "step": 1338 }, { "epoch": 0.15878097948535516, "grad_norm": 2.558115803232956, "learning_rate": 4.998767908396924e-05, "loss": 1.2334, "step": 1339 }, { "epoch": 0.15889956124748014, "grad_norm": 1.7251439119382295, "learning_rate": 4.99876036176973e-05, "loss": 1.0612, "step": 1340 }, { "epoch": 0.15901814300960512, "grad_norm": 1.693088520139888, "learning_rate": 4.99875279210703e-05, "loss": 1.0109, "step": 1341 }, { "epoch": 0.1591367247717301, "grad_norm": 1.6539676170027573, "learning_rate": 4.998745199408893e-05, "loss": 1.1749, "step": 1342 }, { "epoch": 0.15925530653385508, "grad_norm": 1.9358835265570449, "learning_rate": 4.998737583675389e-05, "loss": 1.1675, "step": 1343 }, { "epoch": 0.15937388829598007, "grad_norm": 1.5526320915382463, "learning_rate": 4.998729944906589e-05, "loss": 1.271, "step": 1344 }, { "epoch": 0.15949247005810507, "grad_norm": 1.416996187632587, "learning_rate": 4.998722283102564e-05, "loss": 1.1774, "step": 1345 }, { "epoch": 0.15961105182023005, "grad_norm": 1.8451392660335684, "learning_rate": 4.998714598263382e-05, "loss": 1.2966, "step": 1346 }, { "epoch": 0.15972963358235504, "grad_norm": 1.6480227665248777, "learning_rate": 4.9987068903891166e-05, "loss": 1.0489, "step": 1347 }, { "epoch": 0.15984821534448002, "grad_norm": 1.6033367077826735, "learning_rate": 4.998699159479838e-05, "loss": 1.1511, "step": 1348 }, { "epoch": 0.159966797106605, "grad_norm": 1.5169813261234948, "learning_rate": 4.9986914055356164e-05, "loss": 1.1875, "step": 1349 }, { "epoch": 0.16008537886872998, "grad_norm": 1.4747467622712633, "learning_rate": 4.9986836285565245e-05, "loss": 0.8391, "step": 1350 }, { "epoch": 0.16020396063085499, "grad_norm": 1.655132936590586, "learning_rate": 4.998675828542634e-05, "loss": 0.9983, "step": 1351 }, { "epoch": 0.16032254239297997, "grad_norm": 1.7669111830096487, "learning_rate": 4.998668005494016e-05, "loss": 0.9647, "step": 1352 }, { "epoch": 0.16044112415510495, "grad_norm": 2.1959404382497616, "learning_rate": 4.998660159410743e-05, "loss": 0.9244, "step": 1353 }, { "epoch": 0.16055970591722993, "grad_norm": 2.279622871669324, "learning_rate": 4.9986522902928875e-05, "loss": 1.2936, "step": 1354 }, { "epoch": 0.1606782876793549, "grad_norm": 2.5419759825599613, "learning_rate": 4.998644398140522e-05, "loss": 1.2477, "step": 1355 }, { "epoch": 0.1607968694414799, "grad_norm": 1.885575999643548, "learning_rate": 4.998636482953719e-05, "loss": 1.1913, "step": 1356 }, { "epoch": 0.1609154512036049, "grad_norm": 1.4529551505269742, "learning_rate": 4.998628544732552e-05, "loss": 0.7107, "step": 1357 }, { "epoch": 0.16103403296572988, "grad_norm": 1.470745515845118, "learning_rate": 4.998620583477094e-05, "loss": 1.0223, "step": 1358 }, { "epoch": 0.16115261472785486, "grad_norm": 1.5322377747313256, "learning_rate": 4.998612599187418e-05, "loss": 1.18, "step": 1359 }, { "epoch": 0.16127119648997984, "grad_norm": 2.0058627224604413, "learning_rate": 4.998604591863598e-05, "loss": 1.0544, "step": 1360 }, { "epoch": 0.16138977825210482, "grad_norm": 1.4782689787274887, "learning_rate": 4.9985965615057074e-05, "loss": 1.1748, "step": 1361 }, { "epoch": 0.1615083600142298, "grad_norm": 1.6602189463986323, "learning_rate": 4.99858850811382e-05, "loss": 1.1235, "step": 1362 }, { "epoch": 0.1616269417763548, "grad_norm": 1.4690540337794304, "learning_rate": 4.998580431688011e-05, "loss": 0.943, "step": 1363 }, { "epoch": 0.1617455235384798, "grad_norm": 1.5618257016045878, "learning_rate": 4.998572332228354e-05, "loss": 1.0188, "step": 1364 }, { "epoch": 0.16186410530060477, "grad_norm": 1.6128620612067248, "learning_rate": 4.998564209734925e-05, "loss": 1.1391, "step": 1365 }, { "epoch": 0.16198268706272975, "grad_norm": 1.6236778085614243, "learning_rate": 4.998556064207798e-05, "loss": 0.8928, "step": 1366 }, { "epoch": 0.16210126882485473, "grad_norm": 1.7596560704096995, "learning_rate": 4.998547895647047e-05, "loss": 1.2288, "step": 1367 }, { "epoch": 0.1622198505869797, "grad_norm": 1.718194006170096, "learning_rate": 4.998539704052749e-05, "loss": 0.9102, "step": 1368 }, { "epoch": 0.1623384323491047, "grad_norm": 1.6039558904898432, "learning_rate": 4.998531489424978e-05, "loss": 0.9787, "step": 1369 }, { "epoch": 0.1624570141112297, "grad_norm": 1.7869624821254941, "learning_rate": 4.9985232517638115e-05, "loss": 1.0885, "step": 1370 }, { "epoch": 0.16257559587335468, "grad_norm": 1.7293962754362249, "learning_rate": 4.9985149910693244e-05, "loss": 1.1675, "step": 1371 }, { "epoch": 0.16269417763547966, "grad_norm": 1.8622520945426955, "learning_rate": 4.998506707341592e-05, "loss": 1.198, "step": 1372 }, { "epoch": 0.16281275939760464, "grad_norm": 1.8362667213437427, "learning_rate": 4.9984984005806925e-05, "loss": 1.0501, "step": 1373 }, { "epoch": 0.16293134115972963, "grad_norm": 1.9210347977058373, "learning_rate": 4.9984900707867016e-05, "loss": 1.0317, "step": 1374 }, { "epoch": 0.1630499229218546, "grad_norm": 1.7071580333555145, "learning_rate": 4.998481717959696e-05, "loss": 1.2073, "step": 1375 }, { "epoch": 0.16316850468397961, "grad_norm": 1.4449328645374082, "learning_rate": 4.998473342099753e-05, "loss": 0.8054, "step": 1376 }, { "epoch": 0.1632870864461046, "grad_norm": 1.6902855839068058, "learning_rate": 4.998464943206949e-05, "loss": 1.1447, "step": 1377 }, { "epoch": 0.16340566820822958, "grad_norm": 1.547117913099056, "learning_rate": 4.9984565212813626e-05, "loss": 0.9696, "step": 1378 }, { "epoch": 0.16352424997035456, "grad_norm": 1.7937492179941836, "learning_rate": 4.998448076323071e-05, "loss": 1.0036, "step": 1379 }, { "epoch": 0.16364283173247954, "grad_norm": 1.4696067729434532, "learning_rate": 4.998439608332152e-05, "loss": 0.7646, "step": 1380 }, { "epoch": 0.16376141349460452, "grad_norm": 1.6232357800223798, "learning_rate": 4.998431117308683e-05, "loss": 1.0554, "step": 1381 }, { "epoch": 0.16387999525672953, "grad_norm": 1.7774372565714536, "learning_rate": 4.9984226032527427e-05, "loss": 1.1452, "step": 1382 }, { "epoch": 0.1639985770188545, "grad_norm": 1.5122969230074477, "learning_rate": 4.9984140661644104e-05, "loss": 1.1687, "step": 1383 }, { "epoch": 0.1641171587809795, "grad_norm": 1.707824051786149, "learning_rate": 4.998405506043764e-05, "loss": 0.992, "step": 1384 }, { "epoch": 0.16423574054310447, "grad_norm": 1.7964291947552244, "learning_rate": 4.998396922890882e-05, "loss": 1.2336, "step": 1385 }, { "epoch": 0.16435432230522945, "grad_norm": 1.8507942404277464, "learning_rate": 4.9983883167058446e-05, "loss": 1.1822, "step": 1386 }, { "epoch": 0.16447290406735443, "grad_norm": 1.7083047860778546, "learning_rate": 4.998379687488731e-05, "loss": 1.0221, "step": 1387 }, { "epoch": 0.16459148582947944, "grad_norm": 1.8890501093404444, "learning_rate": 4.9983710352396194e-05, "loss": 1.1994, "step": 1388 }, { "epoch": 0.16471006759160442, "grad_norm": 1.6359330352054908, "learning_rate": 4.9983623599585916e-05, "loss": 0.814, "step": 1389 }, { "epoch": 0.1648286493537294, "grad_norm": 1.8083874461192206, "learning_rate": 4.998353661645726e-05, "loss": 1.2825, "step": 1390 }, { "epoch": 0.16494723111585438, "grad_norm": 1.4181332793894104, "learning_rate": 4.998344940301104e-05, "loss": 0.9403, "step": 1391 }, { "epoch": 0.16506581287797936, "grad_norm": 1.7019727168625205, "learning_rate": 4.998336195924804e-05, "loss": 1.1425, "step": 1392 }, { "epoch": 0.16518439464010434, "grad_norm": 1.5952923275470947, "learning_rate": 4.9983274285169093e-05, "loss": 1.0634, "step": 1393 }, { "epoch": 0.16530297640222932, "grad_norm": 1.8433030883516914, "learning_rate": 4.9983186380774996e-05, "loss": 1.2255, "step": 1394 }, { "epoch": 0.16542155816435433, "grad_norm": 1.8219505592892096, "learning_rate": 4.998309824606654e-05, "loss": 1.1347, "step": 1395 }, { "epoch": 0.1655401399264793, "grad_norm": 1.755335266754154, "learning_rate": 4.998300988104456e-05, "loss": 1.0888, "step": 1396 }, { "epoch": 0.1656587216886043, "grad_norm": 1.675502777760315, "learning_rate": 4.9982921285709874e-05, "loss": 1.0251, "step": 1397 }, { "epoch": 0.16577730345072927, "grad_norm": 1.6596941761025816, "learning_rate": 4.998283246006329e-05, "loss": 1.1501, "step": 1398 }, { "epoch": 0.16589588521285425, "grad_norm": 1.4645756874087867, "learning_rate": 4.998274340410561e-05, "loss": 1.2798, "step": 1399 }, { "epoch": 0.16601446697497924, "grad_norm": 1.5820250620259302, "learning_rate": 4.998265411783769e-05, "loss": 1.1002, "step": 1400 }, { "epoch": 0.16613304873710424, "grad_norm": 1.4078825987526167, "learning_rate": 4.998256460126033e-05, "loss": 0.9054, "step": 1401 }, { "epoch": 0.16625163049922922, "grad_norm": 1.5243710760865883, "learning_rate": 4.998247485437436e-05, "loss": 0.9966, "step": 1402 }, { "epoch": 0.1663702122613542, "grad_norm": 1.6470781801332368, "learning_rate": 4.99823848771806e-05, "loss": 0.9787, "step": 1403 }, { "epoch": 0.1664887940234792, "grad_norm": 1.5889052541175233, "learning_rate": 4.998229466967989e-05, "loss": 1.0884, "step": 1404 }, { "epoch": 0.16660737578560417, "grad_norm": 1.3704529808819026, "learning_rate": 4.998220423187306e-05, "loss": 1.2483, "step": 1405 }, { "epoch": 0.16672595754772915, "grad_norm": 1.927980647407345, "learning_rate": 4.9982113563760945e-05, "loss": 1.2073, "step": 1406 }, { "epoch": 0.16684453930985416, "grad_norm": 1.5410154330439827, "learning_rate": 4.998202266534438e-05, "loss": 1.0818, "step": 1407 }, { "epoch": 0.16696312107197914, "grad_norm": 1.402050506527795, "learning_rate": 4.998193153662419e-05, "loss": 1.1509, "step": 1408 }, { "epoch": 0.16708170283410412, "grad_norm": 1.4241215245509102, "learning_rate": 4.998184017760123e-05, "loss": 1.1866, "step": 1409 }, { "epoch": 0.1672002845962291, "grad_norm": 1.5441533052492682, "learning_rate": 4.998174858827634e-05, "loss": 1.1454, "step": 1410 }, { "epoch": 0.16731886635835408, "grad_norm": 1.5800478469216788, "learning_rate": 4.998165676865037e-05, "loss": 0.8861, "step": 1411 }, { "epoch": 0.16743744812047906, "grad_norm": 1.5705578617296188, "learning_rate": 4.998156471872415e-05, "loss": 1.0652, "step": 1412 }, { "epoch": 0.16755602988260407, "grad_norm": 1.4863429599215927, "learning_rate": 4.998147243849855e-05, "loss": 1.1484, "step": 1413 }, { "epoch": 0.16767461164472905, "grad_norm": 1.6347150361636131, "learning_rate": 4.998137992797439e-05, "loss": 1.0709, "step": 1414 }, { "epoch": 0.16779319340685403, "grad_norm": 1.5368943593595368, "learning_rate": 4.9981287187152546e-05, "loss": 1.1044, "step": 1415 }, { "epoch": 0.167911775168979, "grad_norm": 1.6502131474726844, "learning_rate": 4.9981194216033875e-05, "loss": 1.1846, "step": 1416 }, { "epoch": 0.168030356931104, "grad_norm": 1.9846678863048601, "learning_rate": 4.9981101014619224e-05, "loss": 0.884, "step": 1417 }, { "epoch": 0.16814893869322897, "grad_norm": 1.3600737258061761, "learning_rate": 4.998100758290946e-05, "loss": 0.7511, "step": 1418 }, { "epoch": 0.16826752045535395, "grad_norm": 1.5691300525267515, "learning_rate": 4.998091392090544e-05, "loss": 1.199, "step": 1419 }, { "epoch": 0.16838610221747896, "grad_norm": 1.7665247104005, "learning_rate": 4.9980820028608025e-05, "loss": 1.1579, "step": 1420 }, { "epoch": 0.16850468397960394, "grad_norm": 1.6733398921950076, "learning_rate": 4.9980725906018074e-05, "loss": 0.9913, "step": 1421 }, { "epoch": 0.16862326574172892, "grad_norm": 1.7739747092382598, "learning_rate": 4.998063155313647e-05, "loss": 1.2746, "step": 1422 }, { "epoch": 0.1687418475038539, "grad_norm": 1.682865054549608, "learning_rate": 4.998053696996408e-05, "loss": 1.13, "step": 1423 }, { "epoch": 0.16886042926597888, "grad_norm": 1.8069592696566756, "learning_rate": 4.998044215650177e-05, "loss": 1.1755, "step": 1424 }, { "epoch": 0.16897901102810386, "grad_norm": 1.6273140479001253, "learning_rate": 4.9980347112750414e-05, "loss": 0.8599, "step": 1425 }, { "epoch": 0.16909759279022887, "grad_norm": 1.5384645308976084, "learning_rate": 4.998025183871089e-05, "loss": 1.0985, "step": 1426 }, { "epoch": 0.16921617455235385, "grad_norm": 1.7328047000606086, "learning_rate": 4.9980156334384084e-05, "loss": 1.1288, "step": 1427 }, { "epoch": 0.16933475631447883, "grad_norm": 1.5348274356762661, "learning_rate": 4.998006059977086e-05, "loss": 1.0162, "step": 1428 }, { "epoch": 0.16945333807660382, "grad_norm": 1.653035063915359, "learning_rate": 4.997996463487212e-05, "loss": 0.9438, "step": 1429 }, { "epoch": 0.1695719198387288, "grad_norm": 1.619869964671937, "learning_rate": 4.997986843968873e-05, "loss": 1.0679, "step": 1430 }, { "epoch": 0.16969050160085378, "grad_norm": 1.7021100950289887, "learning_rate": 4.997977201422159e-05, "loss": 1.1952, "step": 1431 }, { "epoch": 0.16980908336297879, "grad_norm": 1.7110899386599807, "learning_rate": 4.997967535847158e-05, "loss": 1.0525, "step": 1432 }, { "epoch": 0.16992766512510377, "grad_norm": 1.8266389853219662, "learning_rate": 4.99795784724396e-05, "loss": 1.0271, "step": 1433 }, { "epoch": 0.17004624688722875, "grad_norm": 1.8332102111802118, "learning_rate": 4.997948135612653e-05, "loss": 0.8833, "step": 1434 }, { "epoch": 0.17016482864935373, "grad_norm": 1.6140013024256237, "learning_rate": 4.997938400953328e-05, "loss": 1.1543, "step": 1435 }, { "epoch": 0.1702834104114787, "grad_norm": 1.6583537380804787, "learning_rate": 4.997928643266074e-05, "loss": 1.1544, "step": 1436 }, { "epoch": 0.1704019921736037, "grad_norm": 1.9182840917386577, "learning_rate": 4.9979188625509814e-05, "loss": 0.8248, "step": 1437 }, { "epoch": 0.1705205739357287, "grad_norm": 1.7287301114514082, "learning_rate": 4.99790905880814e-05, "loss": 1.2935, "step": 1438 }, { "epoch": 0.17063915569785368, "grad_norm": 1.5761391017594453, "learning_rate": 4.99789923203764e-05, "loss": 1.0135, "step": 1439 }, { "epoch": 0.17075773745997866, "grad_norm": 1.6950791739319335, "learning_rate": 4.9978893822395724e-05, "loss": 1.142, "step": 1440 }, { "epoch": 0.17087631922210364, "grad_norm": 1.44275203338743, "learning_rate": 4.9978795094140275e-05, "loss": 0.7813, "step": 1441 }, { "epoch": 0.17099490098422862, "grad_norm": 1.7452004002603028, "learning_rate": 4.997869613561097e-05, "loss": 1.1846, "step": 1442 }, { "epoch": 0.1711134827463536, "grad_norm": 1.4030538144467086, "learning_rate": 4.997859694680871e-05, "loss": 1.1223, "step": 1443 }, { "epoch": 0.1712320645084786, "grad_norm": 1.4818477284841778, "learning_rate": 4.9978497527734426e-05, "loss": 0.908, "step": 1444 }, { "epoch": 0.1713506462706036, "grad_norm": 1.5831523565636467, "learning_rate": 4.9978397878389024e-05, "loss": 1.0888, "step": 1445 }, { "epoch": 0.17146922803272857, "grad_norm": 1.636573204338768, "learning_rate": 4.997829799877342e-05, "loss": 1.1432, "step": 1446 }, { "epoch": 0.17158780979485355, "grad_norm": 1.4279258283797855, "learning_rate": 4.997819788888854e-05, "loss": 0.9166, "step": 1447 }, { "epoch": 0.17170639155697853, "grad_norm": 1.5964752851183637, "learning_rate": 4.9978097548735306e-05, "loss": 1.1063, "step": 1448 }, { "epoch": 0.1718249733191035, "grad_norm": 1.7517647870587683, "learning_rate": 4.997799697831464e-05, "loss": 1.1025, "step": 1449 }, { "epoch": 0.1719435550812285, "grad_norm": 1.380213447604419, "learning_rate": 4.997789617762748e-05, "loss": 0.7001, "step": 1450 }, { "epoch": 0.1720621368433535, "grad_norm": 1.8734690070113083, "learning_rate": 4.997779514667474e-05, "loss": 1.2019, "step": 1451 }, { "epoch": 0.17218071860547848, "grad_norm": 1.8192272605829554, "learning_rate": 4.997769388545736e-05, "loss": 1.1731, "step": 1452 }, { "epoch": 0.17229930036760346, "grad_norm": 1.775085065519624, "learning_rate": 4.9977592393976266e-05, "loss": 1.2824, "step": 1453 }, { "epoch": 0.17241788212972844, "grad_norm": 2.265627287758425, "learning_rate": 4.997749067223241e-05, "loss": 1.3224, "step": 1454 }, { "epoch": 0.17253646389185343, "grad_norm": 2.064325840663082, "learning_rate": 4.997738872022672e-05, "loss": 1.1425, "step": 1455 }, { "epoch": 0.1726550456539784, "grad_norm": 1.6187393667641785, "learning_rate": 4.9977286537960134e-05, "loss": 0.8909, "step": 1456 }, { "epoch": 0.17277362741610341, "grad_norm": 1.9316235828255726, "learning_rate": 4.997718412543358e-05, "loss": 1.0881, "step": 1457 }, { "epoch": 0.1728922091782284, "grad_norm": 1.799445331389091, "learning_rate": 4.9977081482648034e-05, "loss": 1.1179, "step": 1458 }, { "epoch": 0.17301079094035338, "grad_norm": 1.6134467133889536, "learning_rate": 4.997697860960443e-05, "loss": 1.0122, "step": 1459 }, { "epoch": 0.17312937270247836, "grad_norm": 1.6234587648135672, "learning_rate": 4.99768755063037e-05, "loss": 1.0887, "step": 1460 }, { "epoch": 0.17324795446460334, "grad_norm": 1.4403799320594506, "learning_rate": 4.997677217274681e-05, "loss": 1.4116, "step": 1461 }, { "epoch": 0.17336653622672832, "grad_norm": 1.482711243411262, "learning_rate": 4.997666860893471e-05, "loss": 1.2614, "step": 1462 }, { "epoch": 0.17348511798885333, "grad_norm": 1.332450678599432, "learning_rate": 4.9976564814868355e-05, "loss": 0.9241, "step": 1463 }, { "epoch": 0.1736036997509783, "grad_norm": 1.6241224258990767, "learning_rate": 4.997646079054869e-05, "loss": 1.0423, "step": 1464 }, { "epoch": 0.1737222815131033, "grad_norm": 1.3316483396682481, "learning_rate": 4.99763565359767e-05, "loss": 1.1342, "step": 1465 }, { "epoch": 0.17384086327522827, "grad_norm": 1.2856547831546101, "learning_rate": 4.997625205115332e-05, "loss": 1.0796, "step": 1466 }, { "epoch": 0.17395944503735325, "grad_norm": 1.4410122889544619, "learning_rate": 4.997614733607953e-05, "loss": 1.17, "step": 1467 }, { "epoch": 0.17407802679947823, "grad_norm": 1.558541533942193, "learning_rate": 4.997604239075629e-05, "loss": 0.861, "step": 1468 }, { "epoch": 0.17419660856160324, "grad_norm": 1.4902226374816268, "learning_rate": 4.9975937215184565e-05, "loss": 0.8577, "step": 1469 }, { "epoch": 0.17431519032372822, "grad_norm": 1.841154795727841, "learning_rate": 4.9975831809365326e-05, "loss": 1.2982, "step": 1470 }, { "epoch": 0.1744337720858532, "grad_norm": 1.5465443159511676, "learning_rate": 4.997572617329954e-05, "loss": 1.161, "step": 1471 }, { "epoch": 0.17455235384797818, "grad_norm": 1.5677916292699619, "learning_rate": 4.997562030698819e-05, "loss": 1.0781, "step": 1472 }, { "epoch": 0.17467093561010316, "grad_norm": 2.01829886524247, "learning_rate": 4.997551421043225e-05, "loss": 1.2409, "step": 1473 }, { "epoch": 0.17478951737222814, "grad_norm": 1.6911966877832518, "learning_rate": 4.9975407883632694e-05, "loss": 0.8492, "step": 1474 }, { "epoch": 0.17490809913435312, "grad_norm": 1.8276022629462294, "learning_rate": 4.9975301326590505e-05, "loss": 1.4104, "step": 1475 }, { "epoch": 0.17502668089647813, "grad_norm": 1.2870209023721173, "learning_rate": 4.997519453930667e-05, "loss": 1.0204, "step": 1476 }, { "epoch": 0.1751452626586031, "grad_norm": 1.8952367266962644, "learning_rate": 4.997508752178216e-05, "loss": 1.0593, "step": 1477 }, { "epoch": 0.1752638444207281, "grad_norm": 1.6375402616836958, "learning_rate": 4.997498027401797e-05, "loss": 1.1603, "step": 1478 }, { "epoch": 0.17538242618285307, "grad_norm": 1.694895787038009, "learning_rate": 4.9974872796015094e-05, "loss": 1.2543, "step": 1479 }, { "epoch": 0.17550100794497805, "grad_norm": 1.4015770953166906, "learning_rate": 4.997476508777451e-05, "loss": 0.966, "step": 1480 }, { "epoch": 0.17561958970710304, "grad_norm": 1.6679901695572588, "learning_rate": 4.9974657149297224e-05, "loss": 1.1412, "step": 1481 }, { "epoch": 0.17573817146922804, "grad_norm": 1.591288894090236, "learning_rate": 4.997454898058422e-05, "loss": 1.0461, "step": 1482 }, { "epoch": 0.17585675323135302, "grad_norm": 1.7695886916045607, "learning_rate": 4.99744405816365e-05, "loss": 1.1108, "step": 1483 }, { "epoch": 0.175975334993478, "grad_norm": 1.7972988499455775, "learning_rate": 4.9974331952455066e-05, "loss": 1.2997, "step": 1484 }, { "epoch": 0.17609391675560299, "grad_norm": 1.579948511635081, "learning_rate": 4.9974223093040914e-05, "loss": 1.2128, "step": 1485 }, { "epoch": 0.17621249851772797, "grad_norm": 1.7119639176786876, "learning_rate": 4.9974114003395055e-05, "loss": 1.3646, "step": 1486 }, { "epoch": 0.17633108027985295, "grad_norm": 1.3915413627293984, "learning_rate": 4.9974004683518486e-05, "loss": 1.0938, "step": 1487 }, { "epoch": 0.17644966204197796, "grad_norm": 1.5234884759219331, "learning_rate": 4.997389513341222e-05, "loss": 1.0556, "step": 1488 }, { "epoch": 0.17656824380410294, "grad_norm": 1.5575362725223327, "learning_rate": 4.997378535307727e-05, "loss": 1.1533, "step": 1489 }, { "epoch": 0.17668682556622792, "grad_norm": 1.5947918650552788, "learning_rate": 4.997367534251464e-05, "loss": 1.1162, "step": 1490 }, { "epoch": 0.1768054073283529, "grad_norm": 1.5685493577313225, "learning_rate": 4.997356510172535e-05, "loss": 1.1426, "step": 1491 }, { "epoch": 0.17692398909047788, "grad_norm": 1.6912356860419224, "learning_rate": 4.9973454630710415e-05, "loss": 1.0526, "step": 1492 }, { "epoch": 0.17704257085260286, "grad_norm": 1.5945533763197095, "learning_rate": 4.997334392947085e-05, "loss": 1.3125, "step": 1493 }, { "epoch": 0.17716115261472787, "grad_norm": 1.2198739588388656, "learning_rate": 4.997323299800768e-05, "loss": 0.695, "step": 1494 }, { "epoch": 0.17727973437685285, "grad_norm": 1.8101120795210968, "learning_rate": 4.9973121836321926e-05, "loss": 1.0565, "step": 1495 }, { "epoch": 0.17739831613897783, "grad_norm": 1.634643368357219, "learning_rate": 4.997301044441461e-05, "loss": 1.0169, "step": 1496 }, { "epoch": 0.1775168979011028, "grad_norm": 1.981421224236605, "learning_rate": 4.997289882228677e-05, "loss": 1.1409, "step": 1497 }, { "epoch": 0.1776354796632278, "grad_norm": 1.5375694100235646, "learning_rate": 4.997278696993942e-05, "loss": 0.8433, "step": 1498 }, { "epoch": 0.17775406142535277, "grad_norm": 1.9124195112188587, "learning_rate": 4.9972674887373595e-05, "loss": 0.9968, "step": 1499 }, { "epoch": 0.17787264318747775, "grad_norm": 1.8604747789840341, "learning_rate": 4.997256257459033e-05, "loss": 1.1898, "step": 1500 }, { "epoch": 0.17799122494960276, "grad_norm": 2.0490196148552973, "learning_rate": 4.997245003159067e-05, "loss": 1.2243, "step": 1501 }, { "epoch": 0.17810980671172774, "grad_norm": 1.5531823121979078, "learning_rate": 4.997233725837564e-05, "loss": 1.1142, "step": 1502 }, { "epoch": 0.17822838847385272, "grad_norm": 2.1710003866311265, "learning_rate": 4.9972224254946287e-05, "loss": 1.2255, "step": 1503 }, { "epoch": 0.1783469702359777, "grad_norm": 1.658832936724596, "learning_rate": 4.997211102130365e-05, "loss": 0.9675, "step": 1504 }, { "epoch": 0.17846555199810268, "grad_norm": 1.5609785327705152, "learning_rate": 4.9971997557448754e-05, "loss": 1.0465, "step": 1505 }, { "epoch": 0.17858413376022766, "grad_norm": 1.5189526883854214, "learning_rate": 4.9971883863382684e-05, "loss": 0.7537, "step": 1506 }, { "epoch": 0.17870271552235267, "grad_norm": 1.5552763545544326, "learning_rate": 4.997176993910646e-05, "loss": 0.9973, "step": 1507 }, { "epoch": 0.17882129728447765, "grad_norm": 1.5640200550146752, "learning_rate": 4.997165578462114e-05, "loss": 1.1428, "step": 1508 }, { "epoch": 0.17893987904660263, "grad_norm": 1.630544982785924, "learning_rate": 4.997154139992778e-05, "loss": 0.974, "step": 1509 }, { "epoch": 0.17905846080872762, "grad_norm": 1.8309397256139732, "learning_rate": 4.997142678502742e-05, "loss": 1.1002, "step": 1510 }, { "epoch": 0.1791770425708526, "grad_norm": 1.5067299008405102, "learning_rate": 4.997131193992114e-05, "loss": 1.0767, "step": 1511 }, { "epoch": 0.17929562433297758, "grad_norm": 1.908703220521912, "learning_rate": 4.9971196864609975e-05, "loss": 0.914, "step": 1512 }, { "epoch": 0.17941420609510259, "grad_norm": 1.595290521437111, "learning_rate": 4.9971081559095e-05, "loss": 0.9302, "step": 1513 }, { "epoch": 0.17953278785722757, "grad_norm": 1.8397020354562523, "learning_rate": 4.9970966023377276e-05, "loss": 0.9759, "step": 1514 }, { "epoch": 0.17965136961935255, "grad_norm": 1.5394998969824591, "learning_rate": 4.997085025745787e-05, "loss": 1.0322, "step": 1515 }, { "epoch": 0.17976995138147753, "grad_norm": 1.666277470371369, "learning_rate": 4.997073426133784e-05, "loss": 1.2071, "step": 1516 }, { "epoch": 0.1798885331436025, "grad_norm": 1.6803052962291969, "learning_rate": 4.997061803501826e-05, "loss": 1.1268, "step": 1517 }, { "epoch": 0.1800071149057275, "grad_norm": 1.7071040119875576, "learning_rate": 4.997050157850021e-05, "loss": 1.1063, "step": 1518 }, { "epoch": 0.1801256966678525, "grad_norm": 1.576905839128205, "learning_rate": 4.997038489178475e-05, "loss": 1.0089, "step": 1519 }, { "epoch": 0.18024427842997748, "grad_norm": 1.7541192620099566, "learning_rate": 4.997026797487296e-05, "loss": 1.1136, "step": 1520 }, { "epoch": 0.18036286019210246, "grad_norm": 1.337539222603322, "learning_rate": 4.9970150827765924e-05, "loss": 0.8104, "step": 1521 }, { "epoch": 0.18048144195422744, "grad_norm": 1.26884287581894, "learning_rate": 4.9970033450464726e-05, "loss": 0.9024, "step": 1522 }, { "epoch": 0.18060002371635242, "grad_norm": 1.639011493873352, "learning_rate": 4.9969915842970427e-05, "loss": 1.3009, "step": 1523 }, { "epoch": 0.1807186054784774, "grad_norm": 1.6740358964049336, "learning_rate": 4.996979800528413e-05, "loss": 1.3152, "step": 1524 }, { "epoch": 0.18083718724060238, "grad_norm": 1.7612752135538128, "learning_rate": 4.996967993740692e-05, "loss": 1.1132, "step": 1525 }, { "epoch": 0.1809557690027274, "grad_norm": 2.047014102194751, "learning_rate": 4.996956163933987e-05, "loss": 1.1026, "step": 1526 }, { "epoch": 0.18107435076485237, "grad_norm": 1.6109427837288615, "learning_rate": 4.996944311108408e-05, "loss": 0.9948, "step": 1527 }, { "epoch": 0.18119293252697735, "grad_norm": 1.4986031352082327, "learning_rate": 4.9969324352640655e-05, "loss": 1.1169, "step": 1528 }, { "epoch": 0.18131151428910233, "grad_norm": 1.4617013846761768, "learning_rate": 4.996920536401067e-05, "loss": 0.9764, "step": 1529 }, { "epoch": 0.1814300960512273, "grad_norm": 1.3788176100202822, "learning_rate": 4.996908614519524e-05, "loss": 1.0428, "step": 1530 }, { "epoch": 0.1815486778133523, "grad_norm": 1.7710026774067769, "learning_rate": 4.996896669619545e-05, "loss": 0.8615, "step": 1531 }, { "epoch": 0.1816672595754773, "grad_norm": 1.7100416230946485, "learning_rate": 4.996884701701241e-05, "loss": 1.0277, "step": 1532 }, { "epoch": 0.18178584133760228, "grad_norm": 1.663728112202658, "learning_rate": 4.996872710764721e-05, "loss": 0.9948, "step": 1533 }, { "epoch": 0.18190442309972726, "grad_norm": 1.7722070223271151, "learning_rate": 4.996860696810097e-05, "loss": 1.1701, "step": 1534 }, { "epoch": 0.18202300486185224, "grad_norm": 1.7150482641625249, "learning_rate": 4.99684865983748e-05, "loss": 1.0361, "step": 1535 }, { "epoch": 0.18214158662397723, "grad_norm": 1.7881928321923817, "learning_rate": 4.996836599846979e-05, "loss": 1.0756, "step": 1536 }, { "epoch": 0.1822601683861022, "grad_norm": 1.7257719944101488, "learning_rate": 4.996824516838707e-05, "loss": 1.1809, "step": 1537 }, { "epoch": 0.18237875014822721, "grad_norm": 1.961229936757753, "learning_rate": 4.9968124108127746e-05, "loss": 1.1316, "step": 1538 }, { "epoch": 0.1824973319103522, "grad_norm": 1.4424018846943025, "learning_rate": 4.996800281769293e-05, "loss": 0.9507, "step": 1539 }, { "epoch": 0.18261591367247718, "grad_norm": 2.0653052528596163, "learning_rate": 4.996788129708375e-05, "loss": 1.3239, "step": 1540 }, { "epoch": 0.18273449543460216, "grad_norm": 1.5385058486297811, "learning_rate": 4.996775954630133e-05, "loss": 0.9697, "step": 1541 }, { "epoch": 0.18285307719672714, "grad_norm": 1.6188812183821766, "learning_rate": 4.9967637565346774e-05, "loss": 1.1462, "step": 1542 }, { "epoch": 0.18297165895885212, "grad_norm": 1.460773623800915, "learning_rate": 4.996751535422122e-05, "loss": 1.1681, "step": 1543 }, { "epoch": 0.18309024072097713, "grad_norm": 1.5371762096946808, "learning_rate": 4.996739291292579e-05, "loss": 1.0415, "step": 1544 }, { "epoch": 0.1832088224831021, "grad_norm": 1.4937286727022654, "learning_rate": 4.996727024146162e-05, "loss": 1.2516, "step": 1545 }, { "epoch": 0.1833274042452271, "grad_norm": 1.6603403180655056, "learning_rate": 4.9967147339829835e-05, "loss": 1.1199, "step": 1546 }, { "epoch": 0.18344598600735207, "grad_norm": 1.4583604112193427, "learning_rate": 4.9967024208031566e-05, "loss": 1.3386, "step": 1547 }, { "epoch": 0.18356456776947705, "grad_norm": 1.5544086365430707, "learning_rate": 4.9966900846067944e-05, "loss": 1.0127, "step": 1548 }, { "epoch": 0.18368314953160203, "grad_norm": 1.5341383614114819, "learning_rate": 4.996677725394012e-05, "loss": 0.9757, "step": 1549 }, { "epoch": 0.183801731293727, "grad_norm": 1.5440330020968618, "learning_rate": 4.9966653431649225e-05, "loss": 1.0375, "step": 1550 }, { "epoch": 0.18392031305585202, "grad_norm": 1.6783147772359568, "learning_rate": 4.9966529379196406e-05, "loss": 1.0863, "step": 1551 }, { "epoch": 0.184038894817977, "grad_norm": 1.7130596092543076, "learning_rate": 4.996640509658279e-05, "loss": 1.164, "step": 1552 }, { "epoch": 0.18415747658010198, "grad_norm": 1.5557615062034251, "learning_rate": 4.9966280583809546e-05, "loss": 0.9441, "step": 1553 }, { "epoch": 0.18427605834222696, "grad_norm": 1.8976070426085894, "learning_rate": 4.99661558408778e-05, "loss": 1.2119, "step": 1554 }, { "epoch": 0.18439464010435194, "grad_norm": 1.755881234025144, "learning_rate": 4.996603086778873e-05, "loss": 1.1316, "step": 1555 }, { "epoch": 0.18451322186647692, "grad_norm": 1.6336313731120855, "learning_rate": 4.9965905664543455e-05, "loss": 1.1469, "step": 1556 }, { "epoch": 0.18463180362860193, "grad_norm": 1.7604787396012924, "learning_rate": 4.996578023114314e-05, "loss": 1.07, "step": 1557 }, { "epoch": 0.1847503853907269, "grad_norm": 1.5672944326904943, "learning_rate": 4.996565456758897e-05, "loss": 1.0435, "step": 1558 }, { "epoch": 0.1848689671528519, "grad_norm": 1.9068366815275553, "learning_rate": 4.996552867388206e-05, "loss": 1.0765, "step": 1559 }, { "epoch": 0.18498754891497687, "grad_norm": 1.7476299569943206, "learning_rate": 4.99654025500236e-05, "loss": 1.2208, "step": 1560 }, { "epoch": 0.18510613067710185, "grad_norm": 1.660763032483411, "learning_rate": 4.996527619601473e-05, "loss": 1.0627, "step": 1561 }, { "epoch": 0.18522471243922684, "grad_norm": 1.547670732331173, "learning_rate": 4.996514961185664e-05, "loss": 0.9164, "step": 1562 }, { "epoch": 0.18534329420135184, "grad_norm": 1.730788351270218, "learning_rate": 4.996502279755049e-05, "loss": 0.9246, "step": 1563 }, { "epoch": 0.18546187596347682, "grad_norm": 1.8215216898016657, "learning_rate": 4.996489575309743e-05, "loss": 0.8069, "step": 1564 }, { "epoch": 0.1855804577256018, "grad_norm": 1.783487920841857, "learning_rate": 4.9964768478498655e-05, "loss": 1.0708, "step": 1565 }, { "epoch": 0.18569903948772679, "grad_norm": 1.7990803976020977, "learning_rate": 4.996464097375532e-05, "loss": 0.9731, "step": 1566 }, { "epoch": 0.18581762124985177, "grad_norm": 1.7644737502639234, "learning_rate": 4.996451323886862e-05, "loss": 1.2162, "step": 1567 }, { "epoch": 0.18593620301197675, "grad_norm": 1.6336924370427168, "learning_rate": 4.996438527383971e-05, "loss": 0.9407, "step": 1568 }, { "epoch": 0.18605478477410176, "grad_norm": 1.826532888364708, "learning_rate": 4.9964257078669785e-05, "loss": 1.0371, "step": 1569 }, { "epoch": 0.18617336653622674, "grad_norm": 1.668822514249424, "learning_rate": 4.9964128653360024e-05, "loss": 1.229, "step": 1570 }, { "epoch": 0.18629194829835172, "grad_norm": 1.578288551131229, "learning_rate": 4.9963999997911615e-05, "loss": 0.9798, "step": 1571 }, { "epoch": 0.1864105300604767, "grad_norm": 1.6065515753527535, "learning_rate": 4.9963871112325736e-05, "loss": 1.2065, "step": 1572 }, { "epoch": 0.18652911182260168, "grad_norm": 1.6926844626428836, "learning_rate": 4.996374199660357e-05, "loss": 1.0218, "step": 1573 }, { "epoch": 0.18664769358472666, "grad_norm": 1.5660879970131347, "learning_rate": 4.996361265074632e-05, "loss": 0.9985, "step": 1574 }, { "epoch": 0.18676627534685167, "grad_norm": 1.8349596018395025, "learning_rate": 4.996348307475518e-05, "loss": 1.1781, "step": 1575 }, { "epoch": 0.18688485710897665, "grad_norm": 1.6200880153490875, "learning_rate": 4.996335326863133e-05, "loss": 1.3366, "step": 1576 }, { "epoch": 0.18700343887110163, "grad_norm": 1.4726909357333346, "learning_rate": 4.996322323237598e-05, "loss": 1.0393, "step": 1577 }, { "epoch": 0.1871220206332266, "grad_norm": 1.644644569904305, "learning_rate": 4.996309296599032e-05, "loss": 1.2533, "step": 1578 }, { "epoch": 0.1872406023953516, "grad_norm": 1.6092633734896153, "learning_rate": 4.996296246947556e-05, "loss": 1.1218, "step": 1579 }, { "epoch": 0.18735918415747657, "grad_norm": 1.3625614405223025, "learning_rate": 4.996283174283289e-05, "loss": 1.0278, "step": 1580 }, { "epoch": 0.18747776591960155, "grad_norm": 1.6056241331557157, "learning_rate": 4.996270078606353e-05, "loss": 1.1424, "step": 1581 }, { "epoch": 0.18759634768172656, "grad_norm": 1.5612493425851182, "learning_rate": 4.9962569599168674e-05, "loss": 0.7986, "step": 1582 }, { "epoch": 0.18771492944385154, "grad_norm": 1.7312694491015879, "learning_rate": 4.996243818214954e-05, "loss": 1.1667, "step": 1583 }, { "epoch": 0.18783351120597652, "grad_norm": 1.5825270657591424, "learning_rate": 4.996230653500734e-05, "loss": 0.8741, "step": 1584 }, { "epoch": 0.1879520929681015, "grad_norm": 1.54593603729105, "learning_rate": 4.9962174657743286e-05, "loss": 0.8064, "step": 1585 }, { "epoch": 0.18807067473022648, "grad_norm": 1.6697306264364022, "learning_rate": 4.996204255035858e-05, "loss": 1.202, "step": 1586 }, { "epoch": 0.18818925649235146, "grad_norm": 1.6299478725188448, "learning_rate": 4.996191021285447e-05, "loss": 1.0735, "step": 1587 }, { "epoch": 0.18830783825447647, "grad_norm": 1.615862379203652, "learning_rate": 4.996177764523214e-05, "loss": 0.8661, "step": 1588 }, { "epoch": 0.18842642001660145, "grad_norm": 1.6562773593366404, "learning_rate": 4.996164484749284e-05, "loss": 0.8983, "step": 1589 }, { "epoch": 0.18854500177872643, "grad_norm": 1.8255610662468014, "learning_rate": 4.9961511819637784e-05, "loss": 1.1389, "step": 1590 }, { "epoch": 0.18866358354085142, "grad_norm": 1.6954937022304208, "learning_rate": 4.9961378561668204e-05, "loss": 1.0967, "step": 1591 }, { "epoch": 0.1887821653029764, "grad_norm": 1.6979503309447315, "learning_rate": 4.996124507358532e-05, "loss": 1.0346, "step": 1592 }, { "epoch": 0.18890074706510138, "grad_norm": 1.6459416960962332, "learning_rate": 4.9961111355390354e-05, "loss": 1.097, "step": 1593 }, { "epoch": 0.18901932882722638, "grad_norm": 1.7014786529806827, "learning_rate": 4.9960977407084565e-05, "loss": 1.0413, "step": 1594 }, { "epoch": 0.18913791058935137, "grad_norm": 1.479642376560751, "learning_rate": 4.996084322866917e-05, "loss": 1.1937, "step": 1595 }, { "epoch": 0.18925649235147635, "grad_norm": 1.5638024996354594, "learning_rate": 4.996070882014541e-05, "loss": 0.5993, "step": 1596 }, { "epoch": 0.18937507411360133, "grad_norm": 1.5882216569684302, "learning_rate": 4.996057418151452e-05, "loss": 1.0294, "step": 1597 }, { "epoch": 0.1894936558757263, "grad_norm": 1.6466476608655074, "learning_rate": 4.996043931277774e-05, "loss": 1.0351, "step": 1598 }, { "epoch": 0.1896122376378513, "grad_norm": 1.763414535423779, "learning_rate": 4.9960304213936325e-05, "loss": 0.936, "step": 1599 }, { "epoch": 0.1897308193999763, "grad_norm": 1.46580655597137, "learning_rate": 4.996016888499152e-05, "loss": 1.065, "step": 1600 }, { "epoch": 0.18984940116210128, "grad_norm": 1.7656122257680125, "learning_rate": 4.996003332594455e-05, "loss": 1.0626, "step": 1601 }, { "epoch": 0.18996798292422626, "grad_norm": 1.6396504604458573, "learning_rate": 4.9959897536796696e-05, "loss": 0.6935, "step": 1602 }, { "epoch": 0.19008656468635124, "grad_norm": 1.665624202116392, "learning_rate": 4.995976151754919e-05, "loss": 1.09, "step": 1603 }, { "epoch": 0.19020514644847622, "grad_norm": 1.711105281387666, "learning_rate": 4.995962526820328e-05, "loss": 1.0259, "step": 1604 }, { "epoch": 0.1903237282106012, "grad_norm": 1.9345464158644985, "learning_rate": 4.995948878876025e-05, "loss": 1.1515, "step": 1605 }, { "epoch": 0.19044230997272618, "grad_norm": 1.70825965581751, "learning_rate": 4.995935207922133e-05, "loss": 1.2688, "step": 1606 }, { "epoch": 0.1905608917348512, "grad_norm": 1.5389291662264457, "learning_rate": 4.995921513958779e-05, "loss": 1.0209, "step": 1607 }, { "epoch": 0.19067947349697617, "grad_norm": 1.73641096466723, "learning_rate": 4.99590779698609e-05, "loss": 1.2913, "step": 1608 }, { "epoch": 0.19079805525910115, "grad_norm": 1.6294992044280474, "learning_rate": 4.9958940570041915e-05, "loss": 0.878, "step": 1609 }, { "epoch": 0.19091663702122613, "grad_norm": 1.4940994114039168, "learning_rate": 4.99588029401321e-05, "loss": 1.0951, "step": 1610 }, { "epoch": 0.1910352187833511, "grad_norm": 1.4801674321823637, "learning_rate": 4.995866508013273e-05, "loss": 0.9697, "step": 1611 }, { "epoch": 0.1911538005454761, "grad_norm": 1.4450292156047881, "learning_rate": 4.995852699004508e-05, "loss": 1.0828, "step": 1612 }, { "epoch": 0.1912723823076011, "grad_norm": 1.7517146057630513, "learning_rate": 4.9958388669870416e-05, "loss": 1.3314, "step": 1613 }, { "epoch": 0.19139096406972608, "grad_norm": 1.5972451399551157, "learning_rate": 4.995825011961001e-05, "loss": 1.0655, "step": 1614 }, { "epoch": 0.19150954583185106, "grad_norm": 1.4740666792983896, "learning_rate": 4.9958111339265144e-05, "loss": 0.9281, "step": 1615 }, { "epoch": 0.19162812759397604, "grad_norm": 1.7321921331202959, "learning_rate": 4.99579723288371e-05, "loss": 1.1222, "step": 1616 }, { "epoch": 0.19174670935610102, "grad_norm": 1.4080161674973215, "learning_rate": 4.9957833088327154e-05, "loss": 1.0761, "step": 1617 }, { "epoch": 0.191865291118226, "grad_norm": 1.5072447382472143, "learning_rate": 4.99576936177366e-05, "loss": 0.9452, "step": 1618 }, { "epoch": 0.19198387288035101, "grad_norm": 1.7615166325438916, "learning_rate": 4.9957553917066704e-05, "loss": 1.0928, "step": 1619 }, { "epoch": 0.192102454642476, "grad_norm": 1.6848493445171975, "learning_rate": 4.995741398631878e-05, "loss": 1.0243, "step": 1620 }, { "epoch": 0.19222103640460098, "grad_norm": 1.577187289991337, "learning_rate": 4.995727382549409e-05, "loss": 0.8626, "step": 1621 }, { "epoch": 0.19233961816672596, "grad_norm": 1.5829535952372968, "learning_rate": 4.995713343459395e-05, "loss": 1.0412, "step": 1622 }, { "epoch": 0.19245819992885094, "grad_norm": 1.503173660178691, "learning_rate": 4.995699281361964e-05, "loss": 0.9868, "step": 1623 }, { "epoch": 0.19257678169097592, "grad_norm": 1.5597795071662501, "learning_rate": 4.995685196257246e-05, "loss": 1.0105, "step": 1624 }, { "epoch": 0.19269536345310093, "grad_norm": 1.7191342371999896, "learning_rate": 4.9956710881453706e-05, "loss": 1.0825, "step": 1625 }, { "epoch": 0.1928139452152259, "grad_norm": 1.6608030975252759, "learning_rate": 4.995656957026469e-05, "loss": 1.1788, "step": 1626 }, { "epoch": 0.1929325269773509, "grad_norm": 1.695166747511289, "learning_rate": 4.9956428029006696e-05, "loss": 0.9869, "step": 1627 }, { "epoch": 0.19305110873947587, "grad_norm": 1.8141557233582302, "learning_rate": 4.995628625768105e-05, "loss": 1.0292, "step": 1628 }, { "epoch": 0.19316969050160085, "grad_norm": 2.145845017514409, "learning_rate": 4.995614425628904e-05, "loss": 1.2887, "step": 1629 }, { "epoch": 0.19328827226372583, "grad_norm": 1.7478758497788793, "learning_rate": 4.995600202483198e-05, "loss": 0.9496, "step": 1630 }, { "epoch": 0.1934068540258508, "grad_norm": 1.7116924285331965, "learning_rate": 4.99558595633112e-05, "loss": 0.9766, "step": 1631 }, { "epoch": 0.19352543578797582, "grad_norm": 1.8139529437771327, "learning_rate": 4.995571687172799e-05, "loss": 1.138, "step": 1632 }, { "epoch": 0.1936440175501008, "grad_norm": 1.4801469341164957, "learning_rate": 4.9955573950083666e-05, "loss": 1.073, "step": 1633 }, { "epoch": 0.19376259931222578, "grad_norm": 1.5079528918607126, "learning_rate": 4.995543079837955e-05, "loss": 1.1936, "step": 1634 }, { "epoch": 0.19388118107435076, "grad_norm": 1.542575806221861, "learning_rate": 4.995528741661698e-05, "loss": 0.9571, "step": 1635 }, { "epoch": 0.19399976283647574, "grad_norm": 1.5332456874515554, "learning_rate": 4.995514380479726e-05, "loss": 1.2609, "step": 1636 }, { "epoch": 0.19411834459860072, "grad_norm": 1.6082813430068659, "learning_rate": 4.995499996292171e-05, "loss": 1.1247, "step": 1637 }, { "epoch": 0.19423692636072573, "grad_norm": 1.5853263242043059, "learning_rate": 4.9954855890991655e-05, "loss": 1.1335, "step": 1638 }, { "epoch": 0.1943555081228507, "grad_norm": 1.599811253749778, "learning_rate": 4.995471158900844e-05, "loss": 1.0815, "step": 1639 }, { "epoch": 0.1944740898849757, "grad_norm": 1.5591151974354802, "learning_rate": 4.995456705697339e-05, "loss": 1.0416, "step": 1640 }, { "epoch": 0.19459267164710067, "grad_norm": 1.554402008379964, "learning_rate": 4.995442229488782e-05, "loss": 0.9294, "step": 1641 }, { "epoch": 0.19471125340922565, "grad_norm": 1.4412584466658627, "learning_rate": 4.995427730275309e-05, "loss": 0.8145, "step": 1642 }, { "epoch": 0.19482983517135063, "grad_norm": 1.5872184017615707, "learning_rate": 4.995413208057052e-05, "loss": 1.0059, "step": 1643 }, { "epoch": 0.19494841693347564, "grad_norm": 1.682573626022976, "learning_rate": 4.9953986628341446e-05, "loss": 0.9632, "step": 1644 }, { "epoch": 0.19506699869560062, "grad_norm": 1.6410360964108557, "learning_rate": 4.995384094606722e-05, "loss": 1.0105, "step": 1645 }, { "epoch": 0.1951855804577256, "grad_norm": 1.7698020970257424, "learning_rate": 4.995369503374919e-05, "loss": 1.1068, "step": 1646 }, { "epoch": 0.19530416221985059, "grad_norm": 1.7708635646376054, "learning_rate": 4.995354889138868e-05, "loss": 1.2475, "step": 1647 }, { "epoch": 0.19542274398197557, "grad_norm": 1.7270509486243872, "learning_rate": 4.9953402518987055e-05, "loss": 1.0318, "step": 1648 }, { "epoch": 0.19554132574410055, "grad_norm": 1.7145746326288438, "learning_rate": 4.995325591654566e-05, "loss": 1.0454, "step": 1649 }, { "epoch": 0.19565990750622556, "grad_norm": 1.5585643287270916, "learning_rate": 4.9953109084065844e-05, "loss": 1.1918, "step": 1650 }, { "epoch": 0.19577848926835054, "grad_norm": 1.5658708845858225, "learning_rate": 4.9952962021548956e-05, "loss": 0.9562, "step": 1651 }, { "epoch": 0.19589707103047552, "grad_norm": 1.846981826824355, "learning_rate": 4.9952814728996364e-05, "loss": 0.9759, "step": 1652 }, { "epoch": 0.1960156527926005, "grad_norm": 1.6646992654048087, "learning_rate": 4.995266720640942e-05, "loss": 1.0486, "step": 1653 }, { "epoch": 0.19613423455472548, "grad_norm": 1.5808156338629298, "learning_rate": 4.995251945378948e-05, "loss": 1.2012, "step": 1654 }, { "epoch": 0.19625281631685046, "grad_norm": 1.496615175883074, "learning_rate": 4.9952371471137906e-05, "loss": 1.1035, "step": 1655 }, { "epoch": 0.19637139807897544, "grad_norm": 1.9117194169781258, "learning_rate": 4.9952223258456073e-05, "loss": 1.0139, "step": 1656 }, { "epoch": 0.19648997984110045, "grad_norm": 1.4165521527467075, "learning_rate": 4.9952074815745334e-05, "loss": 1.1724, "step": 1657 }, { "epoch": 0.19660856160322543, "grad_norm": 1.41997264004625, "learning_rate": 4.995192614300707e-05, "loss": 0.9647, "step": 1658 }, { "epoch": 0.1967271433653504, "grad_norm": 1.6020967041237606, "learning_rate": 4.9951777240242636e-05, "loss": 1.1342, "step": 1659 }, { "epoch": 0.1968457251274754, "grad_norm": 1.7317468486424006, "learning_rate": 4.995162810745342e-05, "loss": 1.0423, "step": 1660 }, { "epoch": 0.19696430688960037, "grad_norm": 1.7652024631449514, "learning_rate": 4.995147874464079e-05, "loss": 0.9864, "step": 1661 }, { "epoch": 0.19708288865172535, "grad_norm": 1.842025099507146, "learning_rate": 4.995132915180612e-05, "loss": 1.118, "step": 1662 }, { "epoch": 0.19720147041385036, "grad_norm": 1.9452712316805012, "learning_rate": 4.99511793289508e-05, "loss": 1.2296, "step": 1663 }, { "epoch": 0.19732005217597534, "grad_norm": 1.4646914187637992, "learning_rate": 4.99510292760762e-05, "loss": 1.0104, "step": 1664 }, { "epoch": 0.19743863393810032, "grad_norm": 1.6669624750251892, "learning_rate": 4.99508789931837e-05, "loss": 0.9821, "step": 1665 }, { "epoch": 0.1975572157002253, "grad_norm": 1.626903801356354, "learning_rate": 4.99507284802747e-05, "loss": 1.1056, "step": 1666 }, { "epoch": 0.19767579746235028, "grad_norm": 1.605058845384302, "learning_rate": 4.9950577737350576e-05, "loss": 1.1125, "step": 1667 }, { "epoch": 0.19779437922447526, "grad_norm": 1.969487312458771, "learning_rate": 4.995042676441273e-05, "loss": 1.1872, "step": 1668 }, { "epoch": 0.19791296098660027, "grad_norm": 1.5932370098646527, "learning_rate": 4.995027556146254e-05, "loss": 1.0766, "step": 1669 }, { "epoch": 0.19803154274872525, "grad_norm": 1.4561715443258096, "learning_rate": 4.995012412850141e-05, "loss": 1.0901, "step": 1670 }, { "epoch": 0.19815012451085023, "grad_norm": 1.2345701828227944, "learning_rate": 4.994997246553073e-05, "loss": 0.8799, "step": 1671 }, { "epoch": 0.19826870627297521, "grad_norm": 1.7080506443303032, "learning_rate": 4.994982057255189e-05, "loss": 0.9506, "step": 1672 }, { "epoch": 0.1983872880351002, "grad_norm": 1.413957169756916, "learning_rate": 4.994966844956631e-05, "loss": 0.9748, "step": 1673 }, { "epoch": 0.19850586979722518, "grad_norm": 1.5219243786423844, "learning_rate": 4.994951609657538e-05, "loss": 0.9359, "step": 1674 }, { "epoch": 0.19862445155935018, "grad_norm": 1.497740175307494, "learning_rate": 4.9949363513580496e-05, "loss": 0.9727, "step": 1675 }, { "epoch": 0.19874303332147517, "grad_norm": 1.5136099462825126, "learning_rate": 4.9949210700583085e-05, "loss": 1.2001, "step": 1676 }, { "epoch": 0.19886161508360015, "grad_norm": 1.5189415011992695, "learning_rate": 4.994905765758455e-05, "loss": 0.9744, "step": 1677 }, { "epoch": 0.19898019684572513, "grad_norm": 1.5936937633766755, "learning_rate": 4.9948904384586294e-05, "loss": 0.8591, "step": 1678 }, { "epoch": 0.1990987786078501, "grad_norm": 1.7883333249974978, "learning_rate": 4.994875088158973e-05, "loss": 1.1883, "step": 1679 }, { "epoch": 0.1992173603699751, "grad_norm": 1.8700564302397769, "learning_rate": 4.994859714859628e-05, "loss": 1.1277, "step": 1680 }, { "epoch": 0.1993359421321001, "grad_norm": 1.6341812365503992, "learning_rate": 4.9948443185607364e-05, "loss": 0.7868, "step": 1681 }, { "epoch": 0.19945452389422508, "grad_norm": 1.6733235830605675, "learning_rate": 4.994828899262439e-05, "loss": 1.138, "step": 1682 }, { "epoch": 0.19957310565635006, "grad_norm": 1.7030875989263967, "learning_rate": 4.994813456964878e-05, "loss": 1.2738, "step": 1683 }, { "epoch": 0.19969168741847504, "grad_norm": 1.6348095529259548, "learning_rate": 4.994797991668197e-05, "loss": 0.9949, "step": 1684 }, { "epoch": 0.19981026918060002, "grad_norm": 1.7179726782240836, "learning_rate": 4.9947825033725374e-05, "loss": 1.1669, "step": 1685 }, { "epoch": 0.199928850942725, "grad_norm": 1.484490939792777, "learning_rate": 4.9947669920780424e-05, "loss": 0.9256, "step": 1686 }, { "epoch": 0.20004743270484998, "grad_norm": 1.6176959254885943, "learning_rate": 4.994751457784856e-05, "loss": 1.1282, "step": 1687 }, { "epoch": 0.200166014466975, "grad_norm": 1.576108462036272, "learning_rate": 4.994735900493119e-05, "loss": 1.2005, "step": 1688 }, { "epoch": 0.20028459622909997, "grad_norm": 1.4282791041216865, "learning_rate": 4.9947203202029775e-05, "loss": 0.761, "step": 1689 }, { "epoch": 0.20040317799122495, "grad_norm": 1.5184503581956634, "learning_rate": 4.9947047169145734e-05, "loss": 1.1169, "step": 1690 }, { "epoch": 0.20052175975334993, "grad_norm": 1.683957066742088, "learning_rate": 4.994689090628051e-05, "loss": 1.1099, "step": 1691 }, { "epoch": 0.2006403415154749, "grad_norm": 1.6462819844418561, "learning_rate": 4.994673441343554e-05, "loss": 1.2618, "step": 1692 }, { "epoch": 0.2007589232775999, "grad_norm": 1.584046479519788, "learning_rate": 4.994657769061227e-05, "loss": 1.1248, "step": 1693 }, { "epoch": 0.2008775050397249, "grad_norm": 1.4571926508724682, "learning_rate": 4.9946420737812157e-05, "loss": 1.0697, "step": 1694 }, { "epoch": 0.20099608680184988, "grad_norm": 1.7497312421048874, "learning_rate": 4.9946263555036626e-05, "loss": 1.0349, "step": 1695 }, { "epoch": 0.20111466856397486, "grad_norm": 1.4432742180577316, "learning_rate": 4.994610614228714e-05, "loss": 0.9521, "step": 1696 }, { "epoch": 0.20123325032609984, "grad_norm": 1.4523741163492265, "learning_rate": 4.994594849956514e-05, "loss": 1.202, "step": 1697 }, { "epoch": 0.20135183208822482, "grad_norm": 1.6742439301787915, "learning_rate": 4.9945790626872094e-05, "loss": 1.205, "step": 1698 }, { "epoch": 0.2014704138503498, "grad_norm": 1.8230204578107454, "learning_rate": 4.994563252420944e-05, "loss": 0.9721, "step": 1699 }, { "epoch": 0.20158899561247481, "grad_norm": 1.4769890734951583, "learning_rate": 4.994547419157865e-05, "loss": 1.1395, "step": 1700 }, { "epoch": 0.2017075773745998, "grad_norm": 1.681804084431201, "learning_rate": 4.994531562898118e-05, "loss": 0.9031, "step": 1701 }, { "epoch": 0.20182615913672478, "grad_norm": 1.45242561534564, "learning_rate": 4.994515683641849e-05, "loss": 0.9718, "step": 1702 }, { "epoch": 0.20194474089884976, "grad_norm": 1.6177282166256657, "learning_rate": 4.994499781389204e-05, "loss": 1.1625, "step": 1703 }, { "epoch": 0.20206332266097474, "grad_norm": 1.477489806041545, "learning_rate": 4.99448385614033e-05, "loss": 0.9836, "step": 1704 }, { "epoch": 0.20218190442309972, "grad_norm": 1.6771280363493812, "learning_rate": 4.9944679078953736e-05, "loss": 1.017, "step": 1705 }, { "epoch": 0.20230048618522473, "grad_norm": 1.6054470041505133, "learning_rate": 4.994451936654483e-05, "loss": 1.1695, "step": 1706 }, { "epoch": 0.2024190679473497, "grad_norm": 1.7878402675389176, "learning_rate": 4.994435942417803e-05, "loss": 1.1955, "step": 1707 }, { "epoch": 0.2025376497094747, "grad_norm": 1.5873248595629983, "learning_rate": 4.994419925185484e-05, "loss": 0.9112, "step": 1708 }, { "epoch": 0.20265623147159967, "grad_norm": 1.5418366133079218, "learning_rate": 4.9944038849576715e-05, "loss": 0.8404, "step": 1709 }, { "epoch": 0.20277481323372465, "grad_norm": 1.527032797405593, "learning_rate": 4.994387821734514e-05, "loss": 1.1733, "step": 1710 }, { "epoch": 0.20289339499584963, "grad_norm": 1.64101637416326, "learning_rate": 4.994371735516159e-05, "loss": 0.9959, "step": 1711 }, { "epoch": 0.2030119767579746, "grad_norm": 1.556261750926364, "learning_rate": 4.9943556263027556e-05, "loss": 0.9996, "step": 1712 }, { "epoch": 0.20313055852009962, "grad_norm": 1.5810757376291598, "learning_rate": 4.9943394940944524e-05, "loss": 0.9193, "step": 1713 }, { "epoch": 0.2032491402822246, "grad_norm": 1.6768514622149213, "learning_rate": 4.994323338891398e-05, "loss": 1.0607, "step": 1714 }, { "epoch": 0.20336772204434958, "grad_norm": 1.6954795868482353, "learning_rate": 4.9943071606937406e-05, "loss": 0.7783, "step": 1715 }, { "epoch": 0.20348630380647456, "grad_norm": 1.797214731172522, "learning_rate": 4.994290959501631e-05, "loss": 0.8388, "step": 1716 }, { "epoch": 0.20360488556859954, "grad_norm": 1.6715602372481064, "learning_rate": 4.994274735315217e-05, "loss": 0.8317, "step": 1717 }, { "epoch": 0.20372346733072452, "grad_norm": 1.581351744055121, "learning_rate": 4.994258488134648e-05, "loss": 0.8884, "step": 1718 }, { "epoch": 0.20384204909284953, "grad_norm": 1.6984808000978888, "learning_rate": 4.9942422179600744e-05, "loss": 1.0653, "step": 1719 }, { "epoch": 0.2039606308549745, "grad_norm": 1.6633539099896346, "learning_rate": 4.9942259247916466e-05, "loss": 1.0328, "step": 1720 }, { "epoch": 0.2040792126170995, "grad_norm": 1.6844877781073115, "learning_rate": 4.994209608629514e-05, "loss": 1.0734, "step": 1721 }, { "epoch": 0.20419779437922447, "grad_norm": 1.7034391268998788, "learning_rate": 4.994193269473828e-05, "loss": 1.3863, "step": 1722 }, { "epoch": 0.20431637614134945, "grad_norm": 1.520890327606274, "learning_rate": 4.994176907324739e-05, "loss": 0.7457, "step": 1723 }, { "epoch": 0.20443495790347443, "grad_norm": 1.9135702434326587, "learning_rate": 4.9941605221823966e-05, "loss": 0.9161, "step": 1724 }, { "epoch": 0.20455353966559944, "grad_norm": 1.6540452264322365, "learning_rate": 4.994144114046953e-05, "loss": 1.0658, "step": 1725 }, { "epoch": 0.20467212142772442, "grad_norm": 1.4760096447067115, "learning_rate": 4.994127682918559e-05, "loss": 1.0304, "step": 1726 }, { "epoch": 0.2047907031898494, "grad_norm": 1.5426711581734656, "learning_rate": 4.9941112287973667e-05, "loss": 1.0164, "step": 1727 }, { "epoch": 0.20490928495197439, "grad_norm": 1.441155608760965, "learning_rate": 4.994094751683527e-05, "loss": 0.905, "step": 1728 }, { "epoch": 0.20502786671409937, "grad_norm": 1.4559611456947479, "learning_rate": 4.994078251577192e-05, "loss": 1.1475, "step": 1729 }, { "epoch": 0.20514644847622435, "grad_norm": 1.620660284370224, "learning_rate": 4.994061728478515e-05, "loss": 1.1511, "step": 1730 }, { "epoch": 0.20526503023834936, "grad_norm": 2.31359467476029, "learning_rate": 4.994045182387646e-05, "loss": 1.2315, "step": 1731 }, { "epoch": 0.20538361200047434, "grad_norm": 1.9084462912569549, "learning_rate": 4.99402861330474e-05, "loss": 0.9592, "step": 1732 }, { "epoch": 0.20550219376259932, "grad_norm": 1.7834346334879625, "learning_rate": 4.994012021229947e-05, "loss": 1.2626, "step": 1733 }, { "epoch": 0.2056207755247243, "grad_norm": 1.2791137489708033, "learning_rate": 4.993995406163423e-05, "loss": 0.6817, "step": 1734 }, { "epoch": 0.20573935728684928, "grad_norm": 1.4828581130834255, "learning_rate": 4.993978768105319e-05, "loss": 1.1931, "step": 1735 }, { "epoch": 0.20585793904897426, "grad_norm": 1.388861603365636, "learning_rate": 4.993962107055789e-05, "loss": 1.2472, "step": 1736 }, { "epoch": 0.20597652081109924, "grad_norm": 1.5273895517373055, "learning_rate": 4.9939454230149876e-05, "loss": 1.0781, "step": 1737 }, { "epoch": 0.20609510257322425, "grad_norm": 1.3887691937906943, "learning_rate": 4.993928715983066e-05, "loss": 0.6454, "step": 1738 }, { "epoch": 0.20621368433534923, "grad_norm": 1.5077867803934344, "learning_rate": 4.9939119859601815e-05, "loss": 1.1834, "step": 1739 }, { "epoch": 0.2063322660974742, "grad_norm": 1.8678705487037224, "learning_rate": 4.993895232946486e-05, "loss": 1.0303, "step": 1740 }, { "epoch": 0.2064508478595992, "grad_norm": 1.5442292056873277, "learning_rate": 4.993878456942135e-05, "loss": 0.9555, "step": 1741 }, { "epoch": 0.20656942962172417, "grad_norm": 1.447615081627025, "learning_rate": 4.993861657947282e-05, "loss": 1.0007, "step": 1742 }, { "epoch": 0.20668801138384915, "grad_norm": 1.3730228427725495, "learning_rate": 4.993844835962083e-05, "loss": 0.9227, "step": 1743 }, { "epoch": 0.20680659314597416, "grad_norm": 1.6208873327976283, "learning_rate": 4.9938279909866934e-05, "loss": 1.0335, "step": 1744 }, { "epoch": 0.20692517490809914, "grad_norm": 1.6873979395166983, "learning_rate": 4.993811123021267e-05, "loss": 0.9054, "step": 1745 }, { "epoch": 0.20704375667022412, "grad_norm": 1.5399954937671008, "learning_rate": 4.9937942320659606e-05, "loss": 1.1125, "step": 1746 }, { "epoch": 0.2071623384323491, "grad_norm": 1.8398817072076532, "learning_rate": 4.9937773181209303e-05, "loss": 1.1029, "step": 1747 }, { "epoch": 0.20728092019447408, "grad_norm": 1.6226195596555468, "learning_rate": 4.9937603811863295e-05, "loss": 1.0397, "step": 1748 }, { "epoch": 0.20739950195659906, "grad_norm": 1.589253326721244, "learning_rate": 4.993743421262317e-05, "loss": 0.9006, "step": 1749 }, { "epoch": 0.20751808371872407, "grad_norm": 1.5925971027788899, "learning_rate": 4.993726438349048e-05, "loss": 1.0645, "step": 1750 }, { "epoch": 0.20763666548084905, "grad_norm": 1.6467258830513347, "learning_rate": 4.9937094324466796e-05, "loss": 1.1085, "step": 1751 }, { "epoch": 0.20775524724297403, "grad_norm": 1.8338451967704728, "learning_rate": 4.9936924035553685e-05, "loss": 1.1637, "step": 1752 }, { "epoch": 0.20787382900509901, "grad_norm": 1.6765550683077919, "learning_rate": 4.993675351675271e-05, "loss": 1.398, "step": 1753 }, { "epoch": 0.207992410767224, "grad_norm": 1.5325063413242601, "learning_rate": 4.9936582768065444e-05, "loss": 0.9329, "step": 1754 }, { "epoch": 0.20811099252934898, "grad_norm": 1.6769241364689538, "learning_rate": 4.9936411789493466e-05, "loss": 1.2809, "step": 1755 }, { "epoch": 0.20822957429147398, "grad_norm": 1.7583528951550318, "learning_rate": 4.993624058103835e-05, "loss": 1.135, "step": 1756 }, { "epoch": 0.20834815605359897, "grad_norm": 1.4495803853251503, "learning_rate": 4.993606914270167e-05, "loss": 0.7078, "step": 1757 }, { "epoch": 0.20846673781572395, "grad_norm": 1.703141278093442, "learning_rate": 4.9935897474485024e-05, "loss": 1.0222, "step": 1758 }, { "epoch": 0.20858531957784893, "grad_norm": 1.6444780116013784, "learning_rate": 4.993572557638997e-05, "loss": 0.9826, "step": 1759 }, { "epoch": 0.2087039013399739, "grad_norm": 1.3462293716961187, "learning_rate": 4.993555344841811e-05, "loss": 0.7668, "step": 1760 }, { "epoch": 0.2088224831020989, "grad_norm": 1.6900900173921347, "learning_rate": 4.993538109057102e-05, "loss": 1.2862, "step": 1761 }, { "epoch": 0.20894106486422387, "grad_norm": 1.697847244274863, "learning_rate": 4.99352085028503e-05, "loss": 1.1521, "step": 1762 }, { "epoch": 0.20905964662634888, "grad_norm": 1.5904989333198283, "learning_rate": 4.993503568525753e-05, "loss": 0.9432, "step": 1763 }, { "epoch": 0.20917822838847386, "grad_norm": 1.704765592971509, "learning_rate": 4.993486263779431e-05, "loss": 0.9619, "step": 1764 }, { "epoch": 0.20929681015059884, "grad_norm": 1.5132574536824275, "learning_rate": 4.993468936046224e-05, "loss": 0.7403, "step": 1765 }, { "epoch": 0.20941539191272382, "grad_norm": 1.6435486594042532, "learning_rate": 4.993451585326291e-05, "loss": 1.1546, "step": 1766 }, { "epoch": 0.2095339736748488, "grad_norm": 1.9369754408662387, "learning_rate": 4.993434211619791e-05, "loss": 1.1746, "step": 1767 }, { "epoch": 0.20965255543697378, "grad_norm": 1.8354854014944162, "learning_rate": 4.993416814926886e-05, "loss": 0.8454, "step": 1768 }, { "epoch": 0.2097711371990988, "grad_norm": 1.6939830390941384, "learning_rate": 4.9933993952477356e-05, "loss": 1.2456, "step": 1769 }, { "epoch": 0.20988971896122377, "grad_norm": 2.0557622507307496, "learning_rate": 4.9933819525825e-05, "loss": 1.021, "step": 1770 }, { "epoch": 0.21000830072334875, "grad_norm": 1.4863482070848308, "learning_rate": 4.993364486931341e-05, "loss": 1.1479, "step": 1771 }, { "epoch": 0.21012688248547373, "grad_norm": 1.5346269924438494, "learning_rate": 4.993346998294418e-05, "loss": 0.7894, "step": 1772 }, { "epoch": 0.2102454642475987, "grad_norm": 1.4579088906263657, "learning_rate": 4.9933294866718944e-05, "loss": 1.0244, "step": 1773 }, { "epoch": 0.2103640460097237, "grad_norm": 1.6202177346562843, "learning_rate": 4.99331195206393e-05, "loss": 1.0095, "step": 1774 }, { "epoch": 0.2104826277718487, "grad_norm": 1.5905943899383632, "learning_rate": 4.9932943944706866e-05, "loss": 1.1442, "step": 1775 }, { "epoch": 0.21060120953397368, "grad_norm": 1.466998129111414, "learning_rate": 4.9932768138923266e-05, "loss": 0.6028, "step": 1776 }, { "epoch": 0.21071979129609866, "grad_norm": 1.776335044910245, "learning_rate": 4.993259210329012e-05, "loss": 0.8719, "step": 1777 }, { "epoch": 0.21083837305822364, "grad_norm": 1.919229595620604, "learning_rate": 4.993241583780904e-05, "loss": 0.9369, "step": 1778 }, { "epoch": 0.21095695482034862, "grad_norm": 1.5995657368290241, "learning_rate": 4.9932239342481675e-05, "loss": 0.8465, "step": 1779 }, { "epoch": 0.2110755365824736, "grad_norm": 1.5582019366268987, "learning_rate": 4.993206261730963e-05, "loss": 1.0723, "step": 1780 }, { "epoch": 0.2111941183445986, "grad_norm": 1.6606393223791542, "learning_rate": 4.9931885662294534e-05, "loss": 1.2432, "step": 1781 }, { "epoch": 0.2113127001067236, "grad_norm": 1.6178539362453277, "learning_rate": 4.993170847743803e-05, "loss": 0.9048, "step": 1782 }, { "epoch": 0.21143128186884858, "grad_norm": 1.5738489570577288, "learning_rate": 4.9931531062741756e-05, "loss": 0.8792, "step": 1783 }, { "epoch": 0.21154986363097356, "grad_norm": 1.947769728555521, "learning_rate": 4.993135341820733e-05, "loss": 1.0994, "step": 1784 }, { "epoch": 0.21166844539309854, "grad_norm": 1.5472524602914977, "learning_rate": 4.9931175543836405e-05, "loss": 1.0497, "step": 1785 }, { "epoch": 0.21178702715522352, "grad_norm": 1.6382354891865967, "learning_rate": 4.993099743963061e-05, "loss": 1.2515, "step": 1786 }, { "epoch": 0.2119056089173485, "grad_norm": 1.9099621718026734, "learning_rate": 4.9930819105591586e-05, "loss": 0.9412, "step": 1787 }, { "epoch": 0.2120241906794735, "grad_norm": 1.559666977472927, "learning_rate": 4.993064054172099e-05, "loss": 0.8705, "step": 1788 }, { "epoch": 0.2121427724415985, "grad_norm": 1.5062377742763124, "learning_rate": 4.993046174802046e-05, "loss": 0.8809, "step": 1789 }, { "epoch": 0.21226135420372347, "grad_norm": 1.784795651827728, "learning_rate": 4.9930282724491636e-05, "loss": 1.0041, "step": 1790 }, { "epoch": 0.21237993596584845, "grad_norm": 1.9033927797707701, "learning_rate": 4.993010347113618e-05, "loss": 1.1279, "step": 1791 }, { "epoch": 0.21249851772797343, "grad_norm": 1.4942523448965166, "learning_rate": 4.992992398795575e-05, "loss": 1.1273, "step": 1792 }, { "epoch": 0.2126170994900984, "grad_norm": 1.8056002101571806, "learning_rate": 4.992974427495198e-05, "loss": 1.0004, "step": 1793 }, { "epoch": 0.21273568125222342, "grad_norm": 1.5329191765979016, "learning_rate": 4.9929564332126544e-05, "loss": 1.0626, "step": 1794 }, { "epoch": 0.2128542630143484, "grad_norm": 1.7662363309001166, "learning_rate": 4.99293841594811e-05, "loss": 0.9929, "step": 1795 }, { "epoch": 0.21297284477647338, "grad_norm": 1.498106031027585, "learning_rate": 4.99292037570173e-05, "loss": 0.9335, "step": 1796 }, { "epoch": 0.21309142653859836, "grad_norm": 1.339438713242771, "learning_rate": 4.9929023124736815e-05, "loss": 1.1458, "step": 1797 }, { "epoch": 0.21321000830072334, "grad_norm": 1.584225465802162, "learning_rate": 4.99288422626413e-05, "loss": 0.7808, "step": 1798 }, { "epoch": 0.21332859006284832, "grad_norm": 1.513684303964356, "learning_rate": 4.9928661170732435e-05, "loss": 0.9652, "step": 1799 }, { "epoch": 0.21344717182497333, "grad_norm": 1.4038372598318014, "learning_rate": 4.992847984901188e-05, "loss": 0.8727, "step": 1800 }, { "epoch": 0.2135657535870983, "grad_norm": 1.31897524492445, "learning_rate": 4.9928298297481314e-05, "loss": 0.8711, "step": 1801 }, { "epoch": 0.2136843353492233, "grad_norm": 1.695394487890238, "learning_rate": 4.99281165161424e-05, "loss": 0.7944, "step": 1802 }, { "epoch": 0.21380291711134827, "grad_norm": 1.5551358484408893, "learning_rate": 4.992793450499682e-05, "loss": 1.1532, "step": 1803 }, { "epoch": 0.21392149887347325, "grad_norm": 1.5269241141660146, "learning_rate": 4.992775226404626e-05, "loss": 0.8349, "step": 1804 }, { "epoch": 0.21404008063559823, "grad_norm": 1.5805087719348463, "learning_rate": 4.9927569793292394e-05, "loss": 1.0319, "step": 1805 }, { "epoch": 0.21415866239772324, "grad_norm": 1.6503897720899885, "learning_rate": 4.99273870927369e-05, "loss": 1.1086, "step": 1806 }, { "epoch": 0.21427724415984822, "grad_norm": 1.5635799591579393, "learning_rate": 4.992720416238146e-05, "loss": 0.8309, "step": 1807 }, { "epoch": 0.2143958259219732, "grad_norm": 1.6465010435770757, "learning_rate": 4.992702100222777e-05, "loss": 1.1674, "step": 1808 }, { "epoch": 0.21451440768409819, "grad_norm": 1.8014310311556232, "learning_rate": 4.992683761227751e-05, "loss": 0.8525, "step": 1809 }, { "epoch": 0.21463298944622317, "grad_norm": 1.8974436233092724, "learning_rate": 4.992665399253238e-05, "loss": 1.2909, "step": 1810 }, { "epoch": 0.21475157120834815, "grad_norm": 1.6709769946132582, "learning_rate": 4.9926470142994064e-05, "loss": 1.1106, "step": 1811 }, { "epoch": 0.21487015297047315, "grad_norm": 1.798964123628831, "learning_rate": 4.992628606366426e-05, "loss": 1.0588, "step": 1812 }, { "epoch": 0.21498873473259814, "grad_norm": 1.374801196280157, "learning_rate": 4.992610175454466e-05, "loss": 0.8615, "step": 1813 }, { "epoch": 0.21510731649472312, "grad_norm": 1.4826999433025927, "learning_rate": 4.992591721563698e-05, "loss": 1.1079, "step": 1814 }, { "epoch": 0.2152258982568481, "grad_norm": 1.67673822479578, "learning_rate": 4.99257324469429e-05, "loss": 1.061, "step": 1815 }, { "epoch": 0.21534448001897308, "grad_norm": 1.4063072308004343, "learning_rate": 4.992554744846414e-05, "loss": 0.8714, "step": 1816 }, { "epoch": 0.21546306178109806, "grad_norm": 1.3383712365027427, "learning_rate": 4.9925362220202394e-05, "loss": 0.8772, "step": 1817 }, { "epoch": 0.21558164354322304, "grad_norm": 1.3765919670828244, "learning_rate": 4.9925176762159374e-05, "loss": 0.7735, "step": 1818 }, { "epoch": 0.21570022530534805, "grad_norm": 1.5172475275785322, "learning_rate": 4.992499107433679e-05, "loss": 0.8821, "step": 1819 }, { "epoch": 0.21581880706747303, "grad_norm": 1.4200037960979732, "learning_rate": 4.992480515673635e-05, "loss": 0.9199, "step": 1820 }, { "epoch": 0.215937388829598, "grad_norm": 1.6199742256974323, "learning_rate": 4.992461900935977e-05, "loss": 1.0698, "step": 1821 }, { "epoch": 0.216055970591723, "grad_norm": 1.320286491922516, "learning_rate": 4.9924432632208777e-05, "loss": 0.9704, "step": 1822 }, { "epoch": 0.21617455235384797, "grad_norm": 1.6772689637681681, "learning_rate": 4.992424602528508e-05, "loss": 0.9608, "step": 1823 }, { "epoch": 0.21629313411597295, "grad_norm": 1.7511405558431619, "learning_rate": 4.992405918859039e-05, "loss": 1.1828, "step": 1824 }, { "epoch": 0.21641171587809796, "grad_norm": 1.7018676054889086, "learning_rate": 4.992387212212644e-05, "loss": 1.2891, "step": 1825 }, { "epoch": 0.21653029764022294, "grad_norm": 1.6533398538861304, "learning_rate": 4.9923684825894956e-05, "loss": 1.1131, "step": 1826 }, { "epoch": 0.21664887940234792, "grad_norm": 1.4746690963194322, "learning_rate": 4.992349729989766e-05, "loss": 0.8581, "step": 1827 }, { "epoch": 0.2167674611644729, "grad_norm": 1.5928974923688193, "learning_rate": 4.992330954413628e-05, "loss": 0.8705, "step": 1828 }, { "epoch": 0.21688604292659788, "grad_norm": 1.3997086429905612, "learning_rate": 4.992312155861255e-05, "loss": 0.9265, "step": 1829 }, { "epoch": 0.21700462468872286, "grad_norm": 1.4520132435561812, "learning_rate": 4.99229333433282e-05, "loss": 1.0254, "step": 1830 }, { "epoch": 0.21712320645084787, "grad_norm": 1.6876108352881252, "learning_rate": 4.992274489828497e-05, "loss": 1.0986, "step": 1831 }, { "epoch": 0.21724178821297285, "grad_norm": 1.7470058293604902, "learning_rate": 4.9922556223484597e-05, "loss": 1.0919, "step": 1832 }, { "epoch": 0.21736036997509783, "grad_norm": 1.6596551857657267, "learning_rate": 4.992236731892881e-05, "loss": 0.8802, "step": 1833 }, { "epoch": 0.21747895173722281, "grad_norm": 1.927651363943336, "learning_rate": 4.992217818461936e-05, "loss": 0.9943, "step": 1834 }, { "epoch": 0.2175975334993478, "grad_norm": 1.5664505955630812, "learning_rate": 4.9921988820558e-05, "loss": 1.0237, "step": 1835 }, { "epoch": 0.21771611526147278, "grad_norm": 1.5156700004582557, "learning_rate": 4.992179922674645e-05, "loss": 1.0596, "step": 1836 }, { "epoch": 0.21783469702359778, "grad_norm": 1.856405408669696, "learning_rate": 4.992160940318648e-05, "loss": 1.1608, "step": 1837 }, { "epoch": 0.21795327878572276, "grad_norm": 1.542693395552593, "learning_rate": 4.9921419349879825e-05, "loss": 1.0861, "step": 1838 }, { "epoch": 0.21807186054784775, "grad_norm": 1.7158448204857237, "learning_rate": 4.9921229066828255e-05, "loss": 1.0082, "step": 1839 }, { "epoch": 0.21819044230997273, "grad_norm": 1.525708005750468, "learning_rate": 4.9921038554033506e-05, "loss": 1.0691, "step": 1840 }, { "epoch": 0.2183090240720977, "grad_norm": 1.653714723069598, "learning_rate": 4.992084781149735e-05, "loss": 0.9966, "step": 1841 }, { "epoch": 0.2184276058342227, "grad_norm": 1.3515594570526117, "learning_rate": 4.9920656839221526e-05, "loss": 1.3076, "step": 1842 }, { "epoch": 0.21854618759634767, "grad_norm": 1.9200494896537197, "learning_rate": 4.992046563720781e-05, "loss": 1.0626, "step": 1843 }, { "epoch": 0.21866476935847268, "grad_norm": 1.4220167862980555, "learning_rate": 4.9920274205457964e-05, "loss": 1.1005, "step": 1844 }, { "epoch": 0.21878335112059766, "grad_norm": 1.5352022156011431, "learning_rate": 4.992008254397375e-05, "loss": 1.1366, "step": 1845 }, { "epoch": 0.21890193288272264, "grad_norm": 1.3843520916336933, "learning_rate": 4.9919890652756924e-05, "loss": 1.0221, "step": 1846 }, { "epoch": 0.21902051464484762, "grad_norm": 1.57518514189869, "learning_rate": 4.991969853180927e-05, "loss": 1.0119, "step": 1847 }, { "epoch": 0.2191390964069726, "grad_norm": 1.5895401987736604, "learning_rate": 4.991950618113256e-05, "loss": 1.0046, "step": 1848 }, { "epoch": 0.21925767816909758, "grad_norm": 1.5784149900482223, "learning_rate": 4.991931360072855e-05, "loss": 1.1749, "step": 1849 }, { "epoch": 0.2193762599312226, "grad_norm": 1.5334504225497627, "learning_rate": 4.9919120790599034e-05, "loss": 0.9645, "step": 1850 }, { "epoch": 0.21949484169334757, "grad_norm": 1.4970358394519854, "learning_rate": 4.991892775074578e-05, "loss": 0.9116, "step": 1851 }, { "epoch": 0.21961342345547255, "grad_norm": 1.7281591112546828, "learning_rate": 4.991873448117057e-05, "loss": 1.0225, "step": 1852 }, { "epoch": 0.21973200521759753, "grad_norm": 1.6350897061630507, "learning_rate": 4.991854098187519e-05, "loss": 1.1064, "step": 1853 }, { "epoch": 0.2198505869797225, "grad_norm": 1.7686439260996882, "learning_rate": 4.991834725286141e-05, "loss": 0.8749, "step": 1854 }, { "epoch": 0.2199691687418475, "grad_norm": 1.7677798921713643, "learning_rate": 4.991815329413103e-05, "loss": 1.158, "step": 1855 }, { "epoch": 0.2200877505039725, "grad_norm": 1.9020717492809287, "learning_rate": 4.991795910568584e-05, "loss": 1.1096, "step": 1856 }, { "epoch": 0.22020633226609748, "grad_norm": 1.9522659629924806, "learning_rate": 4.991776468752761e-05, "loss": 1.0991, "step": 1857 }, { "epoch": 0.22032491402822246, "grad_norm": 1.4349160179485294, "learning_rate": 4.991757003965816e-05, "loss": 1.0615, "step": 1858 }, { "epoch": 0.22044349579034744, "grad_norm": 1.423929420477791, "learning_rate": 4.991737516207926e-05, "loss": 0.8839, "step": 1859 }, { "epoch": 0.22056207755247242, "grad_norm": 1.5044535285252192, "learning_rate": 4.991718005479272e-05, "loss": 1.1417, "step": 1860 }, { "epoch": 0.2206806593145974, "grad_norm": 1.7414041355935288, "learning_rate": 4.9916984717800344e-05, "loss": 1.0484, "step": 1861 }, { "epoch": 0.2207992410767224, "grad_norm": 1.4437084033895031, "learning_rate": 4.991678915110391e-05, "loss": 1.1822, "step": 1862 }, { "epoch": 0.2209178228388474, "grad_norm": 1.4455191507665384, "learning_rate": 4.991659335470524e-05, "loss": 1.1809, "step": 1863 }, { "epoch": 0.22103640460097237, "grad_norm": 1.4945772055832913, "learning_rate": 4.9916397328606134e-05, "loss": 1.1299, "step": 1864 }, { "epoch": 0.22115498636309736, "grad_norm": 1.3635213557253822, "learning_rate": 4.99162010728084e-05, "loss": 0.9142, "step": 1865 }, { "epoch": 0.22127356812522234, "grad_norm": 1.509206278558251, "learning_rate": 4.991600458731385e-05, "loss": 0.9276, "step": 1866 }, { "epoch": 0.22139214988734732, "grad_norm": 1.4188155367741584, "learning_rate": 4.9915807872124286e-05, "loss": 1.0059, "step": 1867 }, { "epoch": 0.2215107316494723, "grad_norm": 1.3584141328533217, "learning_rate": 4.991561092724153e-05, "loss": 0.861, "step": 1868 }, { "epoch": 0.2216293134115973, "grad_norm": 1.2936543472621367, "learning_rate": 4.9915413752667394e-05, "loss": 0.743, "step": 1869 }, { "epoch": 0.2217478951737223, "grad_norm": 1.468589159350368, "learning_rate": 4.9915216348403696e-05, "loss": 0.7735, "step": 1870 }, { "epoch": 0.22186647693584727, "grad_norm": 1.4481797410067405, "learning_rate": 4.9915018714452255e-05, "loss": 0.8009, "step": 1871 }, { "epoch": 0.22198505869797225, "grad_norm": 1.46429387221064, "learning_rate": 4.9914820850814895e-05, "loss": 1.0691, "step": 1872 }, { "epoch": 0.22210364046009723, "grad_norm": 1.6581296181932614, "learning_rate": 4.9914622757493445e-05, "loss": 1.1952, "step": 1873 }, { "epoch": 0.2222222222222222, "grad_norm": 1.6169357029747768, "learning_rate": 4.9914424434489714e-05, "loss": 1.0887, "step": 1874 }, { "epoch": 0.22234080398434722, "grad_norm": 1.7652292970013435, "learning_rate": 4.991422588180555e-05, "loss": 0.9275, "step": 1875 }, { "epoch": 0.2224593857464722, "grad_norm": 2.2091805829322753, "learning_rate": 4.991402709944277e-05, "loss": 1.0107, "step": 1876 }, { "epoch": 0.22257796750859718, "grad_norm": 1.6709860603153783, "learning_rate": 4.991382808740322e-05, "loss": 1.0192, "step": 1877 }, { "epoch": 0.22269654927072216, "grad_norm": 1.5914127315097557, "learning_rate": 4.991362884568872e-05, "loss": 1.0754, "step": 1878 }, { "epoch": 0.22281513103284714, "grad_norm": 2.1609347023659455, "learning_rate": 4.9913429374301117e-05, "loss": 0.9836, "step": 1879 }, { "epoch": 0.22293371279497212, "grad_norm": 1.4324581759099748, "learning_rate": 4.9913229673242245e-05, "loss": 0.9519, "step": 1880 }, { "epoch": 0.22305229455709713, "grad_norm": 1.6315959590837206, "learning_rate": 4.991302974251395e-05, "loss": 0.86, "step": 1881 }, { "epoch": 0.2231708763192221, "grad_norm": 1.9131010948498428, "learning_rate": 4.991282958211806e-05, "loss": 0.9786, "step": 1882 }, { "epoch": 0.2232894580813471, "grad_norm": 1.7170206484114023, "learning_rate": 4.991262919205644e-05, "loss": 0.934, "step": 1883 }, { "epoch": 0.22340803984347207, "grad_norm": 1.5232059656739256, "learning_rate": 4.991242857233093e-05, "loss": 0.9466, "step": 1884 }, { "epoch": 0.22352662160559705, "grad_norm": 1.4154528681373066, "learning_rate": 4.991222772294337e-05, "loss": 1.0804, "step": 1885 }, { "epoch": 0.22364520336772203, "grad_norm": 1.4662514737883914, "learning_rate": 4.9912026643895626e-05, "loss": 0.7908, "step": 1886 }, { "epoch": 0.22376378512984704, "grad_norm": 1.5075440337244752, "learning_rate": 4.9911825335189545e-05, "loss": 0.8633, "step": 1887 }, { "epoch": 0.22388236689197202, "grad_norm": 1.5296579985702576, "learning_rate": 4.991162379682698e-05, "loss": 0.9413, "step": 1888 }, { "epoch": 0.224000948654097, "grad_norm": 1.5047062348677775, "learning_rate": 4.99114220288098e-05, "loss": 1.1115, "step": 1889 }, { "epoch": 0.22411953041622198, "grad_norm": 1.8633852031190115, "learning_rate": 4.991122003113985e-05, "loss": 0.9855, "step": 1890 }, { "epoch": 0.22423811217834697, "grad_norm": 1.8703982112153525, "learning_rate": 4.9911017803819e-05, "loss": 1.1626, "step": 1891 }, { "epoch": 0.22435669394047195, "grad_norm": 1.4825220322872457, "learning_rate": 4.9910815346849114e-05, "loss": 0.6765, "step": 1892 }, { "epoch": 0.22447527570259693, "grad_norm": 1.6751652057381117, "learning_rate": 4.991061266023206e-05, "loss": 0.9255, "step": 1893 }, { "epoch": 0.22459385746472194, "grad_norm": 1.5593401623687249, "learning_rate": 4.99104097439697e-05, "loss": 0.9443, "step": 1894 }, { "epoch": 0.22471243922684692, "grad_norm": 1.580381420129191, "learning_rate": 4.9910206598063914e-05, "loss": 0.8111, "step": 1895 }, { "epoch": 0.2248310209889719, "grad_norm": 1.553131240829057, "learning_rate": 4.9910003222516575e-05, "loss": 0.9166, "step": 1896 }, { "epoch": 0.22494960275109688, "grad_norm": 1.4657259942333234, "learning_rate": 4.990979961732954e-05, "loss": 0.9469, "step": 1897 }, { "epoch": 0.22506818451322186, "grad_norm": 1.7574242945632421, "learning_rate": 4.990959578250471e-05, "loss": 0.8758, "step": 1898 }, { "epoch": 0.22518676627534684, "grad_norm": 1.7582971652958272, "learning_rate": 4.9909391718043953e-05, "loss": 1.0759, "step": 1899 }, { "epoch": 0.22530534803747185, "grad_norm": 1.5176181013992036, "learning_rate": 4.990918742394914e-05, "loss": 0.9966, "step": 1900 }, { "epoch": 0.22542392979959683, "grad_norm": 1.6082399969320567, "learning_rate": 4.9908982900222175e-05, "loss": 1.2566, "step": 1901 }, { "epoch": 0.2255425115617218, "grad_norm": 1.7371549585852941, "learning_rate": 4.990877814686493e-05, "loss": 1.0014, "step": 1902 }, { "epoch": 0.2256610933238468, "grad_norm": 1.6710487071226605, "learning_rate": 4.990857316387929e-05, "loss": 1.119, "step": 1903 }, { "epoch": 0.22577967508597177, "grad_norm": 1.5574127459027793, "learning_rate": 4.9908367951267156e-05, "loss": 0.8728, "step": 1904 }, { "epoch": 0.22589825684809675, "grad_norm": 1.5277533332292048, "learning_rate": 4.990816250903041e-05, "loss": 0.9944, "step": 1905 }, { "epoch": 0.22601683861022176, "grad_norm": 1.7278124086170772, "learning_rate": 4.990795683717095e-05, "loss": 0.9807, "step": 1906 }, { "epoch": 0.22613542037234674, "grad_norm": 1.5763014784081089, "learning_rate": 4.9907750935690686e-05, "loss": 0.992, "step": 1907 }, { "epoch": 0.22625400213447172, "grad_norm": 1.8351843750735692, "learning_rate": 4.990754480459149e-05, "loss": 1.2093, "step": 1908 }, { "epoch": 0.2263725838965967, "grad_norm": 1.6613475605159602, "learning_rate": 4.9907338443875276e-05, "loss": 1.0477, "step": 1909 }, { "epoch": 0.22649116565872168, "grad_norm": 1.6068511644574766, "learning_rate": 4.990713185354394e-05, "loss": 1.1514, "step": 1910 }, { "epoch": 0.22660974742084666, "grad_norm": 1.4188395394930773, "learning_rate": 4.99069250335994e-05, "loss": 0.7674, "step": 1911 }, { "epoch": 0.22672832918297167, "grad_norm": 1.5271961528559412, "learning_rate": 4.990671798404355e-05, "loss": 0.9017, "step": 1912 }, { "epoch": 0.22684691094509665, "grad_norm": 1.81350360089599, "learning_rate": 4.990651070487831e-05, "loss": 1.0771, "step": 1913 }, { "epoch": 0.22696549270722163, "grad_norm": 2.037348342366854, "learning_rate": 4.990630319610558e-05, "loss": 1.089, "step": 1914 }, { "epoch": 0.22708407446934661, "grad_norm": 1.6453973283766061, "learning_rate": 4.9906095457727266e-05, "loss": 0.9462, "step": 1915 }, { "epoch": 0.2272026562314716, "grad_norm": 1.5056713329533733, "learning_rate": 4.99058874897453e-05, "loss": 0.9865, "step": 1916 }, { "epoch": 0.22732123799359658, "grad_norm": 1.9082870688953486, "learning_rate": 4.99056792921616e-05, "loss": 1.4323, "step": 1917 }, { "epoch": 0.22743981975572156, "grad_norm": 1.5632063982546813, "learning_rate": 4.990547086497807e-05, "loss": 1.0358, "step": 1918 }, { "epoch": 0.22755840151784656, "grad_norm": 1.4126413997959382, "learning_rate": 4.990526220819665e-05, "loss": 0.8899, "step": 1919 }, { "epoch": 0.22767698327997155, "grad_norm": 1.2892210652447933, "learning_rate": 4.990505332181924e-05, "loss": 0.748, "step": 1920 }, { "epoch": 0.22779556504209653, "grad_norm": 1.3786902145782214, "learning_rate": 4.990484420584778e-05, "loss": 0.9859, "step": 1921 }, { "epoch": 0.2279141468042215, "grad_norm": 1.7865509519350509, "learning_rate": 4.9904634860284205e-05, "loss": 1.031, "step": 1922 }, { "epoch": 0.2280327285663465, "grad_norm": 1.3964635841098754, "learning_rate": 4.990442528513043e-05, "loss": 1.0724, "step": 1923 }, { "epoch": 0.22815131032847147, "grad_norm": 1.9025682596244988, "learning_rate": 4.99042154803884e-05, "loss": 1.0905, "step": 1924 }, { "epoch": 0.22826989209059648, "grad_norm": 1.4511239729599272, "learning_rate": 4.990400544606004e-05, "loss": 1.0908, "step": 1925 }, { "epoch": 0.22838847385272146, "grad_norm": 1.4839117555497356, "learning_rate": 4.990379518214729e-05, "loss": 1.2301, "step": 1926 }, { "epoch": 0.22850705561484644, "grad_norm": 1.446694712344075, "learning_rate": 4.990358468865208e-05, "loss": 0.9176, "step": 1927 }, { "epoch": 0.22862563737697142, "grad_norm": 1.4566533932211327, "learning_rate": 4.9903373965576374e-05, "loss": 0.9359, "step": 1928 }, { "epoch": 0.2287442191390964, "grad_norm": 1.5647290237653095, "learning_rate": 4.990316301292208e-05, "loss": 0.9452, "step": 1929 }, { "epoch": 0.22886280090122138, "grad_norm": 1.683677343503608, "learning_rate": 4.9902951830691174e-05, "loss": 0.9945, "step": 1930 }, { "epoch": 0.2289813826633464, "grad_norm": 1.4797446705615598, "learning_rate": 4.9902740418885584e-05, "loss": 0.7636, "step": 1931 }, { "epoch": 0.22909996442547137, "grad_norm": 1.7636490481249374, "learning_rate": 4.990252877750727e-05, "loss": 1.1825, "step": 1932 }, { "epoch": 0.22921854618759635, "grad_norm": 1.8783997288101326, "learning_rate": 4.9902316906558176e-05, "loss": 1.1034, "step": 1933 }, { "epoch": 0.22933712794972133, "grad_norm": 1.7742946874538126, "learning_rate": 4.990210480604026e-05, "loss": 1.3336, "step": 1934 }, { "epoch": 0.2294557097118463, "grad_norm": 1.7007761766981013, "learning_rate": 4.990189247595547e-05, "loss": 1.0209, "step": 1935 }, { "epoch": 0.2295742914739713, "grad_norm": 1.4748321185817905, "learning_rate": 4.990167991630577e-05, "loss": 1.0802, "step": 1936 }, { "epoch": 0.2296928732360963, "grad_norm": 1.591573942222844, "learning_rate": 4.990146712709311e-05, "loss": 0.9398, "step": 1937 }, { "epoch": 0.22981145499822128, "grad_norm": 1.806026602602224, "learning_rate": 4.990125410831947e-05, "loss": 1.2122, "step": 1938 }, { "epoch": 0.22993003676034626, "grad_norm": 1.5119910869947426, "learning_rate": 4.99010408599868e-05, "loss": 0.8531, "step": 1939 }, { "epoch": 0.23004861852247124, "grad_norm": 1.450478371624302, "learning_rate": 4.990082738209707e-05, "loss": 1.0147, "step": 1940 }, { "epoch": 0.23016720028459622, "grad_norm": 1.7400018832520756, "learning_rate": 4.990061367465225e-05, "loss": 0.9817, "step": 1941 }, { "epoch": 0.2302857820467212, "grad_norm": 1.7258653564423474, "learning_rate": 4.99003997376543e-05, "loss": 1.0653, "step": 1942 }, { "epoch": 0.2304043638088462, "grad_norm": 1.5900747720824306, "learning_rate": 4.990018557110521e-05, "loss": 0.9803, "step": 1943 }, { "epoch": 0.2305229455709712, "grad_norm": 1.664635743731406, "learning_rate": 4.9899971175006935e-05, "loss": 1.0196, "step": 1944 }, { "epoch": 0.23064152733309617, "grad_norm": 1.7589702915611536, "learning_rate": 4.989975654936145e-05, "loss": 1.0616, "step": 1945 }, { "epoch": 0.23076010909522116, "grad_norm": 1.6676224590932205, "learning_rate": 4.989954169417076e-05, "loss": 1.0653, "step": 1946 }, { "epoch": 0.23087869085734614, "grad_norm": 1.7437219448372143, "learning_rate": 4.989932660943683e-05, "loss": 0.9015, "step": 1947 }, { "epoch": 0.23099727261947112, "grad_norm": 1.7960057698934622, "learning_rate": 4.989911129516163e-05, "loss": 1.116, "step": 1948 }, { "epoch": 0.2311158543815961, "grad_norm": 1.742686596708882, "learning_rate": 4.989889575134716e-05, "loss": 1.0465, "step": 1949 }, { "epoch": 0.2312344361437211, "grad_norm": 1.621432342286824, "learning_rate": 4.989867997799541e-05, "loss": 1.0201, "step": 1950 }, { "epoch": 0.2313530179058461, "grad_norm": 1.5817729674707892, "learning_rate": 4.989846397510836e-05, "loss": 1.0506, "step": 1951 }, { "epoch": 0.23147159966797107, "grad_norm": 1.6586796590566955, "learning_rate": 4.989824774268801e-05, "loss": 1.1147, "step": 1952 }, { "epoch": 0.23159018143009605, "grad_norm": 1.4491229999935904, "learning_rate": 4.989803128073634e-05, "loss": 1.0071, "step": 1953 }, { "epoch": 0.23170876319222103, "grad_norm": 1.5262658252815045, "learning_rate": 4.9897814589255356e-05, "loss": 1.0998, "step": 1954 }, { "epoch": 0.231827344954346, "grad_norm": 1.5414346089595201, "learning_rate": 4.9897597668247053e-05, "loss": 0.8681, "step": 1955 }, { "epoch": 0.23194592671647102, "grad_norm": 1.6120838818867245, "learning_rate": 4.9897380517713435e-05, "loss": 0.886, "step": 1956 }, { "epoch": 0.232064508478596, "grad_norm": 1.4884754413795882, "learning_rate": 4.98971631376565e-05, "loss": 0.9615, "step": 1957 }, { "epoch": 0.23218309024072098, "grad_norm": 1.6553146961222758, "learning_rate": 4.989694552807825e-05, "loss": 0.9295, "step": 1958 }, { "epoch": 0.23230167200284596, "grad_norm": 1.5886543437484113, "learning_rate": 4.9896727688980685e-05, "loss": 1.1488, "step": 1959 }, { "epoch": 0.23242025376497094, "grad_norm": 1.4116963124391961, "learning_rate": 4.9896509620365837e-05, "loss": 0.7749, "step": 1960 }, { "epoch": 0.23253883552709592, "grad_norm": 1.6624504635704234, "learning_rate": 4.989629132223569e-05, "loss": 0.9358, "step": 1961 }, { "epoch": 0.23265741728922093, "grad_norm": 1.5117295325039906, "learning_rate": 4.9896072794592265e-05, "loss": 0.7922, "step": 1962 }, { "epoch": 0.2327759990513459, "grad_norm": 1.563330494149231, "learning_rate": 4.989585403743758e-05, "loss": 0.8102, "step": 1963 }, { "epoch": 0.2328945808134709, "grad_norm": 1.8466863593704017, "learning_rate": 4.9895635050773655e-05, "loss": 1.0596, "step": 1964 }, { "epoch": 0.23301316257559587, "grad_norm": 1.6115387898798983, "learning_rate": 4.98954158346025e-05, "loss": 1.082, "step": 1965 }, { "epoch": 0.23313174433772085, "grad_norm": 1.4355556888688414, "learning_rate": 4.9895196388926134e-05, "loss": 0.9935, "step": 1966 }, { "epoch": 0.23325032609984583, "grad_norm": 1.6208885503856885, "learning_rate": 4.98949767137466e-05, "loss": 1.0307, "step": 1967 }, { "epoch": 0.23336890786197084, "grad_norm": 1.4891803992804684, "learning_rate": 4.98947568090659e-05, "loss": 0.8513, "step": 1968 }, { "epoch": 0.23348748962409582, "grad_norm": 1.6014904889871437, "learning_rate": 4.989453667488607e-05, "loss": 0.7923, "step": 1969 }, { "epoch": 0.2336060713862208, "grad_norm": 1.5880776397570076, "learning_rate": 4.9894316311209146e-05, "loss": 0.9867, "step": 1970 }, { "epoch": 0.23372465314834578, "grad_norm": 1.688895651743746, "learning_rate": 4.989409571803714e-05, "loss": 0.8132, "step": 1971 }, { "epoch": 0.23384323491047077, "grad_norm": 1.6150611764032206, "learning_rate": 4.9893874895372104e-05, "loss": 1.1188, "step": 1972 }, { "epoch": 0.23396181667259575, "grad_norm": 1.5076617450993473, "learning_rate": 4.989365384321608e-05, "loss": 0.9619, "step": 1973 }, { "epoch": 0.23408039843472073, "grad_norm": 1.7063303632871143, "learning_rate": 4.989343256157109e-05, "loss": 0.9928, "step": 1974 }, { "epoch": 0.23419898019684574, "grad_norm": 1.9220408091723056, "learning_rate": 4.989321105043917e-05, "loss": 1.143, "step": 1975 }, { "epoch": 0.23431756195897072, "grad_norm": 1.6124276217819924, "learning_rate": 4.9892989309822366e-05, "loss": 1.1244, "step": 1976 }, { "epoch": 0.2344361437210957, "grad_norm": 1.5301767942807971, "learning_rate": 4.989276733972274e-05, "loss": 1.0421, "step": 1977 }, { "epoch": 0.23455472548322068, "grad_norm": 1.654123751710572, "learning_rate": 4.989254514014231e-05, "loss": 1.0686, "step": 1978 }, { "epoch": 0.23467330724534566, "grad_norm": 1.614887529451615, "learning_rate": 4.989232271108315e-05, "loss": 1.0473, "step": 1979 }, { "epoch": 0.23479188900747064, "grad_norm": 1.542641082392051, "learning_rate": 4.98921000525473e-05, "loss": 0.8383, "step": 1980 }, { "epoch": 0.23491047076959565, "grad_norm": 1.5262365828713016, "learning_rate": 4.989187716453681e-05, "loss": 1.0046, "step": 1981 }, { "epoch": 0.23502905253172063, "grad_norm": 1.53292325365757, "learning_rate": 4.989165404705374e-05, "loss": 0.8342, "step": 1982 }, { "epoch": 0.2351476342938456, "grad_norm": 1.3475749264992607, "learning_rate": 4.989143070010014e-05, "loss": 0.9374, "step": 1983 }, { "epoch": 0.2352662160559706, "grad_norm": 1.6221715921766138, "learning_rate": 4.9891207123678066e-05, "loss": 1.1065, "step": 1984 }, { "epoch": 0.23538479781809557, "grad_norm": 1.4483109153226144, "learning_rate": 4.98909833177896e-05, "loss": 0.8322, "step": 1985 }, { "epoch": 0.23550337958022055, "grad_norm": 1.5834207187261131, "learning_rate": 4.989075928243678e-05, "loss": 1.2424, "step": 1986 }, { "epoch": 0.23562196134234556, "grad_norm": 1.6574874702700058, "learning_rate": 4.989053501762169e-05, "loss": 0.9054, "step": 1987 }, { "epoch": 0.23574054310447054, "grad_norm": 1.4067236059962838, "learning_rate": 4.989031052334639e-05, "loss": 1.0597, "step": 1988 }, { "epoch": 0.23585912486659552, "grad_norm": 1.6466900317681978, "learning_rate": 4.989008579961294e-05, "loss": 1.0568, "step": 1989 }, { "epoch": 0.2359777066287205, "grad_norm": 1.3927862769275399, "learning_rate": 4.9889860846423424e-05, "loss": 1.0058, "step": 1990 }, { "epoch": 0.23609628839084548, "grad_norm": 1.3822215912561235, "learning_rate": 4.9889635663779924e-05, "loss": 0.8841, "step": 1991 }, { "epoch": 0.23621487015297046, "grad_norm": 1.4574827504673697, "learning_rate": 4.988941025168449e-05, "loss": 0.6813, "step": 1992 }, { "epoch": 0.23633345191509547, "grad_norm": 1.719004768181018, "learning_rate": 4.9889184610139224e-05, "loss": 0.9685, "step": 1993 }, { "epoch": 0.23645203367722045, "grad_norm": 2.0527458524994926, "learning_rate": 4.98889587391462e-05, "loss": 1.1746, "step": 1994 }, { "epoch": 0.23657061543934543, "grad_norm": 1.4936699647859821, "learning_rate": 4.988873263870749e-05, "loss": 0.9448, "step": 1995 }, { "epoch": 0.2366891972014704, "grad_norm": 1.4439063670962893, "learning_rate": 4.988850630882518e-05, "loss": 0.977, "step": 1996 }, { "epoch": 0.2368077789635954, "grad_norm": 1.419087140707462, "learning_rate": 4.9888279749501376e-05, "loss": 0.9751, "step": 1997 }, { "epoch": 0.23692636072572038, "grad_norm": 1.4273422518297376, "learning_rate": 4.988805296073814e-05, "loss": 1.1365, "step": 1998 }, { "epoch": 0.23704494248784536, "grad_norm": 1.453329512122131, "learning_rate": 4.988782594253759e-05, "loss": 0.844, "step": 1999 }, { "epoch": 0.23716352424997036, "grad_norm": 1.4305247347925087, "learning_rate": 4.988759869490179e-05, "loss": 1.0306, "step": 2000 }, { "epoch": 0.23728210601209535, "grad_norm": 1.5011974457556396, "learning_rate": 4.988737121783286e-05, "loss": 0.9683, "step": 2001 }, { "epoch": 0.23740068777422033, "grad_norm": 1.6997714885769597, "learning_rate": 4.988714351133288e-05, "loss": 1.0922, "step": 2002 }, { "epoch": 0.2375192695363453, "grad_norm": 1.6200599655597099, "learning_rate": 4.988691557540396e-05, "loss": 0.9344, "step": 2003 }, { "epoch": 0.2376378512984703, "grad_norm": 1.7060622861567702, "learning_rate": 4.988668741004819e-05, "loss": 0.9164, "step": 2004 }, { "epoch": 0.23775643306059527, "grad_norm": 1.5158364073916335, "learning_rate": 4.9886459015267686e-05, "loss": 1.0081, "step": 2005 }, { "epoch": 0.23787501482272028, "grad_norm": 1.5915494494909135, "learning_rate": 4.9886230391064546e-05, "loss": 0.9278, "step": 2006 }, { "epoch": 0.23799359658484526, "grad_norm": 1.537703096356776, "learning_rate": 4.988600153744088e-05, "loss": 1.0062, "step": 2007 }, { "epoch": 0.23811217834697024, "grad_norm": 1.4974768986296876, "learning_rate": 4.988577245439879e-05, "loss": 1.1319, "step": 2008 }, { "epoch": 0.23823076010909522, "grad_norm": 1.351841212945489, "learning_rate": 4.988554314194041e-05, "loss": 0.9002, "step": 2009 }, { "epoch": 0.2383493418712202, "grad_norm": 1.5079069125205833, "learning_rate": 4.988531360006783e-05, "loss": 0.9929, "step": 2010 }, { "epoch": 0.23846792363334518, "grad_norm": 1.6035690337295347, "learning_rate": 4.9885083828783177e-05, "loss": 0.8551, "step": 2011 }, { "epoch": 0.2385865053954702, "grad_norm": 1.6362171810052264, "learning_rate": 4.988485382808856e-05, "loss": 1.0243, "step": 2012 }, { "epoch": 0.23870508715759517, "grad_norm": 1.7725984468404168, "learning_rate": 4.9884623597986114e-05, "loss": 1.0561, "step": 2013 }, { "epoch": 0.23882366891972015, "grad_norm": 1.6036716667503024, "learning_rate": 4.988439313847795e-05, "loss": 1.0867, "step": 2014 }, { "epoch": 0.23894225068184513, "grad_norm": 1.4178518793455746, "learning_rate": 4.9884162449566195e-05, "loss": 0.7159, "step": 2015 }, { "epoch": 0.2390608324439701, "grad_norm": 1.3227277049945634, "learning_rate": 4.988393153125298e-05, "loss": 0.7813, "step": 2016 }, { "epoch": 0.2391794142060951, "grad_norm": 1.4046718367654667, "learning_rate": 4.988370038354043e-05, "loss": 0.9646, "step": 2017 }, { "epoch": 0.2392979959682201, "grad_norm": 1.8928861731811264, "learning_rate": 4.988346900643068e-05, "loss": 0.9578, "step": 2018 }, { "epoch": 0.23941657773034508, "grad_norm": 1.6294544528772894, "learning_rate": 4.9883237399925866e-05, "loss": 1.0142, "step": 2019 }, { "epoch": 0.23953515949247006, "grad_norm": 1.66823103934097, "learning_rate": 4.988300556402811e-05, "loss": 1.0936, "step": 2020 }, { "epoch": 0.23965374125459504, "grad_norm": 1.3626170809403297, "learning_rate": 4.9882773498739554e-05, "loss": 0.7495, "step": 2021 }, { "epoch": 0.23977232301672002, "grad_norm": 1.6676941995618688, "learning_rate": 4.988254120406234e-05, "loss": 0.8901, "step": 2022 }, { "epoch": 0.239890904778845, "grad_norm": 1.4399585787567406, "learning_rate": 4.988230867999861e-05, "loss": 0.9626, "step": 2023 }, { "epoch": 0.24000948654096999, "grad_norm": 1.5074320076237988, "learning_rate": 4.988207592655051e-05, "loss": 0.921, "step": 2024 }, { "epoch": 0.240128068303095, "grad_norm": 1.6650399626345913, "learning_rate": 4.9881842943720175e-05, "loss": 0.9947, "step": 2025 }, { "epoch": 0.24024665006521997, "grad_norm": 1.2800387119811232, "learning_rate": 4.988160973150976e-05, "loss": 0.7056, "step": 2026 }, { "epoch": 0.24036523182734496, "grad_norm": 1.5124841667699283, "learning_rate": 4.988137628992142e-05, "loss": 0.9348, "step": 2027 }, { "epoch": 0.24048381358946994, "grad_norm": 1.5476100031479807, "learning_rate": 4.9881142618957306e-05, "loss": 1.0174, "step": 2028 }, { "epoch": 0.24060239535159492, "grad_norm": 1.506464539576529, "learning_rate": 4.988090871861956e-05, "loss": 0.7834, "step": 2029 }, { "epoch": 0.2407209771137199, "grad_norm": 1.429115642549729, "learning_rate": 4.988067458891035e-05, "loss": 1.1353, "step": 2030 }, { "epoch": 0.2408395588758449, "grad_norm": 1.3947657879611741, "learning_rate": 4.988044022983184e-05, "loss": 0.9847, "step": 2031 }, { "epoch": 0.2409581406379699, "grad_norm": 1.6371821895710357, "learning_rate": 4.9880205641386166e-05, "loss": 1.1881, "step": 2032 }, { "epoch": 0.24107672240009487, "grad_norm": 1.5567591383339119, "learning_rate": 4.9879970823575516e-05, "loss": 0.8566, "step": 2033 }, { "epoch": 0.24119530416221985, "grad_norm": 1.5132029475982507, "learning_rate": 4.987973577640204e-05, "loss": 1.0845, "step": 2034 }, { "epoch": 0.24131388592434483, "grad_norm": 1.685839343053115, "learning_rate": 4.987950049986792e-05, "loss": 0.9758, "step": 2035 }, { "epoch": 0.2414324676864698, "grad_norm": 1.7948089256830078, "learning_rate": 4.98792649939753e-05, "loss": 0.9953, "step": 2036 }, { "epoch": 0.24155104944859482, "grad_norm": 1.5236375384391327, "learning_rate": 4.987902925872637e-05, "loss": 1.1155, "step": 2037 }, { "epoch": 0.2416696312107198, "grad_norm": 1.9413252307354085, "learning_rate": 4.98787932941233e-05, "loss": 1.1305, "step": 2038 }, { "epoch": 0.24178821297284478, "grad_norm": 1.5927993451224265, "learning_rate": 4.987855710016827e-05, "loss": 0.8815, "step": 2039 }, { "epoch": 0.24190679473496976, "grad_norm": 1.5541187183107388, "learning_rate": 4.9878320676863447e-05, "loss": 0.9947, "step": 2040 }, { "epoch": 0.24202537649709474, "grad_norm": 1.7437707027452671, "learning_rate": 4.9878084024211016e-05, "loss": 1.1141, "step": 2041 }, { "epoch": 0.24214395825921972, "grad_norm": 1.3092698415644695, "learning_rate": 4.987784714221315e-05, "loss": 0.6535, "step": 2042 }, { "epoch": 0.24226254002134473, "grad_norm": 1.4673804816217528, "learning_rate": 4.987761003087205e-05, "loss": 0.8086, "step": 2043 }, { "epoch": 0.2423811217834697, "grad_norm": 1.611202688500416, "learning_rate": 4.98773726901899e-05, "loss": 1.1189, "step": 2044 }, { "epoch": 0.2424997035455947, "grad_norm": 1.6697317423480866, "learning_rate": 4.987713512016886e-05, "loss": 0.9961, "step": 2045 }, { "epoch": 0.24261828530771967, "grad_norm": 1.549985483044613, "learning_rate": 4.9876897320811156e-05, "loss": 0.9909, "step": 2046 }, { "epoch": 0.24273686706984465, "grad_norm": 1.7362645097211575, "learning_rate": 4.9876659292118954e-05, "loss": 1.1183, "step": 2047 }, { "epoch": 0.24285544883196963, "grad_norm": 1.3649091551872485, "learning_rate": 4.9876421034094465e-05, "loss": 0.9134, "step": 2048 }, { "epoch": 0.24297403059409464, "grad_norm": 1.4132473796853338, "learning_rate": 4.987618254673988e-05, "loss": 0.9402, "step": 2049 }, { "epoch": 0.24309261235621962, "grad_norm": 1.6130785263956027, "learning_rate": 4.98759438300574e-05, "loss": 0.9947, "step": 2050 }, { "epoch": 0.2432111941183446, "grad_norm": 1.7264268441998956, "learning_rate": 4.987570488404922e-05, "loss": 0.9563, "step": 2051 }, { "epoch": 0.24332977588046958, "grad_norm": 1.9545428216677283, "learning_rate": 4.987546570871755e-05, "loss": 1.0508, "step": 2052 }, { "epoch": 0.24344835764259457, "grad_norm": 1.5359874623630718, "learning_rate": 4.9875226304064585e-05, "loss": 1.0449, "step": 2053 }, { "epoch": 0.24356693940471955, "grad_norm": 1.4272986609242349, "learning_rate": 4.9874986670092536e-05, "loss": 1.0308, "step": 2054 }, { "epoch": 0.24368552116684453, "grad_norm": 1.529774285897543, "learning_rate": 4.987474680680361e-05, "loss": 0.8353, "step": 2055 }, { "epoch": 0.24380410292896953, "grad_norm": 1.5517482498026967, "learning_rate": 4.987450671420003e-05, "loss": 0.8764, "step": 2056 }, { "epoch": 0.24392268469109452, "grad_norm": 1.894789000668694, "learning_rate": 4.9874266392283995e-05, "loss": 0.9849, "step": 2057 }, { "epoch": 0.2440412664532195, "grad_norm": 1.378135361945518, "learning_rate": 4.987402584105774e-05, "loss": 0.7118, "step": 2058 }, { "epoch": 0.24415984821534448, "grad_norm": 2.154430703838357, "learning_rate": 4.9873785060523454e-05, "loss": 1.1367, "step": 2059 }, { "epoch": 0.24427842997746946, "grad_norm": 1.8515920718017587, "learning_rate": 4.987354405068337e-05, "loss": 1.1248, "step": 2060 }, { "epoch": 0.24439701173959444, "grad_norm": 1.419908808170865, "learning_rate": 4.987330281153973e-05, "loss": 1.0317, "step": 2061 }, { "epoch": 0.24451559350171945, "grad_norm": 1.5629243897126097, "learning_rate": 4.987306134309473e-05, "loss": 0.7564, "step": 2062 }, { "epoch": 0.24463417526384443, "grad_norm": 1.7219430406112892, "learning_rate": 4.98728196453506e-05, "loss": 1.0869, "step": 2063 }, { "epoch": 0.2447527570259694, "grad_norm": 1.3396006974954793, "learning_rate": 4.987257771830958e-05, "loss": 0.6436, "step": 2064 }, { "epoch": 0.2448713387880944, "grad_norm": 1.6507259608009928, "learning_rate": 4.987233556197389e-05, "loss": 0.9214, "step": 2065 }, { "epoch": 0.24498992055021937, "grad_norm": 1.7201876753334069, "learning_rate": 4.987209317634577e-05, "loss": 1.2168, "step": 2066 }, { "epoch": 0.24510850231234435, "grad_norm": 1.6582412831686764, "learning_rate": 4.987185056142745e-05, "loss": 1.1449, "step": 2067 }, { "epoch": 0.24522708407446936, "grad_norm": 1.5572669492435904, "learning_rate": 4.987160771722117e-05, "loss": 0.9175, "step": 2068 }, { "epoch": 0.24534566583659434, "grad_norm": 1.6561084514887285, "learning_rate": 4.987136464372917e-05, "loss": 0.883, "step": 2069 }, { "epoch": 0.24546424759871932, "grad_norm": 2.427111843035678, "learning_rate": 4.9871121340953674e-05, "loss": 1.2025, "step": 2070 }, { "epoch": 0.2455828293608443, "grad_norm": 1.6293295543066901, "learning_rate": 4.987087780889695e-05, "loss": 1.0961, "step": 2071 }, { "epoch": 0.24570141112296928, "grad_norm": 1.7243286199876149, "learning_rate": 4.987063404756123e-05, "loss": 1.1328, "step": 2072 }, { "epoch": 0.24581999288509426, "grad_norm": 1.336293364418295, "learning_rate": 4.987039005694876e-05, "loss": 0.7303, "step": 2073 }, { "epoch": 0.24593857464721927, "grad_norm": 1.335171807715241, "learning_rate": 4.987014583706179e-05, "loss": 0.9421, "step": 2074 }, { "epoch": 0.24605715640934425, "grad_norm": 1.5627468303837904, "learning_rate": 4.986990138790257e-05, "loss": 1.1774, "step": 2075 }, { "epoch": 0.24617573817146923, "grad_norm": 1.5014928735012183, "learning_rate": 4.986965670947337e-05, "loss": 0.9542, "step": 2076 }, { "epoch": 0.2462943199335942, "grad_norm": 1.3527540617191718, "learning_rate": 4.986941180177642e-05, "loss": 1.0114, "step": 2077 }, { "epoch": 0.2464129016957192, "grad_norm": 1.2005745079586585, "learning_rate": 4.9869166664813996e-05, "loss": 0.911, "step": 2078 }, { "epoch": 0.24653148345784417, "grad_norm": 1.4295014467282676, "learning_rate": 4.986892129858835e-05, "loss": 1.0468, "step": 2079 }, { "epoch": 0.24665006521996916, "grad_norm": 1.5074075350836313, "learning_rate": 4.986867570310174e-05, "loss": 0.9018, "step": 2080 }, { "epoch": 0.24676864698209416, "grad_norm": 1.492128442222448, "learning_rate": 4.986842987835645e-05, "loss": 1.0957, "step": 2081 }, { "epoch": 0.24688722874421914, "grad_norm": 1.505625909657182, "learning_rate": 4.986818382435472e-05, "loss": 1.041, "step": 2082 }, { "epoch": 0.24700581050634413, "grad_norm": 1.417385103898961, "learning_rate": 4.9867937541098835e-05, "loss": 0.9335, "step": 2083 }, { "epoch": 0.2471243922684691, "grad_norm": 1.373392156625882, "learning_rate": 4.9867691028591054e-05, "loss": 1.0242, "step": 2084 }, { "epoch": 0.2472429740305941, "grad_norm": 1.636374582256655, "learning_rate": 4.986744428683367e-05, "loss": 0.9774, "step": 2085 }, { "epoch": 0.24736155579271907, "grad_norm": 1.7366743252086316, "learning_rate": 4.986719731582894e-05, "loss": 1.1063, "step": 2086 }, { "epoch": 0.24748013755484408, "grad_norm": 1.7257485946559759, "learning_rate": 4.9866950115579135e-05, "loss": 0.8362, "step": 2087 }, { "epoch": 0.24759871931696906, "grad_norm": 1.8215115838072902, "learning_rate": 4.986670268608655e-05, "loss": 1.0129, "step": 2088 }, { "epoch": 0.24771730107909404, "grad_norm": 1.5893486207755414, "learning_rate": 4.986645502735346e-05, "loss": 1.0106, "step": 2089 }, { "epoch": 0.24783588284121902, "grad_norm": 1.596333336718307, "learning_rate": 4.986620713938215e-05, "loss": 1.0782, "step": 2090 }, { "epoch": 0.247954464603344, "grad_norm": 1.645355479943473, "learning_rate": 4.986595902217491e-05, "loss": 1.1981, "step": 2091 }, { "epoch": 0.24807304636546898, "grad_norm": 1.5005414462213689, "learning_rate": 4.986571067573401e-05, "loss": 1.1614, "step": 2092 }, { "epoch": 0.248191628127594, "grad_norm": 1.6092147081247734, "learning_rate": 4.986546210006175e-05, "loss": 1.0217, "step": 2093 }, { "epoch": 0.24831020988971897, "grad_norm": 1.5854139290624074, "learning_rate": 4.986521329516043e-05, "loss": 0.9246, "step": 2094 }, { "epoch": 0.24842879165184395, "grad_norm": 1.4527272338093522, "learning_rate": 4.986496426103232e-05, "loss": 0.8635, "step": 2095 }, { "epoch": 0.24854737341396893, "grad_norm": 1.4962003507951074, "learning_rate": 4.986471499767974e-05, "loss": 1.0938, "step": 2096 }, { "epoch": 0.2486659551760939, "grad_norm": 1.4057909464966287, "learning_rate": 4.9864465505104985e-05, "loss": 1.0784, "step": 2097 }, { "epoch": 0.2487845369382189, "grad_norm": 1.6055504181451705, "learning_rate": 4.9864215783310344e-05, "loss": 0.9935, "step": 2098 }, { "epoch": 0.2489031187003439, "grad_norm": 1.4699657177818262, "learning_rate": 4.986396583229812e-05, "loss": 0.6405, "step": 2099 }, { "epoch": 0.24902170046246888, "grad_norm": 1.376464715833646, "learning_rate": 4.986371565207062e-05, "loss": 0.9561, "step": 2100 }, { "epoch": 0.24914028222459386, "grad_norm": 1.5723865013643497, "learning_rate": 4.986346524263016e-05, "loss": 0.8234, "step": 2101 }, { "epoch": 0.24925886398671884, "grad_norm": 1.2630350121290737, "learning_rate": 4.986321460397904e-05, "loss": 0.7528, "step": 2102 }, { "epoch": 0.24937744574884382, "grad_norm": 1.4848277535306345, "learning_rate": 4.986296373611956e-05, "loss": 0.7253, "step": 2103 }, { "epoch": 0.2494960275109688, "grad_norm": 1.6589826827990275, "learning_rate": 4.986271263905405e-05, "loss": 0.9045, "step": 2104 }, { "epoch": 0.24961460927309378, "grad_norm": 1.6055563779007433, "learning_rate": 4.986246131278481e-05, "loss": 0.9267, "step": 2105 }, { "epoch": 0.2497331910352188, "grad_norm": 1.6166123649988795, "learning_rate": 4.9862209757314184e-05, "loss": 1.118, "step": 2106 }, { "epoch": 0.24985177279734377, "grad_norm": 1.5887712600318746, "learning_rate": 4.986195797264446e-05, "loss": 1.0529, "step": 2107 }, { "epoch": 0.24997035455946875, "grad_norm": 1.6692519959274708, "learning_rate": 4.986170595877797e-05, "loss": 0.9421, "step": 2108 }, { "epoch": 0.25008893632159374, "grad_norm": 1.7195944350620005, "learning_rate": 4.986145371571704e-05, "loss": 1.2998, "step": 2109 }, { "epoch": 0.2502075180837187, "grad_norm": 1.4158535188409478, "learning_rate": 4.9861201243463994e-05, "loss": 0.9264, "step": 2110 }, { "epoch": 0.2503260998458437, "grad_norm": 1.395510290906992, "learning_rate": 4.986094854202116e-05, "loss": 0.834, "step": 2111 }, { "epoch": 0.2504446816079687, "grad_norm": 1.445087878954799, "learning_rate": 4.986069561139086e-05, "loss": 0.9971, "step": 2112 }, { "epoch": 0.25056326337009366, "grad_norm": 1.6208342599758634, "learning_rate": 4.986044245157544e-05, "loss": 0.8777, "step": 2113 }, { "epoch": 0.25068184513221864, "grad_norm": 1.6055488234224757, "learning_rate": 4.986018906257723e-05, "loss": 0.942, "step": 2114 }, { "epoch": 0.2508004268943437, "grad_norm": 1.7560188830725787, "learning_rate": 4.9859935444398556e-05, "loss": 1.0793, "step": 2115 }, { "epoch": 0.25091900865646866, "grad_norm": 1.6224173383291136, "learning_rate": 4.9859681597041765e-05, "loss": 1.0261, "step": 2116 }, { "epoch": 0.25103759041859364, "grad_norm": 1.4325652978105656, "learning_rate": 4.985942752050919e-05, "loss": 0.9031, "step": 2117 }, { "epoch": 0.2511561721807186, "grad_norm": 1.7606896343631897, "learning_rate": 4.985917321480319e-05, "loss": 1.1023, "step": 2118 }, { "epoch": 0.2512747539428436, "grad_norm": 1.853382170939425, "learning_rate": 4.985891867992609e-05, "loss": 0.9167, "step": 2119 }, { "epoch": 0.2513933357049686, "grad_norm": 1.6116968993821403, "learning_rate": 4.985866391588024e-05, "loss": 0.723, "step": 2120 }, { "epoch": 0.25151191746709356, "grad_norm": 1.8416582616185058, "learning_rate": 4.9858408922668e-05, "loss": 1.069, "step": 2121 }, { "epoch": 0.25163049922921854, "grad_norm": 1.830199379315522, "learning_rate": 4.985815370029171e-05, "loss": 1.0778, "step": 2122 }, { "epoch": 0.2517490809913435, "grad_norm": 1.59336869913217, "learning_rate": 4.985789824875372e-05, "loss": 0.6965, "step": 2123 }, { "epoch": 0.2518676627534685, "grad_norm": 1.5201708003214887, "learning_rate": 4.9857642568056395e-05, "loss": 0.9348, "step": 2124 }, { "epoch": 0.2519862445155935, "grad_norm": 1.7073867745960254, "learning_rate": 4.9857386658202086e-05, "loss": 1.0733, "step": 2125 }, { "epoch": 0.25210482627771846, "grad_norm": 1.682780684489853, "learning_rate": 4.9857130519193164e-05, "loss": 0.9605, "step": 2126 }, { "epoch": 0.2522234080398435, "grad_norm": 2.0031707997058237, "learning_rate": 4.985687415103197e-05, "loss": 1.0503, "step": 2127 }, { "epoch": 0.2523419898019685, "grad_norm": 1.6097499813200011, "learning_rate": 4.985661755372088e-05, "loss": 1.0184, "step": 2128 }, { "epoch": 0.25246057156409346, "grad_norm": 1.7698967588309922, "learning_rate": 4.9856360727262255e-05, "loss": 1.0644, "step": 2129 }, { "epoch": 0.25257915332621844, "grad_norm": 1.627925059741286, "learning_rate": 4.985610367165847e-05, "loss": 0.9859, "step": 2130 }, { "epoch": 0.2526977350883434, "grad_norm": 1.5307603267014067, "learning_rate": 4.985584638691189e-05, "loss": 1.0008, "step": 2131 }, { "epoch": 0.2528163168504684, "grad_norm": 1.6600232641719685, "learning_rate": 4.985558887302488e-05, "loss": 0.9726, "step": 2132 }, { "epoch": 0.2529348986125934, "grad_norm": 1.540626989422014, "learning_rate": 4.985533112999983e-05, "loss": 0.7683, "step": 2133 }, { "epoch": 0.25305348037471836, "grad_norm": 1.6232508751271337, "learning_rate": 4.9855073157839104e-05, "loss": 0.9407, "step": 2134 }, { "epoch": 0.25317206213684335, "grad_norm": 1.492004022909737, "learning_rate": 4.985481495654508e-05, "loss": 0.7443, "step": 2135 }, { "epoch": 0.2532906438989683, "grad_norm": 1.7337049234465745, "learning_rate": 4.9854556526120144e-05, "loss": 1.1503, "step": 2136 }, { "epoch": 0.2534092256610933, "grad_norm": 1.7877832683475772, "learning_rate": 4.9854297866566665e-05, "loss": 1.1425, "step": 2137 }, { "epoch": 0.2535278074232183, "grad_norm": 1.7253199632214957, "learning_rate": 4.9854038977887054e-05, "loss": 1.0926, "step": 2138 }, { "epoch": 0.25364638918534327, "grad_norm": 1.73219076185296, "learning_rate": 4.9853779860083675e-05, "loss": 0.9113, "step": 2139 }, { "epoch": 0.2537649709474683, "grad_norm": 1.7153471311367947, "learning_rate": 4.985352051315892e-05, "loss": 1.0412, "step": 2140 }, { "epoch": 0.2538835527095933, "grad_norm": 1.6450974655983241, "learning_rate": 4.985326093711519e-05, "loss": 1.0801, "step": 2141 }, { "epoch": 0.25400213447171827, "grad_norm": 1.5298974316076808, "learning_rate": 4.985300113195486e-05, "loss": 0.9512, "step": 2142 }, { "epoch": 0.25412071623384325, "grad_norm": 1.543117649229392, "learning_rate": 4.9852741097680344e-05, "loss": 1.1495, "step": 2143 }, { "epoch": 0.2542392979959682, "grad_norm": 1.4568123761158644, "learning_rate": 4.985248083429403e-05, "loss": 0.9383, "step": 2144 }, { "epoch": 0.2543578797580932, "grad_norm": 1.5886075473869836, "learning_rate": 4.985222034179832e-05, "loss": 1.0387, "step": 2145 }, { "epoch": 0.2544764615202182, "grad_norm": 1.4482718730262762, "learning_rate": 4.985195962019561e-05, "loss": 0.9691, "step": 2146 }, { "epoch": 0.25459504328234317, "grad_norm": 1.643185622307714, "learning_rate": 4.985169866948831e-05, "loss": 0.7969, "step": 2147 }, { "epoch": 0.25471362504446815, "grad_norm": 1.6215371189085541, "learning_rate": 4.985143748967883e-05, "loss": 0.8904, "step": 2148 }, { "epoch": 0.25483220680659313, "grad_norm": 1.4569548540730763, "learning_rate": 4.985117608076957e-05, "loss": 0.9873, "step": 2149 }, { "epoch": 0.2549507885687181, "grad_norm": 1.9108667612409593, "learning_rate": 4.985091444276293e-05, "loss": 1.144, "step": 2150 }, { "epoch": 0.2550693703308431, "grad_norm": 1.683342170167743, "learning_rate": 4.985065257566135e-05, "loss": 1.2595, "step": 2151 }, { "epoch": 0.25518795209296813, "grad_norm": 2.0167574601508518, "learning_rate": 4.9850390479467214e-05, "loss": 0.9175, "step": 2152 }, { "epoch": 0.2553065338550931, "grad_norm": 1.709546124106927, "learning_rate": 4.985012815418295e-05, "loss": 1.0466, "step": 2153 }, { "epoch": 0.2554251156172181, "grad_norm": 1.3743328469186065, "learning_rate": 4.984986559981098e-05, "loss": 0.9594, "step": 2154 }, { "epoch": 0.25554369737934307, "grad_norm": 1.6006546101028003, "learning_rate": 4.984960281635373e-05, "loss": 1.0004, "step": 2155 }, { "epoch": 0.25566227914146805, "grad_norm": 1.373543147012109, "learning_rate": 4.984933980381361e-05, "loss": 0.8517, "step": 2156 }, { "epoch": 0.25578086090359303, "grad_norm": 1.4634271540663177, "learning_rate": 4.9849076562193044e-05, "loss": 0.8683, "step": 2157 }, { "epoch": 0.255899442665718, "grad_norm": 1.5319474259217496, "learning_rate": 4.984881309149447e-05, "loss": 0.9806, "step": 2158 }, { "epoch": 0.256018024427843, "grad_norm": 1.6519943761700993, "learning_rate": 4.984854939172031e-05, "loss": 0.8745, "step": 2159 }, { "epoch": 0.256136606189968, "grad_norm": 1.5862225596842816, "learning_rate": 4.984828546287299e-05, "loss": 0.8198, "step": 2160 }, { "epoch": 0.25625518795209296, "grad_norm": 1.488037554096196, "learning_rate": 4.984802130495496e-05, "loss": 0.8924, "step": 2161 }, { "epoch": 0.25637376971421794, "grad_norm": 1.5924279560940893, "learning_rate": 4.9847756917968637e-05, "loss": 0.8875, "step": 2162 }, { "epoch": 0.2564923514763429, "grad_norm": 1.5612196989023017, "learning_rate": 4.984749230191646e-05, "loss": 0.9875, "step": 2163 }, { "epoch": 0.2566109332384679, "grad_norm": 1.5873992363771936, "learning_rate": 4.984722745680089e-05, "loss": 1.0515, "step": 2164 }, { "epoch": 0.25672951500059293, "grad_norm": 1.5157166593132512, "learning_rate": 4.984696238262434e-05, "loss": 0.9316, "step": 2165 }, { "epoch": 0.2568480967627179, "grad_norm": 1.5746688263250848, "learning_rate": 4.984669707938927e-05, "loss": 0.875, "step": 2166 }, { "epoch": 0.2569666785248429, "grad_norm": 1.4016955373441873, "learning_rate": 4.984643154709813e-05, "loss": 0.8289, "step": 2167 }, { "epoch": 0.2570852602869679, "grad_norm": 1.6548005907863415, "learning_rate": 4.984616578575335e-05, "loss": 0.9674, "step": 2168 }, { "epoch": 0.25720384204909286, "grad_norm": 1.8031069859353175, "learning_rate": 4.9845899795357385e-05, "loss": 1.1634, "step": 2169 }, { "epoch": 0.25732242381121784, "grad_norm": 1.546651082570925, "learning_rate": 4.9845633575912696e-05, "loss": 0.6455, "step": 2170 }, { "epoch": 0.2574410055733428, "grad_norm": 1.3810770801903356, "learning_rate": 4.984536712742174e-05, "loss": 0.792, "step": 2171 }, { "epoch": 0.2575595873354678, "grad_norm": 1.6841781699677776, "learning_rate": 4.984510044988696e-05, "loss": 1.1331, "step": 2172 }, { "epoch": 0.2576781690975928, "grad_norm": 1.8543678668450854, "learning_rate": 4.984483354331082e-05, "loss": 1.0579, "step": 2173 }, { "epoch": 0.25779675085971776, "grad_norm": 1.730530140611696, "learning_rate": 4.984456640769579e-05, "loss": 0.9951, "step": 2174 }, { "epoch": 0.25791533262184274, "grad_norm": 1.5446571432852814, "learning_rate": 4.984429904304432e-05, "loss": 0.939, "step": 2175 }, { "epoch": 0.2580339143839677, "grad_norm": 1.6214628783263327, "learning_rate": 4.984403144935888e-05, "loss": 1.0707, "step": 2176 }, { "epoch": 0.25815249614609276, "grad_norm": 1.5239598462709665, "learning_rate": 4.9843763626641924e-05, "loss": 0.8749, "step": 2177 }, { "epoch": 0.25827107790821774, "grad_norm": 1.6541707779285175, "learning_rate": 4.984349557489595e-05, "loss": 0.9682, "step": 2178 }, { "epoch": 0.2583896596703427, "grad_norm": 1.5466005597786918, "learning_rate": 4.98432272941234e-05, "loss": 0.9338, "step": 2179 }, { "epoch": 0.2585082414324677, "grad_norm": 1.3353576130769804, "learning_rate": 4.984295878432677e-05, "loss": 0.9302, "step": 2180 }, { "epoch": 0.2586268231945927, "grad_norm": 1.678865232338637, "learning_rate": 4.9842690045508514e-05, "loss": 1.0057, "step": 2181 }, { "epoch": 0.25874540495671766, "grad_norm": 1.4092464192696998, "learning_rate": 4.984242107767112e-05, "loss": 0.5636, "step": 2182 }, { "epoch": 0.25886398671884264, "grad_norm": 1.3882276999778254, "learning_rate": 4.9842151880817074e-05, "loss": 0.7967, "step": 2183 }, { "epoch": 0.2589825684809676, "grad_norm": 1.7965720480845428, "learning_rate": 4.984188245494885e-05, "loss": 0.9934, "step": 2184 }, { "epoch": 0.2591011502430926, "grad_norm": 1.632711314953918, "learning_rate": 4.984161280006893e-05, "loss": 0.9033, "step": 2185 }, { "epoch": 0.2592197320052176, "grad_norm": 1.3772573331821034, "learning_rate": 4.98413429161798e-05, "loss": 0.6674, "step": 2186 }, { "epoch": 0.25933831376734257, "grad_norm": 1.4466555805588186, "learning_rate": 4.9841072803283956e-05, "loss": 0.8062, "step": 2187 }, { "epoch": 0.25945689552946755, "grad_norm": 1.871384745228664, "learning_rate": 4.9840802461383886e-05, "loss": 1.0916, "step": 2188 }, { "epoch": 0.2595754772915925, "grad_norm": 1.6990502069363669, "learning_rate": 4.984053189048207e-05, "loss": 0.9982, "step": 2189 }, { "epoch": 0.25969405905371756, "grad_norm": 1.437708528667643, "learning_rate": 4.984026109058102e-05, "loss": 0.955, "step": 2190 }, { "epoch": 0.25981264081584254, "grad_norm": 1.7859525097681908, "learning_rate": 4.983999006168322e-05, "loss": 0.7692, "step": 2191 }, { "epoch": 0.2599312225779675, "grad_norm": 1.8483293806153176, "learning_rate": 4.983971880379117e-05, "loss": 1.1746, "step": 2192 }, { "epoch": 0.2600498043400925, "grad_norm": 2.0288037142827964, "learning_rate": 4.983944731690737e-05, "loss": 0.907, "step": 2193 }, { "epoch": 0.2601683861022175, "grad_norm": 1.5390537565231013, "learning_rate": 4.983917560103433e-05, "loss": 1.1079, "step": 2194 }, { "epoch": 0.26028696786434247, "grad_norm": 1.516324556302055, "learning_rate": 4.9838903656174554e-05, "loss": 0.8732, "step": 2195 }, { "epoch": 0.26040554962646745, "grad_norm": 1.4402602453072635, "learning_rate": 4.9838631482330544e-05, "loss": 0.8412, "step": 2196 }, { "epoch": 0.26052413138859243, "grad_norm": 1.6002422094818345, "learning_rate": 4.983835907950482e-05, "loss": 1.0157, "step": 2197 }, { "epoch": 0.2606427131507174, "grad_norm": 1.478293098304953, "learning_rate": 4.983808644769987e-05, "loss": 1.0273, "step": 2198 }, { "epoch": 0.2607612949128424, "grad_norm": 1.7782026275703942, "learning_rate": 4.9837813586918224e-05, "loss": 0.6987, "step": 2199 }, { "epoch": 0.26087987667496737, "grad_norm": 1.4484772310873528, "learning_rate": 4.983754049716239e-05, "loss": 1.021, "step": 2200 }, { "epoch": 0.26099845843709235, "grad_norm": 1.7128472077423973, "learning_rate": 4.9837267178434905e-05, "loss": 0.9625, "step": 2201 }, { "epoch": 0.2611170401992174, "grad_norm": 1.748379826879709, "learning_rate": 4.9836993630738264e-05, "loss": 0.9788, "step": 2202 }, { "epoch": 0.26123562196134237, "grad_norm": 1.7422509198227587, "learning_rate": 4.9836719854075005e-05, "loss": 1.0559, "step": 2203 }, { "epoch": 0.26135420372346735, "grad_norm": 1.544675532466091, "learning_rate": 4.983644584844764e-05, "loss": 0.9626, "step": 2204 }, { "epoch": 0.26147278548559233, "grad_norm": 1.6285447897168717, "learning_rate": 4.9836171613858705e-05, "loss": 1.0278, "step": 2205 }, { "epoch": 0.2615913672477173, "grad_norm": 1.531423601258921, "learning_rate": 4.983589715031072e-05, "loss": 1.1288, "step": 2206 }, { "epoch": 0.2617099490098423, "grad_norm": 1.5734436408369779, "learning_rate": 4.983562245780622e-05, "loss": 0.9444, "step": 2207 }, { "epoch": 0.26182853077196727, "grad_norm": 1.4859973449424488, "learning_rate": 4.9835347536347736e-05, "loss": 0.6696, "step": 2208 }, { "epoch": 0.26194711253409225, "grad_norm": 1.6205795107790246, "learning_rate": 4.9835072385937806e-05, "loss": 0.9353, "step": 2209 }, { "epoch": 0.26206569429621723, "grad_norm": 1.5004063496502429, "learning_rate": 4.983479700657896e-05, "loss": 1.0302, "step": 2210 }, { "epoch": 0.2621842760583422, "grad_norm": 1.5257792347590051, "learning_rate": 4.983452139827374e-05, "loss": 0.8633, "step": 2211 }, { "epoch": 0.2623028578204672, "grad_norm": 1.7636800159696502, "learning_rate": 4.983424556102469e-05, "loss": 0.9552, "step": 2212 }, { "epoch": 0.2624214395825922, "grad_norm": 1.5940702075896371, "learning_rate": 4.983396949483435e-05, "loss": 1.03, "step": 2213 }, { "epoch": 0.26254002134471716, "grad_norm": 1.6484006797332835, "learning_rate": 4.983369319970526e-05, "loss": 1.0547, "step": 2214 }, { "epoch": 0.2626586031068422, "grad_norm": 1.5736273624894186, "learning_rate": 4.983341667563998e-05, "loss": 0.8281, "step": 2215 }, { "epoch": 0.2627771848689672, "grad_norm": 1.6087840156818691, "learning_rate": 4.9833139922641035e-05, "loss": 0.8274, "step": 2216 }, { "epoch": 0.26289576663109215, "grad_norm": 1.6025409699300026, "learning_rate": 4.9832862940711015e-05, "loss": 0.9733, "step": 2217 }, { "epoch": 0.26301434839321713, "grad_norm": 1.5049979976906493, "learning_rate": 4.983258572985243e-05, "loss": 0.7118, "step": 2218 }, { "epoch": 0.2631329301553421, "grad_norm": 1.5982744601079337, "learning_rate": 4.9832308290067865e-05, "loss": 1.0606, "step": 2219 }, { "epoch": 0.2632515119174671, "grad_norm": 1.4913775573609198, "learning_rate": 4.983203062135987e-05, "loss": 0.9558, "step": 2220 }, { "epoch": 0.2633700936795921, "grad_norm": 1.4983971437597996, "learning_rate": 4.9831752723731006e-05, "loss": 0.9606, "step": 2221 }, { "epoch": 0.26348867544171706, "grad_norm": 1.3966979712238767, "learning_rate": 4.983147459718382e-05, "loss": 0.7713, "step": 2222 }, { "epoch": 0.26360725720384204, "grad_norm": 1.508943126036321, "learning_rate": 4.98311962417209e-05, "loss": 1.0105, "step": 2223 }, { "epoch": 0.263725838965967, "grad_norm": 1.999047387364816, "learning_rate": 4.98309176573448e-05, "loss": 1.0228, "step": 2224 }, { "epoch": 0.263844420728092, "grad_norm": 1.4607476931341195, "learning_rate": 4.983063884405809e-05, "loss": 0.6235, "step": 2225 }, { "epoch": 0.263963002490217, "grad_norm": 1.6119254415235267, "learning_rate": 4.9830359801863334e-05, "loss": 0.8707, "step": 2226 }, { "epoch": 0.264081584252342, "grad_norm": 1.6797512904245933, "learning_rate": 4.983008053076311e-05, "loss": 1.1013, "step": 2227 }, { "epoch": 0.264200166014467, "grad_norm": 1.7402944002719882, "learning_rate": 4.982980103076e-05, "loss": 0.8649, "step": 2228 }, { "epoch": 0.264318747776592, "grad_norm": 1.7560916351057352, "learning_rate": 4.982952130185656e-05, "loss": 1.1523, "step": 2229 }, { "epoch": 0.26443732953871696, "grad_norm": 1.5835855166801942, "learning_rate": 4.982924134405539e-05, "loss": 0.5252, "step": 2230 }, { "epoch": 0.26455591130084194, "grad_norm": 1.6437484027032823, "learning_rate": 4.9828961157359064e-05, "loss": 0.9379, "step": 2231 }, { "epoch": 0.2646744930629669, "grad_norm": 1.6911624819781965, "learning_rate": 4.982868074177016e-05, "loss": 0.9642, "step": 2232 }, { "epoch": 0.2647930748250919, "grad_norm": 1.5717456559719993, "learning_rate": 4.982840009729127e-05, "loss": 0.9797, "step": 2233 }, { "epoch": 0.2649116565872169, "grad_norm": 1.8251235311549547, "learning_rate": 4.9828119223924974e-05, "loss": 0.8802, "step": 2234 }, { "epoch": 0.26503023834934186, "grad_norm": 1.572833259712977, "learning_rate": 4.9827838121673865e-05, "loss": 1.0434, "step": 2235 }, { "epoch": 0.26514882011146684, "grad_norm": 1.59258086953376, "learning_rate": 4.982755679054053e-05, "loss": 0.7834, "step": 2236 }, { "epoch": 0.2652674018735918, "grad_norm": 1.8604818786034807, "learning_rate": 4.9827275230527574e-05, "loss": 0.987, "step": 2237 }, { "epoch": 0.2653859836357168, "grad_norm": 1.9526092151832255, "learning_rate": 4.982699344163758e-05, "loss": 1.0604, "step": 2238 }, { "epoch": 0.2655045653978418, "grad_norm": 1.8261736885883155, "learning_rate": 4.982671142387316e-05, "loss": 1.0139, "step": 2239 }, { "epoch": 0.2656231471599668, "grad_norm": 1.5544617615957195, "learning_rate": 4.98264291772369e-05, "loss": 0.6288, "step": 2240 }, { "epoch": 0.2657417289220918, "grad_norm": 1.5234981316922735, "learning_rate": 4.982614670173141e-05, "loss": 0.8816, "step": 2241 }, { "epoch": 0.2658603106842168, "grad_norm": 1.5496095724245618, "learning_rate": 4.9825863997359285e-05, "loss": 1.0243, "step": 2242 }, { "epoch": 0.26597889244634176, "grad_norm": 1.7460792920788026, "learning_rate": 4.982558106412314e-05, "loss": 1.0361, "step": 2243 }, { "epoch": 0.26609747420846674, "grad_norm": 1.64859127945359, "learning_rate": 4.982529790202558e-05, "loss": 0.8022, "step": 2244 }, { "epoch": 0.2662160559705917, "grad_norm": 1.6444116804048423, "learning_rate": 4.9825014511069224e-05, "loss": 1.1802, "step": 2245 }, { "epoch": 0.2663346377327167, "grad_norm": 1.4887703484872488, "learning_rate": 4.9824730891256666e-05, "loss": 1.0765, "step": 2246 }, { "epoch": 0.2664532194948417, "grad_norm": 1.5458836219818344, "learning_rate": 4.9824447042590537e-05, "loss": 0.8803, "step": 2247 }, { "epoch": 0.26657180125696667, "grad_norm": 1.4257107751417126, "learning_rate": 4.982416296507345e-05, "loss": 1.0698, "step": 2248 }, { "epoch": 0.26669038301909165, "grad_norm": 1.5052202499178404, "learning_rate": 4.9823878658708015e-05, "loss": 1.0725, "step": 2249 }, { "epoch": 0.26680896478121663, "grad_norm": 1.3545901892659957, "learning_rate": 4.982359412349687e-05, "loss": 0.5528, "step": 2250 }, { "epoch": 0.2669275465433416, "grad_norm": 1.6451570791559917, "learning_rate": 4.982330935944262e-05, "loss": 0.8372, "step": 2251 }, { "epoch": 0.26704612830546665, "grad_norm": 1.6420761030414943, "learning_rate": 4.98230243665479e-05, "loss": 0.9582, "step": 2252 }, { "epoch": 0.2671647100675916, "grad_norm": 1.3776570541081061, "learning_rate": 4.982273914481533e-05, "loss": 0.6706, "step": 2253 }, { "epoch": 0.2672832918297166, "grad_norm": 1.5789502447259047, "learning_rate": 4.982245369424755e-05, "loss": 0.6965, "step": 2254 }, { "epoch": 0.2674018735918416, "grad_norm": 2.07294976044734, "learning_rate": 4.9822168014847184e-05, "loss": 0.9643, "step": 2255 }, { "epoch": 0.26752045535396657, "grad_norm": 1.482478545110422, "learning_rate": 4.9821882106616866e-05, "loss": 0.7547, "step": 2256 }, { "epoch": 0.26763903711609155, "grad_norm": 1.4797870893039287, "learning_rate": 4.982159596955924e-05, "loss": 0.8033, "step": 2257 }, { "epoch": 0.26775761887821653, "grad_norm": 1.9766718282505586, "learning_rate": 4.9821309603676934e-05, "loss": 0.8569, "step": 2258 }, { "epoch": 0.2678762006403415, "grad_norm": 1.9933158407426539, "learning_rate": 4.982102300897259e-05, "loss": 1.1115, "step": 2259 }, { "epoch": 0.2679947824024665, "grad_norm": 1.4779440467816958, "learning_rate": 4.982073618544886e-05, "loss": 0.9309, "step": 2260 }, { "epoch": 0.2681133641645915, "grad_norm": 1.8356509863555672, "learning_rate": 4.982044913310837e-05, "loss": 0.7702, "step": 2261 }, { "epoch": 0.26823194592671645, "grad_norm": 1.6260396673597868, "learning_rate": 4.9820161851953776e-05, "loss": 0.8036, "step": 2262 }, { "epoch": 0.26835052768884143, "grad_norm": 1.5249168521541052, "learning_rate": 4.981987434198773e-05, "loss": 0.898, "step": 2263 }, { "epoch": 0.2684691094509664, "grad_norm": 1.6728766437857052, "learning_rate": 4.9819586603212875e-05, "loss": 0.8531, "step": 2264 }, { "epoch": 0.26858769121309145, "grad_norm": 1.8197059253344197, "learning_rate": 4.9819298635631874e-05, "loss": 0.8003, "step": 2265 }, { "epoch": 0.26870627297521643, "grad_norm": 1.763754879739386, "learning_rate": 4.981901043924737e-05, "loss": 1.085, "step": 2266 }, { "epoch": 0.2688248547373414, "grad_norm": 1.7069536207971974, "learning_rate": 4.9818722014062026e-05, "loss": 0.9278, "step": 2267 }, { "epoch": 0.2689434364994664, "grad_norm": 1.7613303459881093, "learning_rate": 4.98184333600785e-05, "loss": 1.3622, "step": 2268 }, { "epoch": 0.2690620182615914, "grad_norm": 1.321905287133801, "learning_rate": 4.981814447729946e-05, "loss": 0.8156, "step": 2269 }, { "epoch": 0.26918060002371635, "grad_norm": 1.6163533681343276, "learning_rate": 4.981785536572755e-05, "loss": 1.1632, "step": 2270 }, { "epoch": 0.26929918178584134, "grad_norm": 1.509857902851942, "learning_rate": 4.981756602536545e-05, "loss": 0.8462, "step": 2271 }, { "epoch": 0.2694177635479663, "grad_norm": 1.554687450517775, "learning_rate": 4.9817276456215824e-05, "loss": 1.0378, "step": 2272 }, { "epoch": 0.2695363453100913, "grad_norm": 1.6327130379448598, "learning_rate": 4.981698665828135e-05, "loss": 0.8297, "step": 2273 }, { "epoch": 0.2696549270722163, "grad_norm": 1.407290209389833, "learning_rate": 4.981669663156468e-05, "loss": 1.1696, "step": 2274 }, { "epoch": 0.26977350883434126, "grad_norm": 1.391307660677606, "learning_rate": 4.98164063760685e-05, "loss": 0.8938, "step": 2275 }, { "epoch": 0.26989209059646624, "grad_norm": 1.4091273614813733, "learning_rate": 4.9816115891795494e-05, "loss": 0.7126, "step": 2276 }, { "epoch": 0.2700106723585913, "grad_norm": 1.359466268642746, "learning_rate": 4.981582517874833e-05, "loss": 0.6684, "step": 2277 }, { "epoch": 0.27012925412071626, "grad_norm": 1.5083895667694889, "learning_rate": 4.981553423692968e-05, "loss": 0.8027, "step": 2278 }, { "epoch": 0.27024783588284124, "grad_norm": 1.8026952308350164, "learning_rate": 4.9815243066342244e-05, "loss": 0.9701, "step": 2279 }, { "epoch": 0.2703664176449662, "grad_norm": 2.0804895384463378, "learning_rate": 4.98149516669887e-05, "loss": 1.1016, "step": 2280 }, { "epoch": 0.2704849994070912, "grad_norm": 1.8346028102579401, "learning_rate": 4.981466003887172e-05, "loss": 0.8025, "step": 2281 }, { "epoch": 0.2706035811692162, "grad_norm": 1.524770294365194, "learning_rate": 4.9814368181994015e-05, "loss": 0.8095, "step": 2282 }, { "epoch": 0.27072216293134116, "grad_norm": 1.8018233518594808, "learning_rate": 4.981407609635826e-05, "loss": 0.8499, "step": 2283 }, { "epoch": 0.27084074469346614, "grad_norm": 1.495876179315143, "learning_rate": 4.981378378196715e-05, "loss": 0.8544, "step": 2284 }, { "epoch": 0.2709593264555911, "grad_norm": 1.7571659850907795, "learning_rate": 4.981349123882338e-05, "loss": 1.0576, "step": 2285 }, { "epoch": 0.2710779082177161, "grad_norm": 1.6580855935237229, "learning_rate": 4.9813198466929664e-05, "loss": 0.9806, "step": 2286 }, { "epoch": 0.2711964899798411, "grad_norm": 1.4988019278436455, "learning_rate": 4.9812905466288675e-05, "loss": 1.0478, "step": 2287 }, { "epoch": 0.27131507174196606, "grad_norm": 1.4824962295696775, "learning_rate": 4.981261223690312e-05, "loss": 0.8817, "step": 2288 }, { "epoch": 0.27143365350409104, "grad_norm": 1.4941938146835907, "learning_rate": 4.981231877877572e-05, "loss": 0.8142, "step": 2289 }, { "epoch": 0.2715522352662161, "grad_norm": 1.5575367610113489, "learning_rate": 4.981202509190916e-05, "loss": 0.8436, "step": 2290 }, { "epoch": 0.27167081702834106, "grad_norm": 1.4830160665362453, "learning_rate": 4.9811731176306155e-05, "loss": 0.767, "step": 2291 }, { "epoch": 0.27178939879046604, "grad_norm": 1.4219043161223104, "learning_rate": 4.981143703196942e-05, "loss": 0.6989, "step": 2292 }, { "epoch": 0.271907980552591, "grad_norm": 1.6943146917458523, "learning_rate": 4.9811142658901654e-05, "loss": 1.0129, "step": 2293 }, { "epoch": 0.272026562314716, "grad_norm": 1.5028034780402468, "learning_rate": 4.9810848057105585e-05, "loss": 0.9935, "step": 2294 }, { "epoch": 0.272145144076841, "grad_norm": 2.0035001915090023, "learning_rate": 4.9810553226583914e-05, "loss": 1.0074, "step": 2295 }, { "epoch": 0.27226372583896596, "grad_norm": 1.4212208784813856, "learning_rate": 4.981025816733937e-05, "loss": 0.9766, "step": 2296 }, { "epoch": 0.27238230760109094, "grad_norm": 1.4816361482315437, "learning_rate": 4.980996287937467e-05, "loss": 0.8218, "step": 2297 }, { "epoch": 0.2725008893632159, "grad_norm": 1.4804506855450281, "learning_rate": 4.9809667362692544e-05, "loss": 0.7826, "step": 2298 }, { "epoch": 0.2726194711253409, "grad_norm": 1.2822091180647575, "learning_rate": 4.98093716172957e-05, "loss": 0.6886, "step": 2299 }, { "epoch": 0.2727380528874659, "grad_norm": 1.7219809251086458, "learning_rate": 4.9809075643186874e-05, "loss": 1.0186, "step": 2300 }, { "epoch": 0.27285663464959087, "grad_norm": 1.564623932198742, "learning_rate": 4.98087794403688e-05, "loss": 1.055, "step": 2301 }, { "epoch": 0.2729752164117159, "grad_norm": 1.5931004960104955, "learning_rate": 4.98084830088442e-05, "loss": 0.8173, "step": 2302 }, { "epoch": 0.2730937981738409, "grad_norm": 1.924877399858819, "learning_rate": 4.980818634861581e-05, "loss": 1.159, "step": 2303 }, { "epoch": 0.27321237993596587, "grad_norm": 1.732778358575352, "learning_rate": 4.980788945968635e-05, "loss": 1.0589, "step": 2304 }, { "epoch": 0.27333096169809085, "grad_norm": 1.5868140485179991, "learning_rate": 4.980759234205859e-05, "loss": 1.0167, "step": 2305 }, { "epoch": 0.2734495434602158, "grad_norm": 1.7951333614525498, "learning_rate": 4.980729499573523e-05, "loss": 0.7651, "step": 2306 }, { "epoch": 0.2735681252223408, "grad_norm": 1.4583221021034045, "learning_rate": 4.980699742071904e-05, "loss": 0.8476, "step": 2307 }, { "epoch": 0.2736867069844658, "grad_norm": 1.317658478209005, "learning_rate": 4.980669961701276e-05, "loss": 0.677, "step": 2308 }, { "epoch": 0.27380528874659077, "grad_norm": 1.720988007705009, "learning_rate": 4.9806401584619126e-05, "loss": 1.1059, "step": 2309 }, { "epoch": 0.27392387050871575, "grad_norm": 1.5774053912907289, "learning_rate": 4.980610332354089e-05, "loss": 0.8255, "step": 2310 }, { "epoch": 0.27404245227084073, "grad_norm": 1.6482463396987364, "learning_rate": 4.980580483378079e-05, "loss": 1.0962, "step": 2311 }, { "epoch": 0.2741610340329657, "grad_norm": 1.60663566020083, "learning_rate": 4.98055061153416e-05, "loss": 0.8796, "step": 2312 }, { "epoch": 0.2742796157950907, "grad_norm": 1.5154733818598287, "learning_rate": 4.980520716822605e-05, "loss": 0.7362, "step": 2313 }, { "epoch": 0.2743981975572157, "grad_norm": 1.3977900450593432, "learning_rate": 4.980490799243692e-05, "loss": 0.7525, "step": 2314 }, { "epoch": 0.2745167793193407, "grad_norm": 1.5099580337741563, "learning_rate": 4.9804608587976956e-05, "loss": 0.7127, "step": 2315 }, { "epoch": 0.2746353610814657, "grad_norm": 1.435205956582461, "learning_rate": 4.9804308954848914e-05, "loss": 0.617, "step": 2316 }, { "epoch": 0.27475394284359067, "grad_norm": 1.8546267361016988, "learning_rate": 4.980400909305556e-05, "loss": 1.2456, "step": 2317 }, { "epoch": 0.27487252460571565, "grad_norm": 1.9915868614349475, "learning_rate": 4.980370900259966e-05, "loss": 0.9704, "step": 2318 }, { "epoch": 0.27499110636784063, "grad_norm": 1.5163234322517274, "learning_rate": 4.9803408683483984e-05, "loss": 0.8733, "step": 2319 }, { "epoch": 0.2751096881299656, "grad_norm": 1.3720475281936442, "learning_rate": 4.9803108135711286e-05, "loss": 1.1588, "step": 2320 }, { "epoch": 0.2752282698920906, "grad_norm": 1.4655058511409425, "learning_rate": 4.9802807359284355e-05, "loss": 0.8837, "step": 2321 }, { "epoch": 0.2753468516542156, "grad_norm": 1.5960543089517507, "learning_rate": 4.980250635420595e-05, "loss": 1.0131, "step": 2322 }, { "epoch": 0.27546543341634055, "grad_norm": 1.3538657951985782, "learning_rate": 4.980220512047886e-05, "loss": 0.8084, "step": 2323 }, { "epoch": 0.27558401517846554, "grad_norm": 1.7799238241875865, "learning_rate": 4.980190365810584e-05, "loss": 0.9622, "step": 2324 }, { "epoch": 0.2757025969405905, "grad_norm": 1.5375595362129812, "learning_rate": 4.9801601967089693e-05, "loss": 0.7816, "step": 2325 }, { "epoch": 0.2758211787027155, "grad_norm": 1.6333305758753784, "learning_rate": 4.980130004743319e-05, "loss": 0.6349, "step": 2326 }, { "epoch": 0.27593976046484053, "grad_norm": 1.6710919558394968, "learning_rate": 4.98009978991391e-05, "loss": 0.9131, "step": 2327 }, { "epoch": 0.2760583422269655, "grad_norm": 1.6287896002599938, "learning_rate": 4.980069552221024e-05, "loss": 0.7864, "step": 2328 }, { "epoch": 0.2761769239890905, "grad_norm": 1.6131836385996834, "learning_rate": 4.980039291664937e-05, "loss": 0.8226, "step": 2329 }, { "epoch": 0.2762955057512155, "grad_norm": 1.5932362862822576, "learning_rate": 4.980009008245929e-05, "loss": 0.999, "step": 2330 }, { "epoch": 0.27641408751334046, "grad_norm": 1.7634165043714547, "learning_rate": 4.97997870196428e-05, "loss": 0.8163, "step": 2331 }, { "epoch": 0.27653266927546544, "grad_norm": 1.5615923071196849, "learning_rate": 4.979948372820267e-05, "loss": 0.8127, "step": 2332 }, { "epoch": 0.2766512510375904, "grad_norm": 1.6540325937889555, "learning_rate": 4.979918020814172e-05, "loss": 1.0004, "step": 2333 }, { "epoch": 0.2767698327997154, "grad_norm": 1.7429460374434826, "learning_rate": 4.979887645946274e-05, "loss": 0.9948, "step": 2334 }, { "epoch": 0.2768884145618404, "grad_norm": 1.9278768852265296, "learning_rate": 4.979857248216853e-05, "loss": 1.018, "step": 2335 }, { "epoch": 0.27700699632396536, "grad_norm": 1.4999276115477773, "learning_rate": 4.979826827626188e-05, "loss": 0.7011, "step": 2336 }, { "epoch": 0.27712557808609034, "grad_norm": 1.4598177691290897, "learning_rate": 4.9797963841745624e-05, "loss": 0.8391, "step": 2337 }, { "epoch": 0.2772441598482153, "grad_norm": 1.5108863025708286, "learning_rate": 4.979765917862254e-05, "loss": 0.8332, "step": 2338 }, { "epoch": 0.2773627416103403, "grad_norm": 1.5628769425476425, "learning_rate": 4.9797354286895446e-05, "loss": 1.0507, "step": 2339 }, { "epoch": 0.27748132337246534, "grad_norm": 1.3580658331795292, "learning_rate": 4.9797049166567164e-05, "loss": 0.6127, "step": 2340 }, { "epoch": 0.2775999051345903, "grad_norm": 1.4937611856361923, "learning_rate": 4.979674381764049e-05, "loss": 0.9559, "step": 2341 }, { "epoch": 0.2777184868967153, "grad_norm": 1.6145460289546036, "learning_rate": 4.979643824011825e-05, "loss": 0.808, "step": 2342 }, { "epoch": 0.2778370686588403, "grad_norm": 1.738272955057422, "learning_rate": 4.979613243400325e-05, "loss": 0.9047, "step": 2343 }, { "epoch": 0.27795565042096526, "grad_norm": 1.7674905924830935, "learning_rate": 4.979582639929832e-05, "loss": 0.9718, "step": 2344 }, { "epoch": 0.27807423218309024, "grad_norm": 1.6862318775331195, "learning_rate": 4.9795520136006274e-05, "loss": 0.8843, "step": 2345 }, { "epoch": 0.2781928139452152, "grad_norm": 1.8997882913714228, "learning_rate": 4.979521364412995e-05, "loss": 0.9614, "step": 2346 }, { "epoch": 0.2783113957073402, "grad_norm": 1.6493405366054128, "learning_rate": 4.9794906923672146e-05, "loss": 0.8881, "step": 2347 }, { "epoch": 0.2784299774694652, "grad_norm": 1.8992771542503692, "learning_rate": 4.979459997463571e-05, "loss": 1.0411, "step": 2348 }, { "epoch": 0.27854855923159016, "grad_norm": 1.3926693823490475, "learning_rate": 4.9794292797023475e-05, "loss": 0.6415, "step": 2349 }, { "epoch": 0.27866714099371515, "grad_norm": 1.5795305337883796, "learning_rate": 4.979398539083826e-05, "loss": 0.9302, "step": 2350 }, { "epoch": 0.2787857227558401, "grad_norm": 2.0269354261427166, "learning_rate": 4.97936777560829e-05, "loss": 1.0145, "step": 2351 }, { "epoch": 0.27890430451796516, "grad_norm": 1.3509875297531597, "learning_rate": 4.979336989276024e-05, "loss": 1.0907, "step": 2352 }, { "epoch": 0.27902288628009014, "grad_norm": 1.3776882640084553, "learning_rate": 4.979306180087312e-05, "loss": 0.7369, "step": 2353 }, { "epoch": 0.2791414680422151, "grad_norm": 1.3447027082182557, "learning_rate": 4.979275348042436e-05, "loss": 0.8507, "step": 2354 }, { "epoch": 0.2792600498043401, "grad_norm": 1.501206382220582, "learning_rate": 4.9792444931416824e-05, "loss": 0.8519, "step": 2355 }, { "epoch": 0.2793786315664651, "grad_norm": 1.3781663804520787, "learning_rate": 4.979213615385334e-05, "loss": 0.6508, "step": 2356 }, { "epoch": 0.27949721332859007, "grad_norm": 1.7375104070283125, "learning_rate": 4.9791827147736777e-05, "loss": 0.9627, "step": 2357 }, { "epoch": 0.27961579509071505, "grad_norm": 1.600400254068593, "learning_rate": 4.9791517913069966e-05, "loss": 1.0308, "step": 2358 }, { "epoch": 0.27973437685284, "grad_norm": 1.7044306390133839, "learning_rate": 4.979120844985575e-05, "loss": 0.8975, "step": 2359 }, { "epoch": 0.279852958614965, "grad_norm": 1.5963307212014914, "learning_rate": 4.9790898758097e-05, "loss": 0.8561, "step": 2360 }, { "epoch": 0.27997154037709, "grad_norm": 1.8424300966720464, "learning_rate": 4.9790588837796566e-05, "loss": 0.8956, "step": 2361 }, { "epoch": 0.28009012213921497, "grad_norm": 1.6962694860934189, "learning_rate": 4.97902786889573e-05, "loss": 0.8921, "step": 2362 }, { "epoch": 0.28020870390133995, "grad_norm": 1.8879000873979064, "learning_rate": 4.978996831158206e-05, "loss": 0.9548, "step": 2363 }, { "epoch": 0.280327285663465, "grad_norm": 1.8732744939240091, "learning_rate": 4.978965770567372e-05, "loss": 0.5602, "step": 2364 }, { "epoch": 0.28044586742558997, "grad_norm": 2.342423842334272, "learning_rate": 4.978934687123513e-05, "loss": 1.174, "step": 2365 }, { "epoch": 0.28056444918771495, "grad_norm": 2.1020523110662905, "learning_rate": 4.978903580826917e-05, "loss": 1.0082, "step": 2366 }, { "epoch": 0.28068303094983993, "grad_norm": 1.5400195148870632, "learning_rate": 4.978872451677868e-05, "loss": 0.7915, "step": 2367 }, { "epoch": 0.2808016127119649, "grad_norm": 1.6456400683839234, "learning_rate": 4.9788412996766565e-05, "loss": 0.962, "step": 2368 }, { "epoch": 0.2809201944740899, "grad_norm": 1.599856550585295, "learning_rate": 4.978810124823567e-05, "loss": 1.1514, "step": 2369 }, { "epoch": 0.28103877623621487, "grad_norm": 1.6554155146302068, "learning_rate": 4.978778927118889e-05, "loss": 1.1269, "step": 2370 }, { "epoch": 0.28115735799833985, "grad_norm": 1.5222823208211833, "learning_rate": 4.9787477065629076e-05, "loss": 1.0928, "step": 2371 }, { "epoch": 0.28127593976046483, "grad_norm": 1.6160714938964338, "learning_rate": 4.978716463155913e-05, "loss": 1.2369, "step": 2372 }, { "epoch": 0.2813945215225898, "grad_norm": 1.3738300214158063, "learning_rate": 4.9786851968981916e-05, "loss": 0.9255, "step": 2373 }, { "epoch": 0.2815131032847148, "grad_norm": 1.25735968450066, "learning_rate": 4.978653907790032e-05, "loss": 0.8168, "step": 2374 }, { "epoch": 0.2816316850468398, "grad_norm": 1.511509307572403, "learning_rate": 4.9786225958317237e-05, "loss": 0.959, "step": 2375 }, { "epoch": 0.28175026680896476, "grad_norm": 1.2785000313988124, "learning_rate": 4.978591261023554e-05, "loss": 0.9499, "step": 2376 }, { "epoch": 0.2818688485710898, "grad_norm": 1.3774059919242738, "learning_rate": 4.9785599033658125e-05, "loss": 0.8132, "step": 2377 }, { "epoch": 0.2819874303332148, "grad_norm": 1.4855844999222076, "learning_rate": 4.9785285228587885e-05, "loss": 0.8463, "step": 2378 }, { "epoch": 0.28210601209533975, "grad_norm": 1.4942637851503844, "learning_rate": 4.97849711950277e-05, "loss": 0.8871, "step": 2379 }, { "epoch": 0.28222459385746473, "grad_norm": 1.5422906574622464, "learning_rate": 4.978465693298048e-05, "loss": 0.9864, "step": 2380 }, { "epoch": 0.2823431756195897, "grad_norm": 1.347215576910993, "learning_rate": 4.9784342442449115e-05, "loss": 1.0103, "step": 2381 }, { "epoch": 0.2824617573817147, "grad_norm": 1.504008556970501, "learning_rate": 4.9784027723436505e-05, "loss": 1.0114, "step": 2382 }, { "epoch": 0.2825803391438397, "grad_norm": 1.6930182151257767, "learning_rate": 4.978371277594555e-05, "loss": 0.797, "step": 2383 }, { "epoch": 0.28269892090596466, "grad_norm": 1.3268759644495594, "learning_rate": 4.978339759997916e-05, "loss": 0.702, "step": 2384 }, { "epoch": 0.28281750266808964, "grad_norm": 1.4438104742677618, "learning_rate": 4.978308219554023e-05, "loss": 0.8245, "step": 2385 }, { "epoch": 0.2829360844302146, "grad_norm": 1.5063078728190158, "learning_rate": 4.9782766562631675e-05, "loss": 0.8495, "step": 2386 }, { "epoch": 0.2830546661923396, "grad_norm": 1.7472729568551792, "learning_rate": 4.9782450701256405e-05, "loss": 1.0, "step": 2387 }, { "epoch": 0.2831732479544646, "grad_norm": 1.5258581507948437, "learning_rate": 4.978213461141733e-05, "loss": 0.8206, "step": 2388 }, { "epoch": 0.2832918297165896, "grad_norm": 1.4474755435901978, "learning_rate": 4.978181829311736e-05, "loss": 0.658, "step": 2389 }, { "epoch": 0.2834104114787146, "grad_norm": 1.4129592213485813, "learning_rate": 4.9781501746359416e-05, "loss": 0.953, "step": 2390 }, { "epoch": 0.2835289932408396, "grad_norm": 1.534427866639826, "learning_rate": 4.9781184971146415e-05, "loss": 0.7199, "step": 2391 }, { "epoch": 0.28364757500296456, "grad_norm": 1.56943773618129, "learning_rate": 4.978086796748128e-05, "loss": 1.0846, "step": 2392 }, { "epoch": 0.28376615676508954, "grad_norm": 1.4865639121992624, "learning_rate": 4.9780550735366926e-05, "loss": 0.6404, "step": 2393 }, { "epoch": 0.2838847385272145, "grad_norm": 1.6075098162842552, "learning_rate": 4.9780233274806286e-05, "loss": 0.912, "step": 2394 }, { "epoch": 0.2840033202893395, "grad_norm": 1.5383713194163655, "learning_rate": 4.977991558580228e-05, "loss": 0.9617, "step": 2395 }, { "epoch": 0.2841219020514645, "grad_norm": 1.7899617678234088, "learning_rate": 4.9779597668357844e-05, "loss": 1.0784, "step": 2396 }, { "epoch": 0.28424048381358946, "grad_norm": 1.7320488989887202, "learning_rate": 4.97792795224759e-05, "loss": 1.0967, "step": 2397 }, { "epoch": 0.28435906557571444, "grad_norm": 1.5400649448911345, "learning_rate": 4.9778961148159384e-05, "loss": 0.7434, "step": 2398 }, { "epoch": 0.2844776473378394, "grad_norm": 1.6161655988288448, "learning_rate": 4.9778642545411236e-05, "loss": 0.8305, "step": 2399 }, { "epoch": 0.2845962290999644, "grad_norm": 1.5004247017968646, "learning_rate": 4.977832371423439e-05, "loss": 0.871, "step": 2400 }, { "epoch": 0.2847148108620894, "grad_norm": 1.6302941174498264, "learning_rate": 4.977800465463178e-05, "loss": 1.0917, "step": 2401 }, { "epoch": 0.2848333926242144, "grad_norm": 1.9500142453998417, "learning_rate": 4.977768536660635e-05, "loss": 0.9573, "step": 2402 }, { "epoch": 0.2849519743863394, "grad_norm": 1.7279385961801683, "learning_rate": 4.977736585016105e-05, "loss": 0.8734, "step": 2403 }, { "epoch": 0.2850705561484644, "grad_norm": 1.5789031368206385, "learning_rate": 4.977704610529883e-05, "loss": 1.0316, "step": 2404 }, { "epoch": 0.28518913791058936, "grad_norm": 1.5441335709818145, "learning_rate": 4.977672613202261e-05, "loss": 0.8706, "step": 2405 }, { "epoch": 0.28530771967271434, "grad_norm": 1.45866301566283, "learning_rate": 4.9776405930335365e-05, "loss": 0.9468, "step": 2406 }, { "epoch": 0.2854263014348393, "grad_norm": 1.8185536141867098, "learning_rate": 4.977608550024005e-05, "loss": 1.035, "step": 2407 }, { "epoch": 0.2855448831969643, "grad_norm": 1.6712295967439859, "learning_rate": 4.97757648417396e-05, "loss": 0.9168, "step": 2408 }, { "epoch": 0.2856634649590893, "grad_norm": 1.4939317295222134, "learning_rate": 4.977544395483699e-05, "loss": 1.0048, "step": 2409 }, { "epoch": 0.28578204672121427, "grad_norm": 1.5496299542817438, "learning_rate": 4.977512283953515e-05, "loss": 1.0024, "step": 2410 }, { "epoch": 0.28590062848333925, "grad_norm": 1.469882899699358, "learning_rate": 4.9774801495837074e-05, "loss": 1.1496, "step": 2411 }, { "epoch": 0.28601921024546423, "grad_norm": 1.6512217810272487, "learning_rate": 4.97744799237457e-05, "loss": 0.9102, "step": 2412 }, { "epoch": 0.2861377920075892, "grad_norm": 1.4709189995620464, "learning_rate": 4.977415812326401e-05, "loss": 1.0569, "step": 2413 }, { "epoch": 0.28625637376971425, "grad_norm": 1.3953818969620164, "learning_rate": 4.977383609439496e-05, "loss": 0.9195, "step": 2414 }, { "epoch": 0.2863749555318392, "grad_norm": 1.5530593465931122, "learning_rate": 4.9773513837141516e-05, "loss": 1.0856, "step": 2415 }, { "epoch": 0.2864935372939642, "grad_norm": 1.3699945452734779, "learning_rate": 4.9773191351506654e-05, "loss": 0.8359, "step": 2416 }, { "epoch": 0.2866121190560892, "grad_norm": 1.4870348851076065, "learning_rate": 4.977286863749335e-05, "loss": 1.0555, "step": 2417 }, { "epoch": 0.28673070081821417, "grad_norm": 1.4359111663921689, "learning_rate": 4.977254569510457e-05, "loss": 1.0983, "step": 2418 }, { "epoch": 0.28684928258033915, "grad_norm": 1.410138983438062, "learning_rate": 4.97722225243433e-05, "loss": 0.8596, "step": 2419 }, { "epoch": 0.28696786434246413, "grad_norm": 1.7809862578142337, "learning_rate": 4.9771899125212524e-05, "loss": 0.9321, "step": 2420 }, { "epoch": 0.2870864461045891, "grad_norm": 1.8669371676755382, "learning_rate": 4.9771575497715206e-05, "loss": 0.9885, "step": 2421 }, { "epoch": 0.2872050278667141, "grad_norm": 1.4964475734731368, "learning_rate": 4.977125164185434e-05, "loss": 0.9255, "step": 2422 }, { "epoch": 0.28732360962883907, "grad_norm": 1.6679845036740746, "learning_rate": 4.977092755763292e-05, "loss": 1.0333, "step": 2423 }, { "epoch": 0.28744219139096405, "grad_norm": 1.5799589531737646, "learning_rate": 4.977060324505391e-05, "loss": 1.0092, "step": 2424 }, { "epoch": 0.28756077315308903, "grad_norm": 1.6957229771398297, "learning_rate": 4.977027870412032e-05, "loss": 0.8155, "step": 2425 }, { "epoch": 0.287679354915214, "grad_norm": 1.387962184634277, "learning_rate": 4.976995393483513e-05, "loss": 0.7769, "step": 2426 }, { "epoch": 0.28779793667733905, "grad_norm": 1.587797760950652, "learning_rate": 4.9769628937201355e-05, "loss": 0.9381, "step": 2427 }, { "epoch": 0.28791651843946403, "grad_norm": 1.5781299114198215, "learning_rate": 4.976930371122196e-05, "loss": 0.879, "step": 2428 }, { "epoch": 0.288035100201589, "grad_norm": 1.6842908631420495, "learning_rate": 4.976897825689997e-05, "loss": 1.1819, "step": 2429 }, { "epoch": 0.288153681963714, "grad_norm": 1.6456598767281734, "learning_rate": 4.976865257423836e-05, "loss": 0.6421, "step": 2430 }, { "epoch": 0.288272263725839, "grad_norm": 1.6081830903014547, "learning_rate": 4.976832666324016e-05, "loss": 0.8206, "step": 2431 }, { "epoch": 0.28839084548796395, "grad_norm": 1.5311996265100005, "learning_rate": 4.976800052390836e-05, "loss": 0.8542, "step": 2432 }, { "epoch": 0.28850942725008893, "grad_norm": 1.3851321623650494, "learning_rate": 4.976767415624596e-05, "loss": 0.7755, "step": 2433 }, { "epoch": 0.2886280090122139, "grad_norm": 2.0135001249696174, "learning_rate": 4.9767347560255986e-05, "loss": 1.164, "step": 2434 }, { "epoch": 0.2887465907743389, "grad_norm": 2.26102826066825, "learning_rate": 4.9767020735941426e-05, "loss": 1.1262, "step": 2435 }, { "epoch": 0.2888651725364639, "grad_norm": 1.5980948256977554, "learning_rate": 4.976669368330532e-05, "loss": 0.7784, "step": 2436 }, { "epoch": 0.28898375429858886, "grad_norm": 1.396902187945008, "learning_rate": 4.9766366402350664e-05, "loss": 0.8269, "step": 2437 }, { "epoch": 0.28910233606071384, "grad_norm": 1.4686863146321578, "learning_rate": 4.976603889308048e-05, "loss": 0.8729, "step": 2438 }, { "epoch": 0.2892209178228389, "grad_norm": 1.4972790957768372, "learning_rate": 4.976571115549779e-05, "loss": 0.8332, "step": 2439 }, { "epoch": 0.28933949958496386, "grad_norm": 1.4774115268183194, "learning_rate": 4.976538318960561e-05, "loss": 0.9241, "step": 2440 }, { "epoch": 0.28945808134708884, "grad_norm": 1.463869044411452, "learning_rate": 4.976505499540696e-05, "loss": 0.935, "step": 2441 }, { "epoch": 0.2895766631092138, "grad_norm": 1.6434574831572348, "learning_rate": 4.976472657290488e-05, "loss": 1.0176, "step": 2442 }, { "epoch": 0.2896952448713388, "grad_norm": 1.372632154226201, "learning_rate": 4.9764397922102393e-05, "loss": 0.7306, "step": 2443 }, { "epoch": 0.2898138266334638, "grad_norm": 1.47297405661419, "learning_rate": 4.9764069043002516e-05, "loss": 0.7562, "step": 2444 }, { "epoch": 0.28993240839558876, "grad_norm": 1.4207201533286626, "learning_rate": 4.9763739935608286e-05, "loss": 0.7087, "step": 2445 }, { "epoch": 0.29005099015771374, "grad_norm": 1.5189953106395007, "learning_rate": 4.976341059992275e-05, "loss": 0.7495, "step": 2446 }, { "epoch": 0.2901695719198387, "grad_norm": 1.5766578427788733, "learning_rate": 4.976308103594894e-05, "loss": 0.8623, "step": 2447 }, { "epoch": 0.2902881536819637, "grad_norm": 1.6111442449254452, "learning_rate": 4.9762751243689885e-05, "loss": 1.1065, "step": 2448 }, { "epoch": 0.2904067354440887, "grad_norm": 1.6997701859633294, "learning_rate": 4.976242122314863e-05, "loss": 0.8941, "step": 2449 }, { "epoch": 0.29052531720621366, "grad_norm": 1.7355967636695475, "learning_rate": 4.976209097432821e-05, "loss": 0.8303, "step": 2450 }, { "epoch": 0.29064389896833864, "grad_norm": 1.8976717022593483, "learning_rate": 4.9761760497231677e-05, "loss": 0.8139, "step": 2451 }, { "epoch": 0.2907624807304637, "grad_norm": 1.7551599543917875, "learning_rate": 4.9761429791862094e-05, "loss": 0.9635, "step": 2452 }, { "epoch": 0.29088106249258866, "grad_norm": 2.1549159818777888, "learning_rate": 4.976109885822248e-05, "loss": 1.1164, "step": 2453 }, { "epoch": 0.29099964425471364, "grad_norm": 1.8153668812167236, "learning_rate": 4.976076769631589e-05, "loss": 0.9211, "step": 2454 }, { "epoch": 0.2911182260168386, "grad_norm": 1.7011277934190991, "learning_rate": 4.97604363061454e-05, "loss": 1.1271, "step": 2455 }, { "epoch": 0.2912368077789636, "grad_norm": 1.6852902298470798, "learning_rate": 4.976010468771405e-05, "loss": 0.9859, "step": 2456 }, { "epoch": 0.2913553895410886, "grad_norm": 1.6563526036450935, "learning_rate": 4.975977284102489e-05, "loss": 0.9522, "step": 2457 }, { "epoch": 0.29147397130321356, "grad_norm": 1.5370122152361108, "learning_rate": 4.975944076608099e-05, "loss": 0.8795, "step": 2458 }, { "epoch": 0.29159255306533854, "grad_norm": 1.5597852455302597, "learning_rate": 4.9759108462885404e-05, "loss": 0.8938, "step": 2459 }, { "epoch": 0.2917111348274635, "grad_norm": 1.4439289768870136, "learning_rate": 4.9758775931441204e-05, "loss": 0.7363, "step": 2460 }, { "epoch": 0.2918297165895885, "grad_norm": 1.8901467329119255, "learning_rate": 4.975844317175145e-05, "loss": 0.858, "step": 2461 }, { "epoch": 0.2919482983517135, "grad_norm": 1.456985236465551, "learning_rate": 4.9758110183819215e-05, "loss": 1.1251, "step": 2462 }, { "epoch": 0.29206688011383847, "grad_norm": 1.5815380036151172, "learning_rate": 4.975777696764756e-05, "loss": 0.9014, "step": 2463 }, { "epoch": 0.2921854618759635, "grad_norm": 1.5078973699400564, "learning_rate": 4.975744352323956e-05, "loss": 0.9357, "step": 2464 }, { "epoch": 0.2923040436380885, "grad_norm": 1.6320533758673677, "learning_rate": 4.9757109850598295e-05, "loss": 1.0957, "step": 2465 }, { "epoch": 0.29242262540021347, "grad_norm": 1.533129535635482, "learning_rate": 4.975677594972683e-05, "loss": 1.0212, "step": 2466 }, { "epoch": 0.29254120716233845, "grad_norm": 1.343228638405399, "learning_rate": 4.975644182062825e-05, "loss": 0.7514, "step": 2467 }, { "epoch": 0.2926597889244634, "grad_norm": 1.3048993275906209, "learning_rate": 4.9756107463305645e-05, "loss": 0.6401, "step": 2468 }, { "epoch": 0.2927783706865884, "grad_norm": 1.4004138509470117, "learning_rate": 4.975577287776208e-05, "loss": 0.9109, "step": 2469 }, { "epoch": 0.2928969524487134, "grad_norm": 1.5108247036926876, "learning_rate": 4.975543806400065e-05, "loss": 0.8359, "step": 2470 }, { "epoch": 0.29301553421083837, "grad_norm": 1.1662701489021907, "learning_rate": 4.975510302202443e-05, "loss": 0.698, "step": 2471 }, { "epoch": 0.29313411597296335, "grad_norm": 1.6722735594043467, "learning_rate": 4.9754767751836523e-05, "loss": 1.0934, "step": 2472 }, { "epoch": 0.29325269773508833, "grad_norm": 1.6570678169279243, "learning_rate": 4.975443225344002e-05, "loss": 0.9784, "step": 2473 }, { "epoch": 0.2933712794972133, "grad_norm": 1.4538665492823626, "learning_rate": 4.975409652683799e-05, "loss": 0.8224, "step": 2474 }, { "epoch": 0.2934898612593383, "grad_norm": 1.620074595506638, "learning_rate": 4.975376057203356e-05, "loss": 0.9211, "step": 2475 }, { "epoch": 0.2936084430214633, "grad_norm": 1.4825846884460248, "learning_rate": 4.975342438902981e-05, "loss": 0.6733, "step": 2476 }, { "epoch": 0.2937270247835883, "grad_norm": 1.4023811884534079, "learning_rate": 4.975308797782984e-05, "loss": 1.0042, "step": 2477 }, { "epoch": 0.2938456065457133, "grad_norm": 1.4679035191682481, "learning_rate": 4.975275133843675e-05, "loss": 0.9633, "step": 2478 }, { "epoch": 0.29396418830783827, "grad_norm": 1.5207144099481416, "learning_rate": 4.975241447085365e-05, "loss": 0.9552, "step": 2479 }, { "epoch": 0.29408277006996325, "grad_norm": 1.5906830869911035, "learning_rate": 4.9752077375083636e-05, "loss": 0.8537, "step": 2480 }, { "epoch": 0.29420135183208823, "grad_norm": 1.7732865528408428, "learning_rate": 4.975174005112984e-05, "loss": 0.8556, "step": 2481 }, { "epoch": 0.2943199335942132, "grad_norm": 1.543750249856277, "learning_rate": 4.9751402498995334e-05, "loss": 0.8888, "step": 2482 }, { "epoch": 0.2944385153563382, "grad_norm": 1.9281247230521332, "learning_rate": 4.975106471868325e-05, "loss": 1.0933, "step": 2483 }, { "epoch": 0.2945570971184632, "grad_norm": 1.7627602015000823, "learning_rate": 4.975072671019671e-05, "loss": 1.0963, "step": 2484 }, { "epoch": 0.29467567888058815, "grad_norm": 1.3745490362794537, "learning_rate": 4.975038847353882e-05, "loss": 0.945, "step": 2485 }, { "epoch": 0.29479426064271314, "grad_norm": 1.7791728116352725, "learning_rate": 4.9750050008712696e-05, "loss": 1.2101, "step": 2486 }, { "epoch": 0.2949128424048381, "grad_norm": 1.4757730441072967, "learning_rate": 4.974971131572146e-05, "loss": 0.8775, "step": 2487 }, { "epoch": 0.2950314241669631, "grad_norm": 1.5079020424842573, "learning_rate": 4.974937239456824e-05, "loss": 0.8159, "step": 2488 }, { "epoch": 0.29515000592908813, "grad_norm": 1.3711467806605337, "learning_rate": 4.974903324525615e-05, "loss": 0.8425, "step": 2489 }, { "epoch": 0.2952685876912131, "grad_norm": 1.388534906127683, "learning_rate": 4.974869386778833e-05, "loss": 0.8557, "step": 2490 }, { "epoch": 0.2953871694533381, "grad_norm": 1.5431256961615287, "learning_rate": 4.97483542621679e-05, "loss": 1.0615, "step": 2491 }, { "epoch": 0.2955057512154631, "grad_norm": 1.3979012359819658, "learning_rate": 4.9748014428397996e-05, "loss": 0.9582, "step": 2492 }, { "epoch": 0.29562433297758806, "grad_norm": 1.4037425953107994, "learning_rate": 4.9747674366481734e-05, "loss": 0.9099, "step": 2493 }, { "epoch": 0.29574291473971304, "grad_norm": 1.5096630353844176, "learning_rate": 4.974733407642227e-05, "loss": 0.9342, "step": 2494 }, { "epoch": 0.295861496501838, "grad_norm": 1.7251883883446064, "learning_rate": 4.974699355822273e-05, "loss": 0.9831, "step": 2495 }, { "epoch": 0.295980078263963, "grad_norm": 1.6940908272405255, "learning_rate": 4.974665281188626e-05, "loss": 1.1715, "step": 2496 }, { "epoch": 0.296098660026088, "grad_norm": 1.5063970898475894, "learning_rate": 4.9746311837416e-05, "loss": 0.8927, "step": 2497 }, { "epoch": 0.29621724178821296, "grad_norm": 1.5959764203463707, "learning_rate": 4.974597063481509e-05, "loss": 0.9756, "step": 2498 }, { "epoch": 0.29633582355033794, "grad_norm": 1.4694363782588318, "learning_rate": 4.9745629204086666e-05, "loss": 0.7107, "step": 2499 }, { "epoch": 0.2964544053124629, "grad_norm": 1.7715520245566363, "learning_rate": 4.97452875452339e-05, "loss": 0.8678, "step": 2500 }, { "epoch": 0.2965729870745879, "grad_norm": 1.719779403221391, "learning_rate": 4.974494565825992e-05, "loss": 0.7108, "step": 2501 }, { "epoch": 0.29669156883671294, "grad_norm": 1.6909317137489985, "learning_rate": 4.9744603543167887e-05, "loss": 1.051, "step": 2502 }, { "epoch": 0.2968101505988379, "grad_norm": 1.624849096638231, "learning_rate": 4.974426119996095e-05, "loss": 1.0165, "step": 2503 }, { "epoch": 0.2969287323609629, "grad_norm": 1.3354729663321676, "learning_rate": 4.974391862864227e-05, "loss": 0.7568, "step": 2504 }, { "epoch": 0.2970473141230879, "grad_norm": 1.6751802870583177, "learning_rate": 4.974357582921501e-05, "loss": 1.1076, "step": 2505 }, { "epoch": 0.29716589588521286, "grad_norm": 1.4878367576048521, "learning_rate": 4.974323280168231e-05, "loss": 0.9715, "step": 2506 }, { "epoch": 0.29728447764733784, "grad_norm": 1.4687316386714155, "learning_rate": 4.9742889546047356e-05, "loss": 0.8072, "step": 2507 }, { "epoch": 0.2974030594094628, "grad_norm": 1.4020801471124975, "learning_rate": 4.974254606231329e-05, "loss": 0.838, "step": 2508 }, { "epoch": 0.2975216411715878, "grad_norm": 1.4308406017369033, "learning_rate": 4.97422023504833e-05, "loss": 1.0792, "step": 2509 }, { "epoch": 0.2976402229337128, "grad_norm": 1.6967117127736508, "learning_rate": 4.9741858410560546e-05, "loss": 1.077, "step": 2510 }, { "epoch": 0.29775880469583776, "grad_norm": 1.5168504739120334, "learning_rate": 4.97415142425482e-05, "loss": 0.8955, "step": 2511 }, { "epoch": 0.29787738645796275, "grad_norm": 1.4729172729059716, "learning_rate": 4.974116984644943e-05, "loss": 0.722, "step": 2512 }, { "epoch": 0.2979959682200877, "grad_norm": 1.6603166010675723, "learning_rate": 4.974082522226741e-05, "loss": 0.9697, "step": 2513 }, { "epoch": 0.29811454998221276, "grad_norm": 1.4771539161281177, "learning_rate": 4.9740480370005324e-05, "loss": 0.9614, "step": 2514 }, { "epoch": 0.29823313174433774, "grad_norm": 1.2436134692586345, "learning_rate": 4.9740135289666346e-05, "loss": 0.5818, "step": 2515 }, { "epoch": 0.2983517135064627, "grad_norm": 1.474971165310201, "learning_rate": 4.973978998125366e-05, "loss": 1.0244, "step": 2516 }, { "epoch": 0.2984702952685877, "grad_norm": 1.5170666539393374, "learning_rate": 4.973944444477045e-05, "loss": 0.922, "step": 2517 }, { "epoch": 0.2985888770307127, "grad_norm": 1.890515246560515, "learning_rate": 4.97390986802199e-05, "loss": 0.9076, "step": 2518 }, { "epoch": 0.29870745879283767, "grad_norm": 1.55404467869752, "learning_rate": 4.973875268760519e-05, "loss": 0.9594, "step": 2519 }, { "epoch": 0.29882604055496265, "grad_norm": 1.4606248062809595, "learning_rate": 4.973840646692953e-05, "loss": 0.7005, "step": 2520 }, { "epoch": 0.2989446223170876, "grad_norm": 1.6411613131427558, "learning_rate": 4.9738060018196085e-05, "loss": 1.1581, "step": 2521 }, { "epoch": 0.2990632040792126, "grad_norm": 1.466427373853806, "learning_rate": 4.973771334140807e-05, "loss": 0.7981, "step": 2522 }, { "epoch": 0.2991817858413376, "grad_norm": 1.487091372512933, "learning_rate": 4.973736643656867e-05, "loss": 0.8585, "step": 2523 }, { "epoch": 0.29930036760346257, "grad_norm": 1.6807062201555787, "learning_rate": 4.9737019303681085e-05, "loss": 0.9157, "step": 2524 }, { "epoch": 0.29941894936558755, "grad_norm": 1.5828903958457703, "learning_rate": 4.9736671942748526e-05, "loss": 1.0478, "step": 2525 }, { "epoch": 0.29953753112771253, "grad_norm": 1.4624945339510396, "learning_rate": 4.973632435377418e-05, "loss": 1.0318, "step": 2526 }, { "epoch": 0.29965611288983757, "grad_norm": 1.2269386621513128, "learning_rate": 4.9735976536761256e-05, "loss": 0.7653, "step": 2527 }, { "epoch": 0.29977469465196255, "grad_norm": 1.5215876464080502, "learning_rate": 4.973562849171296e-05, "loss": 0.8221, "step": 2528 }, { "epoch": 0.29989327641408753, "grad_norm": 1.607624170716136, "learning_rate": 4.973528021863251e-05, "loss": 0.9775, "step": 2529 }, { "epoch": 0.3000118581762125, "grad_norm": 1.645590583933082, "learning_rate": 4.97349317175231e-05, "loss": 0.931, "step": 2530 }, { "epoch": 0.3001304399383375, "grad_norm": 1.4391606533173735, "learning_rate": 4.9734582988387954e-05, "loss": 0.9401, "step": 2531 }, { "epoch": 0.30024902170046247, "grad_norm": 1.6451140065260246, "learning_rate": 4.973423403123029e-05, "loss": 1.0595, "step": 2532 }, { "epoch": 0.30036760346258745, "grad_norm": 1.6129355092836206, "learning_rate": 4.9733884846053314e-05, "loss": 1.0479, "step": 2533 }, { "epoch": 0.30048618522471243, "grad_norm": 1.5091322616920457, "learning_rate": 4.9733535432860246e-05, "loss": 0.9707, "step": 2534 }, { "epoch": 0.3006047669868374, "grad_norm": 1.2857608346899063, "learning_rate": 4.973318579165432e-05, "loss": 0.6603, "step": 2535 }, { "epoch": 0.3007233487489624, "grad_norm": 1.4144053479158973, "learning_rate": 4.973283592243875e-05, "loss": 0.877, "step": 2536 }, { "epoch": 0.3008419305110874, "grad_norm": 1.6757614114070765, "learning_rate": 4.973248582521676e-05, "loss": 1.0667, "step": 2537 }, { "epoch": 0.30096051227321235, "grad_norm": 1.7293242060366243, "learning_rate": 4.9732135499991575e-05, "loss": 0.8467, "step": 2538 }, { "epoch": 0.3010790940353374, "grad_norm": 1.3856228030841544, "learning_rate": 4.9731784946766435e-05, "loss": 0.8273, "step": 2539 }, { "epoch": 0.30119767579746237, "grad_norm": 1.3508044585325354, "learning_rate": 4.973143416554457e-05, "loss": 0.8825, "step": 2540 }, { "epoch": 0.30131625755958735, "grad_norm": 1.5219045444439614, "learning_rate": 4.9731083156329196e-05, "loss": 0.8772, "step": 2541 }, { "epoch": 0.30143483932171233, "grad_norm": 1.4213795692867122, "learning_rate": 4.9730731919123575e-05, "loss": 0.9348, "step": 2542 }, { "epoch": 0.3015534210838373, "grad_norm": 1.6097094915903436, "learning_rate": 4.9730380453930926e-05, "loss": 0.8343, "step": 2543 }, { "epoch": 0.3016720028459623, "grad_norm": 1.6377048285092566, "learning_rate": 4.97300287607545e-05, "loss": 1.0675, "step": 2544 }, { "epoch": 0.3017905846080873, "grad_norm": 1.422753237788405, "learning_rate": 4.972967683959753e-05, "loss": 0.9265, "step": 2545 }, { "epoch": 0.30190916637021226, "grad_norm": 1.6579036301187857, "learning_rate": 4.972932469046326e-05, "loss": 1.1289, "step": 2546 }, { "epoch": 0.30202774813233724, "grad_norm": 1.6965738314625463, "learning_rate": 4.972897231335496e-05, "loss": 0.9131, "step": 2547 }, { "epoch": 0.3021463298944622, "grad_norm": 1.6105316742110862, "learning_rate": 4.972861970827585e-05, "loss": 0.9157, "step": 2548 }, { "epoch": 0.3022649116565872, "grad_norm": 1.5336097991416138, "learning_rate": 4.9728266875229187e-05, "loss": 0.8835, "step": 2549 }, { "epoch": 0.3023834934187122, "grad_norm": 1.6969230508997102, "learning_rate": 4.972791381421823e-05, "loss": 1.0882, "step": 2550 }, { "epoch": 0.30250207518083716, "grad_norm": 1.8540808408366418, "learning_rate": 4.9727560525246234e-05, "loss": 0.935, "step": 2551 }, { "epoch": 0.3026206569429622, "grad_norm": 1.8986895243253412, "learning_rate": 4.972720700831645e-05, "loss": 1.3103, "step": 2552 }, { "epoch": 0.3027392387050872, "grad_norm": 1.475388450852394, "learning_rate": 4.972685326343213e-05, "loss": 0.8544, "step": 2553 }, { "epoch": 0.30285782046721216, "grad_norm": 1.4505693549702259, "learning_rate": 4.972649929059656e-05, "loss": 0.8083, "step": 2554 }, { "epoch": 0.30297640222933714, "grad_norm": 1.2993510353824953, "learning_rate": 4.972614508981298e-05, "loss": 0.8974, "step": 2555 }, { "epoch": 0.3030949839914621, "grad_norm": 1.530886957891809, "learning_rate": 4.972579066108467e-05, "loss": 0.9336, "step": 2556 }, { "epoch": 0.3032135657535871, "grad_norm": 1.3390048195672548, "learning_rate": 4.972543600441488e-05, "loss": 0.847, "step": 2557 }, { "epoch": 0.3033321475157121, "grad_norm": 1.4820355050045977, "learning_rate": 4.9725081119806906e-05, "loss": 0.9657, "step": 2558 }, { "epoch": 0.30345072927783706, "grad_norm": 1.3398319578874784, "learning_rate": 4.972472600726399e-05, "loss": 0.9304, "step": 2559 }, { "epoch": 0.30356931103996204, "grad_norm": 1.3590243926911865, "learning_rate": 4.9724370666789424e-05, "loss": 1.0812, "step": 2560 }, { "epoch": 0.303687892802087, "grad_norm": 1.466930909587523, "learning_rate": 4.972401509838648e-05, "loss": 0.7857, "step": 2561 }, { "epoch": 0.303806474564212, "grad_norm": 1.7058262201137064, "learning_rate": 4.972365930205844e-05, "loss": 0.7288, "step": 2562 }, { "epoch": 0.303925056326337, "grad_norm": 1.587130364137291, "learning_rate": 4.972330327780857e-05, "loss": 0.8536, "step": 2563 }, { "epoch": 0.304043638088462, "grad_norm": 1.715769603271045, "learning_rate": 4.972294702564017e-05, "loss": 0.7871, "step": 2564 }, { "epoch": 0.304162219850587, "grad_norm": 1.6852421762577392, "learning_rate": 4.9722590545556516e-05, "loss": 0.8263, "step": 2565 }, { "epoch": 0.304280801612712, "grad_norm": 1.5455181894644734, "learning_rate": 4.972223383756089e-05, "loss": 0.755, "step": 2566 }, { "epoch": 0.30439938337483696, "grad_norm": 1.4859971423366658, "learning_rate": 4.972187690165658e-05, "loss": 0.6916, "step": 2567 }, { "epoch": 0.30451796513696194, "grad_norm": 1.7446534965871885, "learning_rate": 4.972151973784689e-05, "loss": 0.8266, "step": 2568 }, { "epoch": 0.3046365468990869, "grad_norm": 2.010701507626782, "learning_rate": 4.97211623461351e-05, "loss": 0.984, "step": 2569 }, { "epoch": 0.3047551286612119, "grad_norm": 1.8291165760086305, "learning_rate": 4.9720804726524504e-05, "loss": 0.9564, "step": 2570 }, { "epoch": 0.3048737104233369, "grad_norm": 1.5580193151761066, "learning_rate": 4.9720446879018414e-05, "loss": 0.8761, "step": 2571 }, { "epoch": 0.30499229218546187, "grad_norm": 1.4154722087209504, "learning_rate": 4.972008880362011e-05, "loss": 0.7328, "step": 2572 }, { "epoch": 0.30511087394758685, "grad_norm": 1.4905693002974378, "learning_rate": 4.9719730500332895e-05, "loss": 0.8294, "step": 2573 }, { "epoch": 0.3052294557097118, "grad_norm": 1.632979845700117, "learning_rate": 4.9719371969160086e-05, "loss": 0.9339, "step": 2574 }, { "epoch": 0.3053480374718368, "grad_norm": 1.4621386131526322, "learning_rate": 4.971901321010498e-05, "loss": 0.8916, "step": 2575 }, { "epoch": 0.3054666192339618, "grad_norm": 1.4814258969399958, "learning_rate": 4.9718654223170884e-05, "loss": 0.9591, "step": 2576 }, { "epoch": 0.3055852009960868, "grad_norm": 1.2969267923326158, "learning_rate": 4.971829500836111e-05, "loss": 0.7788, "step": 2577 }, { "epoch": 0.3057037827582118, "grad_norm": 1.4427795961104461, "learning_rate": 4.971793556567896e-05, "loss": 0.7664, "step": 2578 }, { "epoch": 0.3058223645203368, "grad_norm": 1.6143677658883506, "learning_rate": 4.971757589512777e-05, "loss": 0.9594, "step": 2579 }, { "epoch": 0.30594094628246177, "grad_norm": 1.5199534968373927, "learning_rate": 4.971721599671083e-05, "loss": 0.8734, "step": 2580 }, { "epoch": 0.30605952804458675, "grad_norm": 1.7345809244002213, "learning_rate": 4.971685587043147e-05, "loss": 0.9996, "step": 2581 }, { "epoch": 0.30617810980671173, "grad_norm": 1.2547650177353251, "learning_rate": 4.9716495516293006e-05, "loss": 0.5779, "step": 2582 }, { "epoch": 0.3062966915688367, "grad_norm": 1.449675058444994, "learning_rate": 4.971613493429877e-05, "loss": 0.8739, "step": 2583 }, { "epoch": 0.3064152733309617, "grad_norm": 1.380035294120362, "learning_rate": 4.9715774124452076e-05, "loss": 0.674, "step": 2584 }, { "epoch": 0.30653385509308667, "grad_norm": 1.5749245337471962, "learning_rate": 4.971541308675625e-05, "loss": 0.8355, "step": 2585 }, { "epoch": 0.30665243685521165, "grad_norm": 1.47932020546663, "learning_rate": 4.971505182121462e-05, "loss": 0.6311, "step": 2586 }, { "epoch": 0.30677101861733663, "grad_norm": 1.5421659610812786, "learning_rate": 4.971469032783053e-05, "loss": 0.7349, "step": 2587 }, { "epoch": 0.3068896003794616, "grad_norm": 1.4579061987067918, "learning_rate": 4.9714328606607296e-05, "loss": 0.5985, "step": 2588 }, { "epoch": 0.30700818214158665, "grad_norm": 1.7765789338821452, "learning_rate": 4.971396665754826e-05, "loss": 0.9933, "step": 2589 }, { "epoch": 0.30712676390371163, "grad_norm": 1.6220379350890641, "learning_rate": 4.971360448065676e-05, "loss": 0.7988, "step": 2590 }, { "epoch": 0.3072453456658366, "grad_norm": 1.5878553627182814, "learning_rate": 4.971324207593613e-05, "loss": 0.8747, "step": 2591 }, { "epoch": 0.3073639274279616, "grad_norm": 1.4551534605513254, "learning_rate": 4.9712879443389715e-05, "loss": 0.9099, "step": 2592 }, { "epoch": 0.3074825091900866, "grad_norm": 1.5078157084711428, "learning_rate": 4.9712516583020854e-05, "loss": 0.9787, "step": 2593 }, { "epoch": 0.30760109095221155, "grad_norm": 1.4219650038883662, "learning_rate": 4.971215349483289e-05, "loss": 0.6654, "step": 2594 }, { "epoch": 0.30771967271433653, "grad_norm": 1.5018418918247791, "learning_rate": 4.971179017882919e-05, "loss": 0.841, "step": 2595 }, { "epoch": 0.3078382544764615, "grad_norm": 1.478086151175857, "learning_rate": 4.9711426635013076e-05, "loss": 0.5684, "step": 2596 }, { "epoch": 0.3079568362385865, "grad_norm": 1.7522568180604647, "learning_rate": 4.9711062863387915e-05, "loss": 0.9465, "step": 2597 }, { "epoch": 0.3080754180007115, "grad_norm": 1.4582486901807175, "learning_rate": 4.971069886395706e-05, "loss": 0.775, "step": 2598 }, { "epoch": 0.30819399976283646, "grad_norm": 1.8510213048731483, "learning_rate": 4.971033463672385e-05, "loss": 0.7992, "step": 2599 }, { "epoch": 0.30831258152496144, "grad_norm": 1.6151131238928218, "learning_rate": 4.970997018169167e-05, "loss": 0.7525, "step": 2600 }, { "epoch": 0.3084311632870864, "grad_norm": 1.6836601205663049, "learning_rate": 4.9709605498863865e-05, "loss": 0.9163, "step": 2601 }, { "epoch": 0.30854974504921145, "grad_norm": 1.7062777568567788, "learning_rate": 4.97092405882438e-05, "loss": 1.0714, "step": 2602 }, { "epoch": 0.30866832681133644, "grad_norm": 1.3897848854666475, "learning_rate": 4.970887544983483e-05, "loss": 0.7758, "step": 2603 }, { "epoch": 0.3087869085734614, "grad_norm": 1.822950464884883, "learning_rate": 4.970851008364034e-05, "loss": 0.8179, "step": 2604 }, { "epoch": 0.3089054903355864, "grad_norm": 1.5606074460297912, "learning_rate": 4.9708144489663675e-05, "loss": 0.7607, "step": 2605 }, { "epoch": 0.3090240720977114, "grad_norm": 1.6103331138012362, "learning_rate": 4.970777866790822e-05, "loss": 1.0334, "step": 2606 }, { "epoch": 0.30914265385983636, "grad_norm": 1.3150429649914626, "learning_rate": 4.970741261837734e-05, "loss": 0.6621, "step": 2607 }, { "epoch": 0.30926123562196134, "grad_norm": 1.5630751426917011, "learning_rate": 4.970704634107442e-05, "loss": 0.8753, "step": 2608 }, { "epoch": 0.3093798173840863, "grad_norm": 1.4200733841857318, "learning_rate": 4.9706679836002836e-05, "loss": 0.7946, "step": 2609 }, { "epoch": 0.3094983991462113, "grad_norm": 1.833466194263778, "learning_rate": 4.970631310316595e-05, "loss": 0.8894, "step": 2610 }, { "epoch": 0.3096169809083363, "grad_norm": 1.3462887980066904, "learning_rate": 4.970594614256716e-05, "loss": 0.745, "step": 2611 }, { "epoch": 0.30973556267046126, "grad_norm": 1.4386182090423194, "learning_rate": 4.970557895420984e-05, "loss": 0.8175, "step": 2612 }, { "epoch": 0.30985414443258624, "grad_norm": 1.5409815392447845, "learning_rate": 4.970521153809738e-05, "loss": 0.7803, "step": 2613 }, { "epoch": 0.3099727261947113, "grad_norm": 1.3749228546246528, "learning_rate": 4.9704843894233164e-05, "loss": 0.5846, "step": 2614 }, { "epoch": 0.31009130795683626, "grad_norm": 1.5263414253033938, "learning_rate": 4.970447602262058e-05, "loss": 0.6874, "step": 2615 }, { "epoch": 0.31020988971896124, "grad_norm": 1.7794791138627477, "learning_rate": 4.970410792326303e-05, "loss": 0.9489, "step": 2616 }, { "epoch": 0.3103284714810862, "grad_norm": 1.760711225890816, "learning_rate": 4.970373959616389e-05, "loss": 1.063, "step": 2617 }, { "epoch": 0.3104470532432112, "grad_norm": 1.5668682954357929, "learning_rate": 4.970337104132657e-05, "loss": 0.9784, "step": 2618 }, { "epoch": 0.3105656350053362, "grad_norm": 1.635818448919912, "learning_rate": 4.970300225875446e-05, "loss": 0.9689, "step": 2619 }, { "epoch": 0.31068421676746116, "grad_norm": 1.9725092733775906, "learning_rate": 4.970263324845096e-05, "loss": 0.8159, "step": 2620 }, { "epoch": 0.31080279852958614, "grad_norm": 1.5012003477409537, "learning_rate": 4.970226401041948e-05, "loss": 0.9158, "step": 2621 }, { "epoch": 0.3109213802917111, "grad_norm": 2.0004391485720228, "learning_rate": 4.970189454466342e-05, "loss": 0.8863, "step": 2622 }, { "epoch": 0.3110399620538361, "grad_norm": 1.6453957143104205, "learning_rate": 4.970152485118618e-05, "loss": 0.8352, "step": 2623 }, { "epoch": 0.3111585438159611, "grad_norm": 1.4973576448470858, "learning_rate": 4.970115492999117e-05, "loss": 1.0024, "step": 2624 }, { "epoch": 0.31127712557808607, "grad_norm": 1.5803617812198987, "learning_rate": 4.9700784781081796e-05, "loss": 0.827, "step": 2625 }, { "epoch": 0.3113957073402111, "grad_norm": 1.7660374366710427, "learning_rate": 4.970041440446148e-05, "loss": 0.9267, "step": 2626 }, { "epoch": 0.3115142891023361, "grad_norm": 1.6011907976076205, "learning_rate": 4.970004380013364e-05, "loss": 0.9827, "step": 2627 }, { "epoch": 0.31163287086446106, "grad_norm": 1.6072789459622534, "learning_rate": 4.969967296810168e-05, "loss": 0.9632, "step": 2628 }, { "epoch": 0.31175145262658605, "grad_norm": 1.3322846308689626, "learning_rate": 4.9699301908369025e-05, "loss": 0.7393, "step": 2629 }, { "epoch": 0.311870034388711, "grad_norm": 1.4847747822411748, "learning_rate": 4.969893062093909e-05, "loss": 1.0349, "step": 2630 }, { "epoch": 0.311988616150836, "grad_norm": 1.3543384788385817, "learning_rate": 4.969855910581531e-05, "loss": 0.9077, "step": 2631 }, { "epoch": 0.312107197912961, "grad_norm": 1.5721906040927294, "learning_rate": 4.96981873630011e-05, "loss": 1.0494, "step": 2632 }, { "epoch": 0.31222577967508597, "grad_norm": 1.7243191094359915, "learning_rate": 4.9697815392499894e-05, "loss": 0.8057, "step": 2633 }, { "epoch": 0.31234436143721095, "grad_norm": 1.4379544903133519, "learning_rate": 4.9697443194315105e-05, "loss": 0.9455, "step": 2634 }, { "epoch": 0.31246294319933593, "grad_norm": 1.3368235556260124, "learning_rate": 4.9697070768450184e-05, "loss": 0.8406, "step": 2635 }, { "epoch": 0.3125815249614609, "grad_norm": 1.3095789823362, "learning_rate": 4.969669811490856e-05, "loss": 0.7857, "step": 2636 }, { "epoch": 0.3127001067235859, "grad_norm": 1.429464353700218, "learning_rate": 4.9696325233693655e-05, "loss": 0.9826, "step": 2637 }, { "epoch": 0.31281868848571087, "grad_norm": 1.6413598191801042, "learning_rate": 4.969595212480892e-05, "loss": 0.9167, "step": 2638 }, { "epoch": 0.3129372702478359, "grad_norm": 1.432700278522245, "learning_rate": 4.969557878825779e-05, "loss": 0.7019, "step": 2639 }, { "epoch": 0.3130558520099609, "grad_norm": 1.5048084279873926, "learning_rate": 4.96952052240437e-05, "loss": 0.8958, "step": 2640 }, { "epoch": 0.31317443377208587, "grad_norm": 1.7805021359273707, "learning_rate": 4.969483143217012e-05, "loss": 0.9748, "step": 2641 }, { "epoch": 0.31329301553421085, "grad_norm": 1.389726066484738, "learning_rate": 4.969445741264046e-05, "loss": 0.7684, "step": 2642 }, { "epoch": 0.31341159729633583, "grad_norm": 1.3655791222700386, "learning_rate": 4.969408316545819e-05, "loss": 0.8109, "step": 2643 }, { "epoch": 0.3135301790584608, "grad_norm": 1.6702168018498835, "learning_rate": 4.969370869062676e-05, "loss": 0.8914, "step": 2644 }, { "epoch": 0.3136487608205858, "grad_norm": 1.5746687507393857, "learning_rate": 4.969333398814961e-05, "loss": 0.9526, "step": 2645 }, { "epoch": 0.3137673425827108, "grad_norm": 1.3273528179635306, "learning_rate": 4.96929590580302e-05, "loss": 0.914, "step": 2646 }, { "epoch": 0.31388592434483575, "grad_norm": 1.4826094318989829, "learning_rate": 4.9692583900271996e-05, "loss": 0.8494, "step": 2647 }, { "epoch": 0.31400450610696073, "grad_norm": 1.5554826478075097, "learning_rate": 4.9692208514878444e-05, "loss": 0.6351, "step": 2648 }, { "epoch": 0.3141230878690857, "grad_norm": 1.7102026713677392, "learning_rate": 4.9691832901853006e-05, "loss": 0.9757, "step": 2649 }, { "epoch": 0.3142416696312107, "grad_norm": 1.6146548626478319, "learning_rate": 4.969145706119915e-05, "loss": 0.9551, "step": 2650 }, { "epoch": 0.31436025139333573, "grad_norm": 1.3389391007527207, "learning_rate": 4.969108099292035e-05, "loss": 0.7567, "step": 2651 }, { "epoch": 0.3144788331554607, "grad_norm": 1.6437079099692233, "learning_rate": 4.969070469702005e-05, "loss": 0.961, "step": 2652 }, { "epoch": 0.3145974149175857, "grad_norm": 1.6589477068104022, "learning_rate": 4.9690328173501734e-05, "loss": 0.8826, "step": 2653 }, { "epoch": 0.3147159966797107, "grad_norm": 1.1360292348701742, "learning_rate": 4.9689951422368866e-05, "loss": 0.6122, "step": 2654 }, { "epoch": 0.31483457844183566, "grad_norm": 1.51308237033301, "learning_rate": 4.968957444362492e-05, "loss": 0.9353, "step": 2655 }, { "epoch": 0.31495316020396064, "grad_norm": 1.6549714117141376, "learning_rate": 4.968919723727338e-05, "loss": 1.1647, "step": 2656 }, { "epoch": 0.3150717419660856, "grad_norm": 1.7603797713426332, "learning_rate": 4.968881980331772e-05, "loss": 0.7734, "step": 2657 }, { "epoch": 0.3151903237282106, "grad_norm": 1.5695819809878564, "learning_rate": 4.968844214176142e-05, "loss": 0.7266, "step": 2658 }, { "epoch": 0.3153089054903356, "grad_norm": 1.6032529088845697, "learning_rate": 4.968806425260794e-05, "loss": 1.0162, "step": 2659 }, { "epoch": 0.31542748725246056, "grad_norm": 1.3886534291144894, "learning_rate": 4.96876861358608e-05, "loss": 0.8603, "step": 2660 }, { "epoch": 0.31554606901458554, "grad_norm": 1.4051979279527427, "learning_rate": 4.968730779152346e-05, "loss": 0.742, "step": 2661 }, { "epoch": 0.3156646507767105, "grad_norm": 1.486309612007322, "learning_rate": 4.968692921959942e-05, "loss": 0.6914, "step": 2662 }, { "epoch": 0.3157832325388355, "grad_norm": 1.644788206480169, "learning_rate": 4.968655042009216e-05, "loss": 0.919, "step": 2663 }, { "epoch": 0.31590181430096054, "grad_norm": 1.4512727914309704, "learning_rate": 4.9686171393005174e-05, "loss": 0.7048, "step": 2664 }, { "epoch": 0.3160203960630855, "grad_norm": 1.4854024363489926, "learning_rate": 4.968579213834197e-05, "loss": 0.6105, "step": 2665 }, { "epoch": 0.3161389778252105, "grad_norm": 1.649956629720194, "learning_rate": 4.968541265610603e-05, "loss": 0.995, "step": 2666 }, { "epoch": 0.3162575595873355, "grad_norm": 1.4139044989294303, "learning_rate": 4.9685032946300855e-05, "loss": 0.5695, "step": 2667 }, { "epoch": 0.31637614134946046, "grad_norm": 1.6930928952874693, "learning_rate": 4.968465300892996e-05, "loss": 0.8023, "step": 2668 }, { "epoch": 0.31649472311158544, "grad_norm": 1.7352741521881156, "learning_rate": 4.968427284399681e-05, "loss": 0.8135, "step": 2669 }, { "epoch": 0.3166133048737104, "grad_norm": 1.7099178691237082, "learning_rate": 4.9683892451504955e-05, "loss": 0.7621, "step": 2670 }, { "epoch": 0.3167318866358354, "grad_norm": 1.8840255962750865, "learning_rate": 4.968351183145787e-05, "loss": 0.7483, "step": 2671 }, { "epoch": 0.3168504683979604, "grad_norm": 1.8020612147598967, "learning_rate": 4.9683130983859086e-05, "loss": 0.8415, "step": 2672 }, { "epoch": 0.31696905016008536, "grad_norm": 2.1774718662523913, "learning_rate": 4.968274990871209e-05, "loss": 0.9584, "step": 2673 }, { "epoch": 0.31708763192221034, "grad_norm": 1.618495300544766, "learning_rate": 4.968236860602041e-05, "loss": 0.789, "step": 2674 }, { "epoch": 0.3172062136843353, "grad_norm": 1.7209048977814367, "learning_rate": 4.9681987075787576e-05, "loss": 0.7891, "step": 2675 }, { "epoch": 0.31732479544646036, "grad_norm": 1.5649218262829656, "learning_rate": 4.9681605318017066e-05, "loss": 0.9379, "step": 2676 }, { "epoch": 0.31744337720858534, "grad_norm": 1.4423781212296602, "learning_rate": 4.968122333271243e-05, "loss": 0.7404, "step": 2677 }, { "epoch": 0.3175619589707103, "grad_norm": 1.4483519041783814, "learning_rate": 4.9680841119877184e-05, "loss": 0.993, "step": 2678 }, { "epoch": 0.3176805407328353, "grad_norm": 1.5923483763814457, "learning_rate": 4.968045867951484e-05, "loss": 0.895, "step": 2679 }, { "epoch": 0.3177991224949603, "grad_norm": 1.5844197951628598, "learning_rate": 4.968007601162894e-05, "loss": 0.8953, "step": 2680 }, { "epoch": 0.31791770425708527, "grad_norm": 1.3874534619501644, "learning_rate": 4.9679693116223e-05, "loss": 0.8436, "step": 2681 }, { "epoch": 0.31803628601921025, "grad_norm": 1.1986487574212918, "learning_rate": 4.967930999330056e-05, "loss": 0.6752, "step": 2682 }, { "epoch": 0.3181548677813352, "grad_norm": 1.598011115707571, "learning_rate": 4.967892664286514e-05, "loss": 0.878, "step": 2683 }, { "epoch": 0.3182734495434602, "grad_norm": 1.8463185876241386, "learning_rate": 4.967854306492028e-05, "loss": 1.1226, "step": 2684 }, { "epoch": 0.3183920313055852, "grad_norm": 1.405426475257274, "learning_rate": 4.967815925946951e-05, "loss": 0.9405, "step": 2685 }, { "epoch": 0.31851061306771017, "grad_norm": 1.4328218943422086, "learning_rate": 4.967777522651639e-05, "loss": 0.9408, "step": 2686 }, { "epoch": 0.31862919482983515, "grad_norm": 1.4000156335004044, "learning_rate": 4.967739096606443e-05, "loss": 1.1054, "step": 2687 }, { "epoch": 0.31874777659196013, "grad_norm": 1.42324732835267, "learning_rate": 4.9677006478117195e-05, "loss": 0.6972, "step": 2688 }, { "epoch": 0.31886635835408517, "grad_norm": 1.5464415545746586, "learning_rate": 4.967662176267822e-05, "loss": 0.8433, "step": 2689 }, { "epoch": 0.31898494011621015, "grad_norm": 1.483059713974556, "learning_rate": 4.967623681975105e-05, "loss": 0.7086, "step": 2690 }, { "epoch": 0.31910352187833513, "grad_norm": 1.3967410649533993, "learning_rate": 4.9675851649339234e-05, "loss": 0.8798, "step": 2691 }, { "epoch": 0.3192221036404601, "grad_norm": 1.713256336302126, "learning_rate": 4.967546625144633e-05, "loss": 0.9062, "step": 2692 }, { "epoch": 0.3193406854025851, "grad_norm": 1.8156979784314708, "learning_rate": 4.9675080626075885e-05, "loss": 1.1516, "step": 2693 }, { "epoch": 0.31945926716471007, "grad_norm": 1.7429895717933346, "learning_rate": 4.967469477323146e-05, "loss": 1.0175, "step": 2694 }, { "epoch": 0.31957784892683505, "grad_norm": 1.2684034593628921, "learning_rate": 4.967430869291659e-05, "loss": 0.5538, "step": 2695 }, { "epoch": 0.31969643068896003, "grad_norm": 1.6112117856295023, "learning_rate": 4.967392238513487e-05, "loss": 0.8699, "step": 2696 }, { "epoch": 0.319815012451085, "grad_norm": 1.7298997979520403, "learning_rate": 4.9673535849889834e-05, "loss": 0.7953, "step": 2697 }, { "epoch": 0.31993359421321, "grad_norm": 1.4851996145557502, "learning_rate": 4.967314908718506e-05, "loss": 0.6843, "step": 2698 }, { "epoch": 0.320052175975335, "grad_norm": 1.5034993079186254, "learning_rate": 4.9672762097024106e-05, "loss": 0.7931, "step": 2699 }, { "epoch": 0.32017075773745995, "grad_norm": 1.6694327106982263, "learning_rate": 4.9672374879410536e-05, "loss": 0.8841, "step": 2700 }, { "epoch": 0.320289339499585, "grad_norm": 1.5191427430641669, "learning_rate": 4.967198743434793e-05, "loss": 0.8175, "step": 2701 }, { "epoch": 0.32040792126170997, "grad_norm": 1.898652546812242, "learning_rate": 4.967159976183985e-05, "loss": 0.6929, "step": 2702 }, { "epoch": 0.32052650302383495, "grad_norm": 1.4914277779343135, "learning_rate": 4.9671211861889874e-05, "loss": 0.9222, "step": 2703 }, { "epoch": 0.32064508478595993, "grad_norm": 1.4914992246795422, "learning_rate": 4.967082373450158e-05, "loss": 0.8679, "step": 2704 }, { "epoch": 0.3207636665480849, "grad_norm": 1.639095466114579, "learning_rate": 4.967043537967855e-05, "loss": 0.7045, "step": 2705 }, { "epoch": 0.3208822483102099, "grad_norm": 1.6225478976843937, "learning_rate": 4.967004679742436e-05, "loss": 0.758, "step": 2706 }, { "epoch": 0.3210008300723349, "grad_norm": 1.6415244527084527, "learning_rate": 4.966965798774258e-05, "loss": 0.7123, "step": 2707 }, { "epoch": 0.32111941183445986, "grad_norm": 1.7771767698210414, "learning_rate": 4.9669268950636815e-05, "loss": 0.8649, "step": 2708 }, { "epoch": 0.32123799359658484, "grad_norm": 1.427174591417389, "learning_rate": 4.966887968611064e-05, "loss": 0.6612, "step": 2709 }, { "epoch": 0.3213565753587098, "grad_norm": 2.2487945318053058, "learning_rate": 4.966849019416764e-05, "loss": 1.0242, "step": 2710 }, { "epoch": 0.3214751571208348, "grad_norm": 1.8989091526360655, "learning_rate": 4.966810047481142e-05, "loss": 0.8491, "step": 2711 }, { "epoch": 0.3215937388829598, "grad_norm": 1.4886489194472363, "learning_rate": 4.966771052804555e-05, "loss": 0.6541, "step": 2712 }, { "epoch": 0.32171232064508476, "grad_norm": 1.9425249096952568, "learning_rate": 4.966732035387365e-05, "loss": 0.7138, "step": 2713 }, { "epoch": 0.3218309024072098, "grad_norm": 1.5603168881662295, "learning_rate": 4.96669299522993e-05, "loss": 0.8729, "step": 2714 }, { "epoch": 0.3219494841693348, "grad_norm": 1.4372828896893561, "learning_rate": 4.9666539323326106e-05, "loss": 0.7963, "step": 2715 }, { "epoch": 0.32206806593145976, "grad_norm": 1.4630052135750407, "learning_rate": 4.966614846695767e-05, "loss": 0.7536, "step": 2716 }, { "epoch": 0.32218664769358474, "grad_norm": 1.7179239631099212, "learning_rate": 4.966575738319759e-05, "loss": 0.7555, "step": 2717 }, { "epoch": 0.3223052294557097, "grad_norm": 1.777404528290335, "learning_rate": 4.966536607204948e-05, "loss": 1.0564, "step": 2718 }, { "epoch": 0.3224238112178347, "grad_norm": 1.2913568877915826, "learning_rate": 4.966497453351693e-05, "loss": 0.7498, "step": 2719 }, { "epoch": 0.3225423929799597, "grad_norm": 1.5428853804520588, "learning_rate": 4.966458276760357e-05, "loss": 0.89, "step": 2720 }, { "epoch": 0.32266097474208466, "grad_norm": 1.5111616582472553, "learning_rate": 4.9664190774313e-05, "loss": 0.7881, "step": 2721 }, { "epoch": 0.32277955650420964, "grad_norm": 1.4869216829819702, "learning_rate": 4.9663798553648834e-05, "loss": 0.8756, "step": 2722 }, { "epoch": 0.3228981382663346, "grad_norm": 1.4748085806439688, "learning_rate": 4.966340610561469e-05, "loss": 0.8295, "step": 2723 }, { "epoch": 0.3230167200284596, "grad_norm": 1.6795243846872943, "learning_rate": 4.966301343021419e-05, "loss": 0.9624, "step": 2724 }, { "epoch": 0.3231353017905846, "grad_norm": 1.5703761146324269, "learning_rate": 4.966262052745094e-05, "loss": 0.8193, "step": 2725 }, { "epoch": 0.3232538835527096, "grad_norm": 1.4118069984151005, "learning_rate": 4.966222739732859e-05, "loss": 0.9, "step": 2726 }, { "epoch": 0.3233724653148346, "grad_norm": 1.5061008334007595, "learning_rate": 4.966183403985073e-05, "loss": 0.7043, "step": 2727 }, { "epoch": 0.3234910470769596, "grad_norm": 1.5529059476868734, "learning_rate": 4.966144045502101e-05, "loss": 0.8262, "step": 2728 }, { "epoch": 0.32360962883908456, "grad_norm": 1.7654291314310488, "learning_rate": 4.966104664284305e-05, "loss": 1.0376, "step": 2729 }, { "epoch": 0.32372821060120954, "grad_norm": 1.6535180662890188, "learning_rate": 4.966065260332048e-05, "loss": 0.9945, "step": 2730 }, { "epoch": 0.3238467923633345, "grad_norm": 1.3232890366181171, "learning_rate": 4.966025833645693e-05, "loss": 0.8465, "step": 2731 }, { "epoch": 0.3239653741254595, "grad_norm": 1.8552384102608477, "learning_rate": 4.965986384225605e-05, "loss": 0.9806, "step": 2732 }, { "epoch": 0.3240839558875845, "grad_norm": 1.6633362597333998, "learning_rate": 4.9659469120721456e-05, "loss": 1.0228, "step": 2733 }, { "epoch": 0.32420253764970947, "grad_norm": 1.8039601811575963, "learning_rate": 4.96590741718568e-05, "loss": 1.0149, "step": 2734 }, { "epoch": 0.32432111941183445, "grad_norm": 1.496510299099411, "learning_rate": 4.965867899566572e-05, "loss": 0.8182, "step": 2735 }, { "epoch": 0.3244397011739594, "grad_norm": 1.5003034536532909, "learning_rate": 4.9658283592151855e-05, "loss": 0.7613, "step": 2736 }, { "epoch": 0.3245582829360844, "grad_norm": 1.9844302900875372, "learning_rate": 4.965788796131885e-05, "loss": 1.1964, "step": 2737 }, { "epoch": 0.3246768646982094, "grad_norm": 1.415547845892196, "learning_rate": 4.965749210317037e-05, "loss": 0.8273, "step": 2738 }, { "epoch": 0.3247954464603344, "grad_norm": 1.4280341983919356, "learning_rate": 4.9657096017710034e-05, "loss": 0.9482, "step": 2739 }, { "epoch": 0.3249140282224594, "grad_norm": 1.4673923875595514, "learning_rate": 4.965669970494151e-05, "loss": 1.0265, "step": 2740 }, { "epoch": 0.3250326099845844, "grad_norm": 1.5639943883450238, "learning_rate": 4.9656303164868454e-05, "loss": 0.8925, "step": 2741 }, { "epoch": 0.32515119174670937, "grad_norm": 1.2772132468819737, "learning_rate": 4.965590639749452e-05, "loss": 0.7292, "step": 2742 }, { "epoch": 0.32526977350883435, "grad_norm": 1.3935507203213573, "learning_rate": 4.965550940282337e-05, "loss": 0.8946, "step": 2743 }, { "epoch": 0.32538835527095933, "grad_norm": 1.3417704909645944, "learning_rate": 4.9655112180858656e-05, "loss": 0.769, "step": 2744 }, { "epoch": 0.3255069370330843, "grad_norm": 1.6819792822071984, "learning_rate": 4.9654714731604036e-05, "loss": 1.049, "step": 2745 }, { "epoch": 0.3256255187952093, "grad_norm": 1.7624632124897714, "learning_rate": 4.965431705506318e-05, "loss": 0.9167, "step": 2746 }, { "epoch": 0.32574410055733427, "grad_norm": 1.8125552206933606, "learning_rate": 4.965391915123975e-05, "loss": 0.9906, "step": 2747 }, { "epoch": 0.32586268231945925, "grad_norm": 1.5585853476730893, "learning_rate": 4.965352102013743e-05, "loss": 0.9564, "step": 2748 }, { "epoch": 0.32598126408158423, "grad_norm": 1.4782312524801715, "learning_rate": 4.9653122661759866e-05, "loss": 0.8484, "step": 2749 }, { "epoch": 0.3260998458437092, "grad_norm": 1.492183036900416, "learning_rate": 4.9652724076110754e-05, "loss": 0.8403, "step": 2750 }, { "epoch": 0.32621842760583425, "grad_norm": 1.6551330285410444, "learning_rate": 4.965232526319375e-05, "loss": 1.0068, "step": 2751 }, { "epoch": 0.32633700936795923, "grad_norm": 1.4293310224809268, "learning_rate": 4.9651926223012536e-05, "loss": 0.7726, "step": 2752 }, { "epoch": 0.3264555911300842, "grad_norm": 1.5640227735740697, "learning_rate": 4.9651526955570795e-05, "loss": 0.7291, "step": 2753 }, { "epoch": 0.3265741728922092, "grad_norm": 1.5129509564578678, "learning_rate": 4.965112746087221e-05, "loss": 0.7347, "step": 2754 }, { "epoch": 0.32669275465433417, "grad_norm": 1.7005463169985402, "learning_rate": 4.9650727738920456e-05, "loss": 1.119, "step": 2755 }, { "epoch": 0.32681133641645915, "grad_norm": 1.7813645301392322, "learning_rate": 4.965032778971922e-05, "loss": 0.9366, "step": 2756 }, { "epoch": 0.32692991817858413, "grad_norm": 1.6658920949671843, "learning_rate": 4.9649927613272184e-05, "loss": 0.9145, "step": 2757 }, { "epoch": 0.3270484999407091, "grad_norm": 1.5530916201485885, "learning_rate": 4.964952720958305e-05, "loss": 0.7077, "step": 2758 }, { "epoch": 0.3271670817028341, "grad_norm": 1.983070495308045, "learning_rate": 4.96491265786555e-05, "loss": 1.0396, "step": 2759 }, { "epoch": 0.3272856634649591, "grad_norm": 1.6429419397195484, "learning_rate": 4.964872572049324e-05, "loss": 0.9702, "step": 2760 }, { "epoch": 0.32740424522708406, "grad_norm": 1.567598038429543, "learning_rate": 4.964832463509994e-05, "loss": 0.9339, "step": 2761 }, { "epoch": 0.32752282698920904, "grad_norm": 1.4689677873242704, "learning_rate": 4.964792332247932e-05, "loss": 0.8379, "step": 2762 }, { "epoch": 0.327641408751334, "grad_norm": 1.6379684180806189, "learning_rate": 4.964752178263508e-05, "loss": 0.8064, "step": 2763 }, { "epoch": 0.32775999051345905, "grad_norm": 1.6612423213752732, "learning_rate": 4.9647120015570894e-05, "loss": 0.9853, "step": 2764 }, { "epoch": 0.32787857227558403, "grad_norm": 1.5492941388907633, "learning_rate": 4.96467180212905e-05, "loss": 0.8689, "step": 2765 }, { "epoch": 0.327997154037709, "grad_norm": 1.4700239159035646, "learning_rate": 4.964631579979758e-05, "loss": 0.8644, "step": 2766 }, { "epoch": 0.328115735799834, "grad_norm": 1.7397612302161707, "learning_rate": 4.964591335109586e-05, "loss": 0.8844, "step": 2767 }, { "epoch": 0.328234317561959, "grad_norm": 1.4451745171867123, "learning_rate": 4.964551067518904e-05, "loss": 0.8543, "step": 2768 }, { "epoch": 0.32835289932408396, "grad_norm": 1.3240068738470825, "learning_rate": 4.9645107772080834e-05, "loss": 0.6118, "step": 2769 }, { "epoch": 0.32847148108620894, "grad_norm": 1.5794189295605323, "learning_rate": 4.9644704641774963e-05, "loss": 1.0382, "step": 2770 }, { "epoch": 0.3285900628483339, "grad_norm": 1.4758957542508462, "learning_rate": 4.964430128427513e-05, "loss": 0.7699, "step": 2771 }, { "epoch": 0.3287086446104589, "grad_norm": 1.6164342833908791, "learning_rate": 4.9643897699585056e-05, "loss": 0.8955, "step": 2772 }, { "epoch": 0.3288272263725839, "grad_norm": 1.6202558170918955, "learning_rate": 4.964349388770847e-05, "loss": 0.8362, "step": 2773 }, { "epoch": 0.32894580813470886, "grad_norm": 1.4348110533810172, "learning_rate": 4.964308984864909e-05, "loss": 0.7605, "step": 2774 }, { "epoch": 0.32906438989683384, "grad_norm": 1.4218686201839146, "learning_rate": 4.964268558241064e-05, "loss": 0.67, "step": 2775 }, { "epoch": 0.3291829716589589, "grad_norm": 1.525256457738026, "learning_rate": 4.964228108899684e-05, "loss": 0.6838, "step": 2776 }, { "epoch": 0.32930155342108386, "grad_norm": 1.4343599954804653, "learning_rate": 4.964187636841144e-05, "loss": 0.7929, "step": 2777 }, { "epoch": 0.32942013518320884, "grad_norm": 1.6673166920532625, "learning_rate": 4.9641471420658154e-05, "loss": 0.7381, "step": 2778 }, { "epoch": 0.3295387169453338, "grad_norm": 1.5933448255015839, "learning_rate": 4.964106624574072e-05, "loss": 0.7367, "step": 2779 }, { "epoch": 0.3296572987074588, "grad_norm": 1.6123113902170438, "learning_rate": 4.9640660843662865e-05, "loss": 0.8604, "step": 2780 }, { "epoch": 0.3297758804695838, "grad_norm": 2.1716368279114278, "learning_rate": 4.964025521442834e-05, "loss": 0.9571, "step": 2781 }, { "epoch": 0.32989446223170876, "grad_norm": 2.169942548016722, "learning_rate": 4.9639849358040874e-05, "loss": 0.9362, "step": 2782 }, { "epoch": 0.33001304399383374, "grad_norm": 1.834795633740072, "learning_rate": 4.963944327450422e-05, "loss": 0.841, "step": 2783 }, { "epoch": 0.3301316257559587, "grad_norm": 1.84713720722264, "learning_rate": 4.96390369638221e-05, "loss": 0.6122, "step": 2784 }, { "epoch": 0.3302502075180837, "grad_norm": 1.34013181753607, "learning_rate": 4.9638630425998285e-05, "loss": 0.6473, "step": 2785 }, { "epoch": 0.3303687892802087, "grad_norm": 1.6089334234479604, "learning_rate": 4.963822366103651e-05, "loss": 0.9525, "step": 2786 }, { "epoch": 0.33048737104233367, "grad_norm": 1.8312307755719461, "learning_rate": 4.963781666894052e-05, "loss": 1.0734, "step": 2787 }, { "epoch": 0.33060595280445865, "grad_norm": 1.65368028349116, "learning_rate": 4.9637409449714076e-05, "loss": 0.9452, "step": 2788 }, { "epoch": 0.3307245345665837, "grad_norm": 1.520381340220029, "learning_rate": 4.963700200336093e-05, "loss": 0.9324, "step": 2789 }, { "epoch": 0.33084311632870866, "grad_norm": 1.5031168137028363, "learning_rate": 4.963659432988484e-05, "loss": 1.0041, "step": 2790 }, { "epoch": 0.33096169809083364, "grad_norm": 1.8458510736368334, "learning_rate": 4.963618642928956e-05, "loss": 0.9691, "step": 2791 }, { "epoch": 0.3310802798529586, "grad_norm": 1.4212338358323864, "learning_rate": 4.963577830157885e-05, "loss": 0.7868, "step": 2792 }, { "epoch": 0.3311988616150836, "grad_norm": 1.48049567821041, "learning_rate": 4.963536994675648e-05, "loss": 0.8595, "step": 2793 }, { "epoch": 0.3313174433772086, "grad_norm": 1.5153884021235164, "learning_rate": 4.9634961364826206e-05, "loss": 0.8579, "step": 2794 }, { "epoch": 0.33143602513933357, "grad_norm": 1.594545454096086, "learning_rate": 4.963455255579179e-05, "loss": 0.9207, "step": 2795 }, { "epoch": 0.33155460690145855, "grad_norm": 1.4923294895537007, "learning_rate": 4.9634143519657014e-05, "loss": 0.7358, "step": 2796 }, { "epoch": 0.33167318866358353, "grad_norm": 1.4473185155513781, "learning_rate": 4.9633734256425644e-05, "loss": 0.7292, "step": 2797 }, { "epoch": 0.3317917704257085, "grad_norm": 1.54527523654456, "learning_rate": 4.963332476610145e-05, "loss": 0.8929, "step": 2798 }, { "epoch": 0.3319103521878335, "grad_norm": 1.72342725632366, "learning_rate": 4.963291504868822e-05, "loss": 0.9952, "step": 2799 }, { "epoch": 0.33202893394995847, "grad_norm": 1.3668676221515605, "learning_rate": 4.963250510418971e-05, "loss": 0.7836, "step": 2800 }, { "epoch": 0.3321475157120835, "grad_norm": 1.6140539758231358, "learning_rate": 4.963209493260971e-05, "loss": 0.8181, "step": 2801 }, { "epoch": 0.3322660974742085, "grad_norm": 1.5186487093143384, "learning_rate": 4.9631684533951996e-05, "loss": 0.6405, "step": 2802 }, { "epoch": 0.33238467923633347, "grad_norm": 2.081595154271113, "learning_rate": 4.9631273908220364e-05, "loss": 0.9064, "step": 2803 }, { "epoch": 0.33250326099845845, "grad_norm": 1.4832673412218698, "learning_rate": 4.9630863055418586e-05, "loss": 0.8756, "step": 2804 }, { "epoch": 0.33262184276058343, "grad_norm": 2.1309771132101045, "learning_rate": 4.963045197555046e-05, "loss": 0.7442, "step": 2805 }, { "epoch": 0.3327404245227084, "grad_norm": 1.653285934336091, "learning_rate": 4.963004066861977e-05, "loss": 0.776, "step": 2806 }, { "epoch": 0.3328590062848334, "grad_norm": 1.8030736793595858, "learning_rate": 4.9629629134630306e-05, "loss": 0.7965, "step": 2807 }, { "epoch": 0.3329775880469584, "grad_norm": 1.5396209943692214, "learning_rate": 4.962921737358587e-05, "loss": 0.6777, "step": 2808 }, { "epoch": 0.33309616980908335, "grad_norm": 1.3184378776717363, "learning_rate": 4.962880538549024e-05, "loss": 0.5342, "step": 2809 }, { "epoch": 0.33321475157120833, "grad_norm": 1.3876034318785482, "learning_rate": 4.962839317034723e-05, "loss": 0.6402, "step": 2810 }, { "epoch": 0.3333333333333333, "grad_norm": 1.3753280647202086, "learning_rate": 4.962798072816065e-05, "loss": 0.5978, "step": 2811 }, { "epoch": 0.3334519150954583, "grad_norm": 1.660007722079822, "learning_rate": 4.9627568058934274e-05, "loss": 0.8813, "step": 2812 }, { "epoch": 0.3335704968575833, "grad_norm": 1.9798030696132103, "learning_rate": 4.962715516267192e-05, "loss": 1.0693, "step": 2813 }, { "epoch": 0.3336890786197083, "grad_norm": 1.5290415078224444, "learning_rate": 4.962674203937741e-05, "loss": 0.8477, "step": 2814 }, { "epoch": 0.3338076603818333, "grad_norm": 1.5955194156465384, "learning_rate": 4.962632868905452e-05, "loss": 0.6505, "step": 2815 }, { "epoch": 0.3339262421439583, "grad_norm": 1.4776903754309634, "learning_rate": 4.962591511170709e-05, "loss": 0.7045, "step": 2816 }, { "epoch": 0.33404482390608325, "grad_norm": 1.6896571199719752, "learning_rate": 4.962550130733891e-05, "loss": 0.7483, "step": 2817 }, { "epoch": 0.33416340566820824, "grad_norm": 1.5807609014437038, "learning_rate": 4.962508727595382e-05, "loss": 0.7775, "step": 2818 }, { "epoch": 0.3342819874303332, "grad_norm": 1.9799473341463127, "learning_rate": 4.962467301755561e-05, "loss": 0.9784, "step": 2819 }, { "epoch": 0.3344005691924582, "grad_norm": 1.724025274585934, "learning_rate": 4.9624258532148114e-05, "loss": 0.9772, "step": 2820 }, { "epoch": 0.3345191509545832, "grad_norm": 1.7189110824497051, "learning_rate": 4.962384381973516e-05, "loss": 1.0267, "step": 2821 }, { "epoch": 0.33463773271670816, "grad_norm": 1.414210404307473, "learning_rate": 4.9623428880320556e-05, "loss": 0.6544, "step": 2822 }, { "epoch": 0.33475631447883314, "grad_norm": 1.654770549949875, "learning_rate": 4.962301371390813e-05, "loss": 0.8016, "step": 2823 }, { "epoch": 0.3348748962409581, "grad_norm": 1.460637026310849, "learning_rate": 4.962259832050171e-05, "loss": 0.8311, "step": 2824 }, { "epoch": 0.3349934780030831, "grad_norm": 1.6273694842527817, "learning_rate": 4.962218270010513e-05, "loss": 0.7325, "step": 2825 }, { "epoch": 0.33511205976520814, "grad_norm": 1.4939469121769486, "learning_rate": 4.9621766852722224e-05, "loss": 0.8355, "step": 2826 }, { "epoch": 0.3352306415273331, "grad_norm": 1.7283197028868467, "learning_rate": 4.962135077835682e-05, "loss": 0.7549, "step": 2827 }, { "epoch": 0.3353492232894581, "grad_norm": 1.9593240951420299, "learning_rate": 4.962093447701275e-05, "loss": 1.3127, "step": 2828 }, { "epoch": 0.3354678050515831, "grad_norm": 1.4316475921318357, "learning_rate": 4.962051794869386e-05, "loss": 0.8735, "step": 2829 }, { "epoch": 0.33558638681370806, "grad_norm": 1.3372951686821986, "learning_rate": 4.962010119340399e-05, "loss": 0.7494, "step": 2830 }, { "epoch": 0.33570496857583304, "grad_norm": 1.3746263273388082, "learning_rate": 4.961968421114697e-05, "loss": 0.8242, "step": 2831 }, { "epoch": 0.335823550337958, "grad_norm": 1.5671956067437434, "learning_rate": 4.961926700192665e-05, "loss": 0.9125, "step": 2832 }, { "epoch": 0.335942132100083, "grad_norm": 1.5245976019969716, "learning_rate": 4.9618849565746886e-05, "loss": 0.7008, "step": 2833 }, { "epoch": 0.336060713862208, "grad_norm": 1.6046706174628338, "learning_rate": 4.961843190261152e-05, "loss": 0.7532, "step": 2834 }, { "epoch": 0.33617929562433296, "grad_norm": 1.4291737061886658, "learning_rate": 4.9618014012524395e-05, "loss": 0.8646, "step": 2835 }, { "epoch": 0.33629787738645794, "grad_norm": 2.240609001073133, "learning_rate": 4.9617595895489375e-05, "loss": 0.8901, "step": 2836 }, { "epoch": 0.3364164591485829, "grad_norm": 1.7589961771319107, "learning_rate": 4.961717755151031e-05, "loss": 0.9592, "step": 2837 }, { "epoch": 0.3365350409107079, "grad_norm": 1.3448616237641957, "learning_rate": 4.961675898059104e-05, "loss": 0.455, "step": 2838 }, { "epoch": 0.33665362267283294, "grad_norm": 1.1899828364806053, "learning_rate": 4.9616340182735457e-05, "loss": 0.6567, "step": 2839 }, { "epoch": 0.3367722044349579, "grad_norm": 1.5444616283351622, "learning_rate": 4.96159211579474e-05, "loss": 0.7597, "step": 2840 }, { "epoch": 0.3368907861970829, "grad_norm": 1.507572360636242, "learning_rate": 4.9615501906230734e-05, "loss": 0.7383, "step": 2841 }, { "epoch": 0.3370093679592079, "grad_norm": 2.057334748719598, "learning_rate": 4.961508242758932e-05, "loss": 0.9777, "step": 2842 }, { "epoch": 0.33712794972133286, "grad_norm": 1.6090544302351908, "learning_rate": 4.961466272202704e-05, "loss": 0.8335, "step": 2843 }, { "epoch": 0.33724653148345785, "grad_norm": 1.7514103983026081, "learning_rate": 4.961424278954775e-05, "loss": 0.802, "step": 2844 }, { "epoch": 0.3373651132455828, "grad_norm": 1.4103369286139642, "learning_rate": 4.961382263015533e-05, "loss": 0.7398, "step": 2845 }, { "epoch": 0.3374836950077078, "grad_norm": 1.4969483703583482, "learning_rate": 4.9613402243853644e-05, "loss": 0.7988, "step": 2846 }, { "epoch": 0.3376022767698328, "grad_norm": 1.4832407581082243, "learning_rate": 4.961298163064657e-05, "loss": 0.6867, "step": 2847 }, { "epoch": 0.33772085853195777, "grad_norm": 1.5675731404656152, "learning_rate": 4.961256079053799e-05, "loss": 0.9691, "step": 2848 }, { "epoch": 0.33783944029408275, "grad_norm": 1.5585724472907976, "learning_rate": 4.961213972353178e-05, "loss": 0.9006, "step": 2849 }, { "epoch": 0.33795802205620773, "grad_norm": 1.4264671873097026, "learning_rate": 4.961171842963183e-05, "loss": 1.0245, "step": 2850 }, { "epoch": 0.33807660381833277, "grad_norm": 1.5627125350800048, "learning_rate": 4.961129690884201e-05, "loss": 0.9583, "step": 2851 }, { "epoch": 0.33819518558045775, "grad_norm": 1.4026114383891077, "learning_rate": 4.9610875161166214e-05, "loss": 0.7778, "step": 2852 }, { "epoch": 0.3383137673425827, "grad_norm": 1.4174218463707347, "learning_rate": 4.961045318660833e-05, "loss": 0.9183, "step": 2853 }, { "epoch": 0.3384323491047077, "grad_norm": 1.5009330505642695, "learning_rate": 4.9610030985172254e-05, "loss": 0.7885, "step": 2854 }, { "epoch": 0.3385509308668327, "grad_norm": 1.4590603134232503, "learning_rate": 4.9609608556861864e-05, "loss": 0.7576, "step": 2855 }, { "epoch": 0.33866951262895767, "grad_norm": 1.4101792816062046, "learning_rate": 4.960918590168106e-05, "loss": 0.7472, "step": 2856 }, { "epoch": 0.33878809439108265, "grad_norm": 1.377982379433936, "learning_rate": 4.960876301963374e-05, "loss": 0.7622, "step": 2857 }, { "epoch": 0.33890667615320763, "grad_norm": 1.6369897854224613, "learning_rate": 4.9608339910723805e-05, "loss": 0.8572, "step": 2858 }, { "epoch": 0.3390252579153326, "grad_norm": 1.7746473769897542, "learning_rate": 4.960791657495515e-05, "loss": 0.8703, "step": 2859 }, { "epoch": 0.3391438396774576, "grad_norm": 1.7089409180061108, "learning_rate": 4.9607493012331685e-05, "loss": 0.7029, "step": 2860 }, { "epoch": 0.3392624214395826, "grad_norm": 1.5637088972903435, "learning_rate": 4.9607069222857306e-05, "loss": 0.8632, "step": 2861 }, { "epoch": 0.33938100320170755, "grad_norm": 1.8350189716827918, "learning_rate": 4.960664520653593e-05, "loss": 0.9446, "step": 2862 }, { "epoch": 0.3394995849638326, "grad_norm": 1.8785922565171425, "learning_rate": 4.960622096337145e-05, "loss": 0.9769, "step": 2863 }, { "epoch": 0.33961816672595757, "grad_norm": 1.7552565041603954, "learning_rate": 4.960579649336779e-05, "loss": 0.8059, "step": 2864 }, { "epoch": 0.33973674848808255, "grad_norm": 1.49552979531624, "learning_rate": 4.9605371796528864e-05, "loss": 0.786, "step": 2865 }, { "epoch": 0.33985533025020753, "grad_norm": 1.7502690392214282, "learning_rate": 4.9604946872858585e-05, "loss": 1.1297, "step": 2866 }, { "epoch": 0.3399739120123325, "grad_norm": 1.6302192735156025, "learning_rate": 4.960452172236086e-05, "loss": 0.9746, "step": 2867 }, { "epoch": 0.3400924937744575, "grad_norm": 1.670406605489306, "learning_rate": 4.960409634503962e-05, "loss": 1.0384, "step": 2868 }, { "epoch": 0.3402110755365825, "grad_norm": 1.7978646739080515, "learning_rate": 4.960367074089879e-05, "loss": 0.9543, "step": 2869 }, { "epoch": 0.34032965729870746, "grad_norm": 1.2808967466357806, "learning_rate": 4.9603244909942274e-05, "loss": 0.6514, "step": 2870 }, { "epoch": 0.34044823906083244, "grad_norm": 1.4142144379265653, "learning_rate": 4.960281885217402e-05, "loss": 0.693, "step": 2871 }, { "epoch": 0.3405668208229574, "grad_norm": 1.402273416382213, "learning_rate": 4.9602392567597946e-05, "loss": 0.7622, "step": 2872 }, { "epoch": 0.3406854025850824, "grad_norm": 1.5503730452737443, "learning_rate": 4.9601966056217974e-05, "loss": 0.7811, "step": 2873 }, { "epoch": 0.3408039843472074, "grad_norm": 1.6623532190872874, "learning_rate": 4.960153931803805e-05, "loss": 1.1486, "step": 2874 }, { "epoch": 0.34092256610933236, "grad_norm": 1.3508075864689846, "learning_rate": 4.9601112353062104e-05, "loss": 0.8321, "step": 2875 }, { "epoch": 0.3410411478714574, "grad_norm": 1.4919573525634406, "learning_rate": 4.960068516129407e-05, "loss": 1.1293, "step": 2876 }, { "epoch": 0.3411597296335824, "grad_norm": 1.5536254652927997, "learning_rate": 4.9600257742737885e-05, "loss": 0.7924, "step": 2877 }, { "epoch": 0.34127831139570736, "grad_norm": 1.7777949990598614, "learning_rate": 4.959983009739748e-05, "loss": 0.7825, "step": 2878 }, { "epoch": 0.34139689315783234, "grad_norm": 1.489579095403158, "learning_rate": 4.9599402225276825e-05, "loss": 0.7607, "step": 2879 }, { "epoch": 0.3415154749199573, "grad_norm": 1.7204057525117367, "learning_rate": 4.9598974126379836e-05, "loss": 0.8257, "step": 2880 }, { "epoch": 0.3416340566820823, "grad_norm": 1.5548134399020723, "learning_rate": 4.959854580071047e-05, "loss": 0.7232, "step": 2881 }, { "epoch": 0.3417526384442073, "grad_norm": 1.5267958144256801, "learning_rate": 4.9598117248272684e-05, "loss": 0.8521, "step": 2882 }, { "epoch": 0.34187122020633226, "grad_norm": 1.7164534194811047, "learning_rate": 4.959768846907042e-05, "loss": 0.8078, "step": 2883 }, { "epoch": 0.34198980196845724, "grad_norm": 2.121653714241097, "learning_rate": 4.959725946310763e-05, "loss": 1.0854, "step": 2884 }, { "epoch": 0.3421083837305822, "grad_norm": 1.4848838808553477, "learning_rate": 4.959683023038827e-05, "loss": 0.6335, "step": 2885 }, { "epoch": 0.3422269654927072, "grad_norm": 1.6805762609491637, "learning_rate": 4.9596400770916295e-05, "loss": 0.7168, "step": 2886 }, { "epoch": 0.3423455472548322, "grad_norm": 1.791311544759708, "learning_rate": 4.959597108469567e-05, "loss": 0.7745, "step": 2887 }, { "epoch": 0.3424641290169572, "grad_norm": 1.8439552954657066, "learning_rate": 4.9595541171730355e-05, "loss": 1.0191, "step": 2888 }, { "epoch": 0.3425827107790822, "grad_norm": 1.8490681041122836, "learning_rate": 4.959511103202431e-05, "loss": 0.8734, "step": 2889 }, { "epoch": 0.3427012925412072, "grad_norm": 1.6413582286901611, "learning_rate": 4.959468066558151e-05, "loss": 0.6911, "step": 2890 }, { "epoch": 0.34281987430333216, "grad_norm": 1.6828091334256225, "learning_rate": 4.95942500724059e-05, "loss": 0.8553, "step": 2891 }, { "epoch": 0.34293845606545714, "grad_norm": 1.3734560340484727, "learning_rate": 4.959381925250147e-05, "loss": 0.6126, "step": 2892 }, { "epoch": 0.3430570378275821, "grad_norm": 1.7643219648293704, "learning_rate": 4.9593388205872184e-05, "loss": 0.9363, "step": 2893 }, { "epoch": 0.3431756195897071, "grad_norm": 1.601004157565287, "learning_rate": 4.9592956932522015e-05, "loss": 0.8632, "step": 2894 }, { "epoch": 0.3432942013518321, "grad_norm": 1.5337521076998217, "learning_rate": 4.959252543245495e-05, "loss": 0.8709, "step": 2895 }, { "epoch": 0.34341278311395707, "grad_norm": 1.4482297831974633, "learning_rate": 4.959209370567495e-05, "loss": 0.7381, "step": 2896 }, { "epoch": 0.34353136487608205, "grad_norm": 1.5149045970592943, "learning_rate": 4.9591661752186e-05, "loss": 0.8716, "step": 2897 }, { "epoch": 0.343649946638207, "grad_norm": 1.4319461546205026, "learning_rate": 4.959122957199209e-05, "loss": 0.8863, "step": 2898 }, { "epoch": 0.343768528400332, "grad_norm": 1.4846130598672929, "learning_rate": 4.95907971650972e-05, "loss": 0.8486, "step": 2899 }, { "epoch": 0.343887110162457, "grad_norm": 1.4783318079775827, "learning_rate": 4.959036453150532e-05, "loss": 1.0275, "step": 2900 }, { "epoch": 0.344005691924582, "grad_norm": 1.5878967901218546, "learning_rate": 4.9589931671220426e-05, "loss": 0.971, "step": 2901 }, { "epoch": 0.344124273686707, "grad_norm": 1.5381404860042924, "learning_rate": 4.9589498584246516e-05, "loss": 0.8736, "step": 2902 }, { "epoch": 0.344242855448832, "grad_norm": 1.4901685133057792, "learning_rate": 4.958906527058759e-05, "loss": 0.7956, "step": 2903 }, { "epoch": 0.34436143721095697, "grad_norm": 1.4918793113371167, "learning_rate": 4.958863173024763e-05, "loss": 0.8814, "step": 2904 }, { "epoch": 0.34448001897308195, "grad_norm": 1.5736887725373363, "learning_rate": 4.9588197963230634e-05, "loss": 0.8224, "step": 2905 }, { "epoch": 0.34459860073520693, "grad_norm": 1.4567855777889704, "learning_rate": 4.9587763969540615e-05, "loss": 0.806, "step": 2906 }, { "epoch": 0.3447171824973319, "grad_norm": 1.7400888865345627, "learning_rate": 4.958732974918156e-05, "loss": 1.1106, "step": 2907 }, { "epoch": 0.3448357642594569, "grad_norm": 1.6463626810786787, "learning_rate": 4.9586895302157466e-05, "loss": 0.8044, "step": 2908 }, { "epoch": 0.34495434602158187, "grad_norm": 2.0682076583454396, "learning_rate": 4.958646062847235e-05, "loss": 0.956, "step": 2909 }, { "epoch": 0.34507292778370685, "grad_norm": 1.7049160832110697, "learning_rate": 4.958602572813023e-05, "loss": 0.8021, "step": 2910 }, { "epoch": 0.34519150954583183, "grad_norm": 2.0271936111919713, "learning_rate": 4.958559060113509e-05, "loss": 0.7953, "step": 2911 }, { "epoch": 0.3453100913079568, "grad_norm": 1.9357673191340123, "learning_rate": 4.958515524749096e-05, "loss": 1.1852, "step": 2912 }, { "epoch": 0.34542867307008185, "grad_norm": 1.8974394315908816, "learning_rate": 4.958471966720185e-05, "loss": 0.8866, "step": 2913 }, { "epoch": 0.34554725483220683, "grad_norm": 1.5913551807067745, "learning_rate": 4.9584283860271766e-05, "loss": 0.8123, "step": 2914 }, { "epoch": 0.3456658365943318, "grad_norm": 1.6841784536416802, "learning_rate": 4.958384782670473e-05, "loss": 0.9463, "step": 2915 }, { "epoch": 0.3457844183564568, "grad_norm": 1.4918550317000312, "learning_rate": 4.958341156650477e-05, "loss": 0.7815, "step": 2916 }, { "epoch": 0.34590300011858177, "grad_norm": 1.5962074725422235, "learning_rate": 4.95829750796759e-05, "loss": 1.057, "step": 2917 }, { "epoch": 0.34602158188070675, "grad_norm": 1.4915102033567327, "learning_rate": 4.958253836622214e-05, "loss": 0.8276, "step": 2918 }, { "epoch": 0.34614016364283173, "grad_norm": 1.4575562855826463, "learning_rate": 4.9582101426147524e-05, "loss": 0.8253, "step": 2919 }, { "epoch": 0.3462587454049567, "grad_norm": 1.3138631171233541, "learning_rate": 4.958166425945608e-05, "loss": 0.6927, "step": 2920 }, { "epoch": 0.3463773271670817, "grad_norm": 1.5390301051786683, "learning_rate": 4.958122686615183e-05, "loss": 0.749, "step": 2921 }, { "epoch": 0.3464959089292067, "grad_norm": 1.5113020196288256, "learning_rate": 4.958078924623882e-05, "loss": 0.8579, "step": 2922 }, { "epoch": 0.34661449069133166, "grad_norm": 1.5584020877082554, "learning_rate": 4.958035139972107e-05, "loss": 0.8704, "step": 2923 }, { "epoch": 0.34673307245345664, "grad_norm": 1.581652104238569, "learning_rate": 4.957991332660262e-05, "loss": 0.9784, "step": 2924 }, { "epoch": 0.3468516542155816, "grad_norm": 1.3080677917923837, "learning_rate": 4.957947502688752e-05, "loss": 0.6602, "step": 2925 }, { "epoch": 0.34697023597770665, "grad_norm": 1.6629534110270083, "learning_rate": 4.95790365005798e-05, "loss": 0.9093, "step": 2926 }, { "epoch": 0.34708881773983163, "grad_norm": 1.3684825803811067, "learning_rate": 4.95785977476835e-05, "loss": 0.7229, "step": 2927 }, { "epoch": 0.3472073995019566, "grad_norm": 1.699546159398271, "learning_rate": 4.957815876820268e-05, "loss": 0.9919, "step": 2928 }, { "epoch": 0.3473259812640816, "grad_norm": 1.4523691409449255, "learning_rate": 4.9577719562141354e-05, "loss": 0.7973, "step": 2929 }, { "epoch": 0.3474445630262066, "grad_norm": 1.7118166424198409, "learning_rate": 4.957728012950361e-05, "loss": 0.8463, "step": 2930 }, { "epoch": 0.34756314478833156, "grad_norm": 1.4668118436738335, "learning_rate": 4.957684047029348e-05, "loss": 0.7836, "step": 2931 }, { "epoch": 0.34768172655045654, "grad_norm": 1.436301002663976, "learning_rate": 4.9576400584515016e-05, "loss": 0.8215, "step": 2932 }, { "epoch": 0.3478003083125815, "grad_norm": 1.5620087406130712, "learning_rate": 4.9575960472172274e-05, "loss": 0.8432, "step": 2933 }, { "epoch": 0.3479188900747065, "grad_norm": 1.5848392804953346, "learning_rate": 4.957552013326931e-05, "loss": 0.802, "step": 2934 }, { "epoch": 0.3480374718368315, "grad_norm": 1.658576623667986, "learning_rate": 4.95750795678102e-05, "loss": 0.7934, "step": 2935 }, { "epoch": 0.34815605359895646, "grad_norm": 1.7102619746485916, "learning_rate": 4.957463877579899e-05, "loss": 0.8402, "step": 2936 }, { "epoch": 0.34827463536108144, "grad_norm": 1.4234612054929487, "learning_rate": 4.957419775723974e-05, "loss": 0.9876, "step": 2937 }, { "epoch": 0.3483932171232065, "grad_norm": 1.7310833873898996, "learning_rate": 4.957375651213651e-05, "loss": 0.9304, "step": 2938 }, { "epoch": 0.34851179888533146, "grad_norm": 1.5280488326137254, "learning_rate": 4.9573315040493396e-05, "loss": 0.7575, "step": 2939 }, { "epoch": 0.34863038064745644, "grad_norm": 1.4080776199050185, "learning_rate": 4.9572873342314454e-05, "loss": 0.6303, "step": 2940 }, { "epoch": 0.3487489624095814, "grad_norm": 1.3147304229328767, "learning_rate": 4.9572431417603734e-05, "loss": 0.6367, "step": 2941 }, { "epoch": 0.3488675441717064, "grad_norm": 1.7144716924788561, "learning_rate": 4.957198926636535e-05, "loss": 0.7634, "step": 2942 }, { "epoch": 0.3489861259338314, "grad_norm": 1.6234874142773068, "learning_rate": 4.957154688860335e-05, "loss": 0.7326, "step": 2943 }, { "epoch": 0.34910470769595636, "grad_norm": 1.556993440137025, "learning_rate": 4.957110428432181e-05, "loss": 0.8707, "step": 2944 }, { "epoch": 0.34922328945808134, "grad_norm": 1.6126807612520484, "learning_rate": 4.957066145352483e-05, "loss": 0.8636, "step": 2945 }, { "epoch": 0.3493418712202063, "grad_norm": 1.6328306517189772, "learning_rate": 4.9570218396216484e-05, "loss": 0.8413, "step": 2946 }, { "epoch": 0.3494604529823313, "grad_norm": 1.4379299411178703, "learning_rate": 4.9569775112400844e-05, "loss": 0.7204, "step": 2947 }, { "epoch": 0.3495790347444563, "grad_norm": 1.6388481213265393, "learning_rate": 4.9569331602082015e-05, "loss": 0.8935, "step": 2948 }, { "epoch": 0.34969761650658127, "grad_norm": 1.6032559555797714, "learning_rate": 4.956888786526407e-05, "loss": 0.7418, "step": 2949 }, { "epoch": 0.34981619826870625, "grad_norm": 1.4713099466171342, "learning_rate": 4.956844390195111e-05, "loss": 0.758, "step": 2950 }, { "epoch": 0.3499347800308313, "grad_norm": 1.6545557755063045, "learning_rate": 4.956799971214723e-05, "loss": 0.8655, "step": 2951 }, { "epoch": 0.35005336179295626, "grad_norm": 1.687222754277601, "learning_rate": 4.956755529585651e-05, "loss": 0.9229, "step": 2952 }, { "epoch": 0.35017194355508124, "grad_norm": 1.3206347253748691, "learning_rate": 4.956711065308307e-05, "loss": 0.5424, "step": 2953 }, { "epoch": 0.3502905253172062, "grad_norm": 1.8243610828010588, "learning_rate": 4.956666578383099e-05, "loss": 1.026, "step": 2954 }, { "epoch": 0.3504091070793312, "grad_norm": 1.6742606863312652, "learning_rate": 4.956622068810438e-05, "loss": 0.9345, "step": 2955 }, { "epoch": 0.3505276888414562, "grad_norm": 1.726377272865767, "learning_rate": 4.9565775365907334e-05, "loss": 0.8389, "step": 2956 }, { "epoch": 0.35064627060358117, "grad_norm": 1.5685248012943251, "learning_rate": 4.956532981724397e-05, "loss": 0.7085, "step": 2957 }, { "epoch": 0.35076485236570615, "grad_norm": 1.7795963662027212, "learning_rate": 4.956488404211839e-05, "loss": 0.9965, "step": 2958 }, { "epoch": 0.35088343412783113, "grad_norm": 1.9510552695806374, "learning_rate": 4.9564438040534694e-05, "loss": 0.9712, "step": 2959 }, { "epoch": 0.3510020158899561, "grad_norm": 1.3049175952627046, "learning_rate": 4.9563991812497006e-05, "loss": 0.7541, "step": 2960 }, { "epoch": 0.3511205976520811, "grad_norm": 1.49258541064587, "learning_rate": 4.9563545358009436e-05, "loss": 0.7934, "step": 2961 }, { "epoch": 0.35123917941420607, "grad_norm": 1.3036530677258027, "learning_rate": 4.9563098677076106e-05, "loss": 0.5105, "step": 2962 }, { "epoch": 0.3513577611763311, "grad_norm": 1.4782586639585151, "learning_rate": 4.956265176970112e-05, "loss": 0.7925, "step": 2963 }, { "epoch": 0.3514763429384561, "grad_norm": 1.5469636403054137, "learning_rate": 4.95622046358886e-05, "loss": 1.0086, "step": 2964 }, { "epoch": 0.35159492470058107, "grad_norm": 1.5570075982462888, "learning_rate": 4.956175727564268e-05, "loss": 0.7613, "step": 2965 }, { "epoch": 0.35171350646270605, "grad_norm": 1.573615697636306, "learning_rate": 4.9561309688967486e-05, "loss": 1.0269, "step": 2966 }, { "epoch": 0.35183208822483103, "grad_norm": 1.4107038297967127, "learning_rate": 4.956086187586712e-05, "loss": 0.7883, "step": 2967 }, { "epoch": 0.351950669986956, "grad_norm": 1.7837398546505594, "learning_rate": 4.9560413836345746e-05, "loss": 0.7122, "step": 2968 }, { "epoch": 0.352069251749081, "grad_norm": 1.5832268673622538, "learning_rate": 4.9559965570407454e-05, "loss": 0.861, "step": 2969 }, { "epoch": 0.35218783351120597, "grad_norm": 1.7826131916159293, "learning_rate": 4.955951707805641e-05, "loss": 0.7119, "step": 2970 }, { "epoch": 0.35230641527333095, "grad_norm": 1.6887855542094625, "learning_rate": 4.955906835929672e-05, "loss": 0.7696, "step": 2971 }, { "epoch": 0.35242499703545593, "grad_norm": 1.7890858742706244, "learning_rate": 4.955861941413256e-05, "loss": 0.7846, "step": 2972 }, { "epoch": 0.3525435787975809, "grad_norm": 1.4596349390894916, "learning_rate": 4.9558170242568024e-05, "loss": 0.7303, "step": 2973 }, { "epoch": 0.3526621605597059, "grad_norm": 1.923570221633485, "learning_rate": 4.955772084460728e-05, "loss": 0.8321, "step": 2974 }, { "epoch": 0.3527807423218309, "grad_norm": 1.6502747991989648, "learning_rate": 4.9557271220254467e-05, "loss": 0.7305, "step": 2975 }, { "epoch": 0.3528993240839559, "grad_norm": 1.6285583035323776, "learning_rate": 4.955682136951373e-05, "loss": 0.562, "step": 2976 }, { "epoch": 0.3530179058460809, "grad_norm": 1.6674177660973286, "learning_rate": 4.95563712923892e-05, "loss": 0.7961, "step": 2977 }, { "epoch": 0.3531364876082059, "grad_norm": 1.6808646309155737, "learning_rate": 4.955592098888505e-05, "loss": 0.9521, "step": 2978 }, { "epoch": 0.35325506937033085, "grad_norm": 1.6761801043979336, "learning_rate": 4.955547045900543e-05, "loss": 0.8625, "step": 2979 }, { "epoch": 0.35337365113245583, "grad_norm": 1.8601542255577375, "learning_rate": 4.9555019702754465e-05, "loss": 0.9694, "step": 2980 }, { "epoch": 0.3534922328945808, "grad_norm": 1.5292544616314994, "learning_rate": 4.955456872013634e-05, "loss": 0.8918, "step": 2981 }, { "epoch": 0.3536108146567058, "grad_norm": 1.4340198285115624, "learning_rate": 4.95541175111552e-05, "loss": 0.5483, "step": 2982 }, { "epoch": 0.3537293964188308, "grad_norm": 1.5800962189833856, "learning_rate": 4.955366607581521e-05, "loss": 0.9857, "step": 2983 }, { "epoch": 0.35384797818095576, "grad_norm": 1.5339571706420119, "learning_rate": 4.955321441412053e-05, "loss": 0.8825, "step": 2984 }, { "epoch": 0.35396655994308074, "grad_norm": 1.5178552469809528, "learning_rate": 4.955276252607531e-05, "loss": 0.7755, "step": 2985 }, { "epoch": 0.3540851417052057, "grad_norm": 1.569887299930257, "learning_rate": 4.9552310411683746e-05, "loss": 0.9048, "step": 2986 }, { "epoch": 0.3542037234673307, "grad_norm": 1.4289257071239987, "learning_rate": 4.9551858070949974e-05, "loss": 0.7332, "step": 2987 }, { "epoch": 0.35432230522945574, "grad_norm": 1.5354610492983838, "learning_rate": 4.9551405503878184e-05, "loss": 1.0801, "step": 2988 }, { "epoch": 0.3544408869915807, "grad_norm": 1.5137555515960313, "learning_rate": 4.955095271047254e-05, "loss": 0.7765, "step": 2989 }, { "epoch": 0.3545594687537057, "grad_norm": 1.9405116904435764, "learning_rate": 4.955049969073722e-05, "loss": 0.9585, "step": 2990 }, { "epoch": 0.3546780505158307, "grad_norm": 1.362903773780495, "learning_rate": 4.9550046444676404e-05, "loss": 0.6924, "step": 2991 }, { "epoch": 0.35479663227795566, "grad_norm": 1.5390871410249416, "learning_rate": 4.9549592972294255e-05, "loss": 0.9379, "step": 2992 }, { "epoch": 0.35491521404008064, "grad_norm": 1.4105440772947262, "learning_rate": 4.954913927359497e-05, "loss": 0.8856, "step": 2993 }, { "epoch": 0.3550337958022056, "grad_norm": 1.4581988888287054, "learning_rate": 4.9548685348582716e-05, "loss": 0.8131, "step": 2994 }, { "epoch": 0.3551523775643306, "grad_norm": 1.719569923507095, "learning_rate": 4.9548231197261694e-05, "loss": 0.7819, "step": 2995 }, { "epoch": 0.3552709593264556, "grad_norm": 1.488327990204134, "learning_rate": 4.954777681963608e-05, "loss": 0.8274, "step": 2996 }, { "epoch": 0.35538954108858056, "grad_norm": 1.7581560916023447, "learning_rate": 4.954732221571007e-05, "loss": 0.8761, "step": 2997 }, { "epoch": 0.35550812285070554, "grad_norm": 1.6533360909876231, "learning_rate": 4.954686738548785e-05, "loss": 0.7798, "step": 2998 }, { "epoch": 0.3556267046128305, "grad_norm": 1.5634067505450038, "learning_rate": 4.954641232897362e-05, "loss": 0.9799, "step": 2999 }, { "epoch": 0.3557452863749555, "grad_norm": 1.4407973692789942, "learning_rate": 4.954595704617157e-05, "loss": 0.7325, "step": 3000 }, { "epoch": 0.35586386813708054, "grad_norm": 1.5072455336585853, "learning_rate": 4.9545501537085883e-05, "loss": 0.8016, "step": 3001 }, { "epoch": 0.3559824498992055, "grad_norm": 1.5984563111311543, "learning_rate": 4.9545045801720775e-05, "loss": 0.744, "step": 3002 }, { "epoch": 0.3561010316613305, "grad_norm": 1.562704885265823, "learning_rate": 4.954458984008044e-05, "loss": 0.7576, "step": 3003 }, { "epoch": 0.3562196134234555, "grad_norm": 1.4125712517411957, "learning_rate": 4.9544133652169104e-05, "loss": 0.7392, "step": 3004 }, { "epoch": 0.35633819518558046, "grad_norm": 1.725572241192222, "learning_rate": 4.954367723799094e-05, "loss": 1.0061, "step": 3005 }, { "epoch": 0.35645677694770544, "grad_norm": 1.6055368127299405, "learning_rate": 4.9543220597550165e-05, "loss": 0.9393, "step": 3006 }, { "epoch": 0.3565753587098304, "grad_norm": 1.8379252399502464, "learning_rate": 4.9542763730851e-05, "loss": 0.9558, "step": 3007 }, { "epoch": 0.3566939404719554, "grad_norm": 1.247226392234636, "learning_rate": 4.954230663789765e-05, "loss": 0.4901, "step": 3008 }, { "epoch": 0.3568125222340804, "grad_norm": 1.4178700508244053, "learning_rate": 4.9541849318694325e-05, "loss": 0.6649, "step": 3009 }, { "epoch": 0.35693110399620537, "grad_norm": 1.4462425078907981, "learning_rate": 4.9541391773245246e-05, "loss": 0.5956, "step": 3010 }, { "epoch": 0.35704968575833035, "grad_norm": 1.5583754084070844, "learning_rate": 4.9540934001554636e-05, "loss": 0.78, "step": 3011 }, { "epoch": 0.35716826752045533, "grad_norm": 1.7814783610954232, "learning_rate": 4.95404760036267e-05, "loss": 0.8628, "step": 3012 }, { "epoch": 0.35728684928258037, "grad_norm": 1.9135780625032741, "learning_rate": 4.9540017779465674e-05, "loss": 0.7793, "step": 3013 }, { "epoch": 0.35740543104470535, "grad_norm": 2.089838912557425, "learning_rate": 4.953955932907577e-05, "loss": 1.039, "step": 3014 }, { "epoch": 0.3575240128068303, "grad_norm": 1.5963057652398853, "learning_rate": 4.953910065246123e-05, "loss": 0.7339, "step": 3015 }, { "epoch": 0.3576425945689553, "grad_norm": 1.6094023989165038, "learning_rate": 4.953864174962627e-05, "loss": 0.7641, "step": 3016 }, { "epoch": 0.3577611763310803, "grad_norm": 1.3763792051623827, "learning_rate": 4.953818262057512e-05, "loss": 0.8104, "step": 3017 }, { "epoch": 0.35787975809320527, "grad_norm": 1.534363063593355, "learning_rate": 4.9537723265312025e-05, "loss": 0.9068, "step": 3018 }, { "epoch": 0.35799833985533025, "grad_norm": 1.3666112664273016, "learning_rate": 4.9537263683841204e-05, "loss": 0.7003, "step": 3019 }, { "epoch": 0.35811692161745523, "grad_norm": 1.2125257799696063, "learning_rate": 4.953680387616691e-05, "loss": 0.4938, "step": 3020 }, { "epoch": 0.3582355033795802, "grad_norm": 1.6479222974773404, "learning_rate": 4.9536343842293364e-05, "loss": 0.8523, "step": 3021 }, { "epoch": 0.3583540851417052, "grad_norm": 1.4147977363952473, "learning_rate": 4.953588358222482e-05, "loss": 0.7956, "step": 3022 }, { "epoch": 0.3584726669038302, "grad_norm": 1.43393137191777, "learning_rate": 4.953542309596552e-05, "loss": 0.8112, "step": 3023 }, { "epoch": 0.35859124866595515, "grad_norm": 1.4328320955107607, "learning_rate": 4.953496238351971e-05, "loss": 0.6698, "step": 3024 }, { "epoch": 0.35870983042808013, "grad_norm": 1.604865243270586, "learning_rate": 4.953450144489162e-05, "loss": 0.7421, "step": 3025 }, { "epoch": 0.35882841219020517, "grad_norm": 1.5917373343691805, "learning_rate": 4.9534040280085525e-05, "loss": 0.6728, "step": 3026 }, { "epoch": 0.35894699395233015, "grad_norm": 1.6733732284370273, "learning_rate": 4.953357888910566e-05, "loss": 0.7577, "step": 3027 }, { "epoch": 0.35906557571445513, "grad_norm": 1.6558906471205739, "learning_rate": 4.953311727195629e-05, "loss": 0.8126, "step": 3028 }, { "epoch": 0.3591841574765801, "grad_norm": 1.5738568788593073, "learning_rate": 4.953265542864165e-05, "loss": 0.7169, "step": 3029 }, { "epoch": 0.3593027392387051, "grad_norm": 1.8758615822996598, "learning_rate": 4.953219335916602e-05, "loss": 0.8914, "step": 3030 }, { "epoch": 0.3594213210008301, "grad_norm": 1.5471360054195962, "learning_rate": 4.953173106353365e-05, "loss": 0.5907, "step": 3031 }, { "epoch": 0.35953990276295505, "grad_norm": 2.0368190995309, "learning_rate": 4.953126854174879e-05, "loss": 0.8821, "step": 3032 }, { "epoch": 0.35965848452508004, "grad_norm": 2.2349378020224298, "learning_rate": 4.953080579381573e-05, "loss": 0.9962, "step": 3033 }, { "epoch": 0.359777066287205, "grad_norm": 1.5120927156983903, "learning_rate": 4.953034281973872e-05, "loss": 0.7009, "step": 3034 }, { "epoch": 0.35989564804933, "grad_norm": 1.6131889636097, "learning_rate": 4.952987961952202e-05, "loss": 0.657, "step": 3035 }, { "epoch": 0.360014229811455, "grad_norm": 2.0291359075603954, "learning_rate": 4.9529416193169934e-05, "loss": 0.771, "step": 3036 }, { "epoch": 0.36013281157357996, "grad_norm": 1.489274538431269, "learning_rate": 4.952895254068669e-05, "loss": 0.854, "step": 3037 }, { "epoch": 0.360251393335705, "grad_norm": 1.5076793196768112, "learning_rate": 4.952848866207659e-05, "loss": 0.9301, "step": 3038 }, { "epoch": 0.36036997509783, "grad_norm": 1.692543501144454, "learning_rate": 4.95280245573439e-05, "loss": 1.0165, "step": 3039 }, { "epoch": 0.36048855685995496, "grad_norm": 1.5134494109263879, "learning_rate": 4.9527560226492904e-05, "loss": 0.8951, "step": 3040 }, { "epoch": 0.36060713862207994, "grad_norm": 1.5210311465001456, "learning_rate": 4.952709566952788e-05, "loss": 1.0127, "step": 3041 }, { "epoch": 0.3607257203842049, "grad_norm": 1.3956213050753752, "learning_rate": 4.9526630886453105e-05, "loss": 0.8691, "step": 3042 }, { "epoch": 0.3608443021463299, "grad_norm": 1.442554976220813, "learning_rate": 4.952616587727288e-05, "loss": 0.771, "step": 3043 }, { "epoch": 0.3609628839084549, "grad_norm": 1.461985484018066, "learning_rate": 4.952570064199148e-05, "loss": 0.787, "step": 3044 }, { "epoch": 0.36108146567057986, "grad_norm": 1.3279848473548264, "learning_rate": 4.9525235180613195e-05, "loss": 0.7694, "step": 3045 }, { "epoch": 0.36120004743270484, "grad_norm": 1.396806118629168, "learning_rate": 4.952476949314231e-05, "loss": 0.7692, "step": 3046 }, { "epoch": 0.3613186291948298, "grad_norm": 1.5233668443311017, "learning_rate": 4.952430357958313e-05, "loss": 0.6486, "step": 3047 }, { "epoch": 0.3614372109569548, "grad_norm": 1.8053216469751772, "learning_rate": 4.9523837439939944e-05, "loss": 0.9482, "step": 3048 }, { "epoch": 0.3615557927190798, "grad_norm": 1.6896153413715158, "learning_rate": 4.952337107421705e-05, "loss": 0.7094, "step": 3049 }, { "epoch": 0.36167437448120476, "grad_norm": 1.4259300385178255, "learning_rate": 4.9522904482418756e-05, "loss": 0.631, "step": 3050 }, { "epoch": 0.3617929562433298, "grad_norm": 1.7758430335378435, "learning_rate": 4.9522437664549335e-05, "loss": 1.0085, "step": 3051 }, { "epoch": 0.3619115380054548, "grad_norm": 1.8920695082568961, "learning_rate": 4.952197062061312e-05, "loss": 0.7856, "step": 3052 }, { "epoch": 0.36203011976757976, "grad_norm": 1.366156133602365, "learning_rate": 4.952150335061441e-05, "loss": 0.6513, "step": 3053 }, { "epoch": 0.36214870152970474, "grad_norm": 1.7236594256537405, "learning_rate": 4.9521035854557505e-05, "loss": 0.9887, "step": 3054 }, { "epoch": 0.3622672832918297, "grad_norm": 1.730354589363647, "learning_rate": 4.952056813244673e-05, "loss": 0.8043, "step": 3055 }, { "epoch": 0.3623858650539547, "grad_norm": 1.932169929444306, "learning_rate": 4.952010018428637e-05, "loss": 1.0445, "step": 3056 }, { "epoch": 0.3625044468160797, "grad_norm": 1.6779594466665573, "learning_rate": 4.951963201008076e-05, "loss": 0.7471, "step": 3057 }, { "epoch": 0.36262302857820466, "grad_norm": 1.482827695342862, "learning_rate": 4.9519163609834215e-05, "loss": 0.6568, "step": 3058 }, { "epoch": 0.36274161034032965, "grad_norm": 1.584255872841578, "learning_rate": 4.951869498355105e-05, "loss": 0.9082, "step": 3059 }, { "epoch": 0.3628601921024546, "grad_norm": 1.519188816814503, "learning_rate": 4.951822613123558e-05, "loss": 0.6978, "step": 3060 }, { "epoch": 0.3629787738645796, "grad_norm": 1.5248519900422424, "learning_rate": 4.951775705289213e-05, "loss": 0.7838, "step": 3061 }, { "epoch": 0.3630973556267046, "grad_norm": 1.542970212077856, "learning_rate": 4.951728774852503e-05, "loss": 0.9152, "step": 3062 }, { "epoch": 0.3632159373888296, "grad_norm": 1.8395570880387342, "learning_rate": 4.951681821813859e-05, "loss": 0.9609, "step": 3063 }, { "epoch": 0.3633345191509546, "grad_norm": 1.4848233052778923, "learning_rate": 4.9516348461737165e-05, "loss": 0.987, "step": 3064 }, { "epoch": 0.3634531009130796, "grad_norm": 1.504856122057004, "learning_rate": 4.951587847932507e-05, "loss": 0.7416, "step": 3065 }, { "epoch": 0.36357168267520457, "grad_norm": 1.4910796893326475, "learning_rate": 4.9515408270906634e-05, "loss": 0.8023, "step": 3066 }, { "epoch": 0.36369026443732955, "grad_norm": 1.383019189698515, "learning_rate": 4.95149378364862e-05, "loss": 0.9304, "step": 3067 }, { "epoch": 0.3638088461994545, "grad_norm": 1.3726024665149232, "learning_rate": 4.9514467176068095e-05, "loss": 0.6963, "step": 3068 }, { "epoch": 0.3639274279615795, "grad_norm": 1.3781340571948948, "learning_rate": 4.9513996289656675e-05, "loss": 0.7768, "step": 3069 }, { "epoch": 0.3640460097237045, "grad_norm": 1.2081512756433084, "learning_rate": 4.951352517725626e-05, "loss": 0.6691, "step": 3070 }, { "epoch": 0.36416459148582947, "grad_norm": 1.6018988435288883, "learning_rate": 4.9513053838871206e-05, "loss": 0.8019, "step": 3071 }, { "epoch": 0.36428317324795445, "grad_norm": 1.5986818618229484, "learning_rate": 4.9512582274505856e-05, "loss": 0.9968, "step": 3072 }, { "epoch": 0.36440175501007943, "grad_norm": 1.3852726037248357, "learning_rate": 4.951211048416455e-05, "loss": 0.6909, "step": 3073 }, { "epoch": 0.3645203367722044, "grad_norm": 1.3987378329116338, "learning_rate": 4.9511638467851655e-05, "loss": 0.5926, "step": 3074 }, { "epoch": 0.3646389185343294, "grad_norm": 1.3433850029677807, "learning_rate": 4.951116622557151e-05, "loss": 0.7893, "step": 3075 }, { "epoch": 0.36475750029645443, "grad_norm": 1.3974852777274687, "learning_rate": 4.9510693757328466e-05, "loss": 0.659, "step": 3076 }, { "epoch": 0.3648760820585794, "grad_norm": 1.6547634926704458, "learning_rate": 4.951022106312688e-05, "loss": 0.7087, "step": 3077 }, { "epoch": 0.3649946638207044, "grad_norm": 1.6940226680935768, "learning_rate": 4.950974814297112e-05, "loss": 0.7476, "step": 3078 }, { "epoch": 0.36511324558282937, "grad_norm": 1.3826705109332402, "learning_rate": 4.950927499686553e-05, "loss": 0.7631, "step": 3079 }, { "epoch": 0.36523182734495435, "grad_norm": 1.53213496447221, "learning_rate": 4.950880162481449e-05, "loss": 0.7835, "step": 3080 }, { "epoch": 0.36535040910707933, "grad_norm": 1.446269617506082, "learning_rate": 4.950832802682235e-05, "loss": 0.7253, "step": 3081 }, { "epoch": 0.3654689908692043, "grad_norm": 1.5452729747288778, "learning_rate": 4.9507854202893476e-05, "loss": 0.6578, "step": 3082 }, { "epoch": 0.3655875726313293, "grad_norm": 1.6925066698535058, "learning_rate": 4.9507380153032235e-05, "loss": 0.9766, "step": 3083 }, { "epoch": 0.3657061543934543, "grad_norm": 1.5861017323242954, "learning_rate": 4.950690587724301e-05, "loss": 0.7515, "step": 3084 }, { "epoch": 0.36582473615557926, "grad_norm": 1.5421785132360604, "learning_rate": 4.950643137553017e-05, "loss": 0.7139, "step": 3085 }, { "epoch": 0.36594331791770424, "grad_norm": 1.797656007354017, "learning_rate": 4.950595664789807e-05, "loss": 1.0194, "step": 3086 }, { "epoch": 0.3660618996798292, "grad_norm": 1.6612192330026478, "learning_rate": 4.950548169435111e-05, "loss": 0.7712, "step": 3087 }, { "epoch": 0.36618048144195425, "grad_norm": 1.408446272288491, "learning_rate": 4.950500651489366e-05, "loss": 0.7585, "step": 3088 }, { "epoch": 0.36629906320407923, "grad_norm": 1.4142255940348891, "learning_rate": 4.950453110953009e-05, "loss": 0.7657, "step": 3089 }, { "epoch": 0.3664176449662042, "grad_norm": 1.8816504784867716, "learning_rate": 4.950405547826481e-05, "loss": 0.935, "step": 3090 }, { "epoch": 0.3665362267283292, "grad_norm": 1.4370066479372023, "learning_rate": 4.9503579621102176e-05, "loss": 0.6169, "step": 3091 }, { "epoch": 0.3666548084904542, "grad_norm": 1.470547888839295, "learning_rate": 4.950310353804659e-05, "loss": 0.879, "step": 3092 }, { "epoch": 0.36677339025257916, "grad_norm": 1.7029642618683662, "learning_rate": 4.950262722910243e-05, "loss": 0.8014, "step": 3093 }, { "epoch": 0.36689197201470414, "grad_norm": 1.3426190199931218, "learning_rate": 4.9502150694274104e-05, "loss": 0.8665, "step": 3094 }, { "epoch": 0.3670105537768291, "grad_norm": 1.6361554051165519, "learning_rate": 4.950167393356599e-05, "loss": 0.857, "step": 3095 }, { "epoch": 0.3671291355389541, "grad_norm": 1.4564511792028059, "learning_rate": 4.9501196946982485e-05, "loss": 0.7492, "step": 3096 }, { "epoch": 0.3672477173010791, "grad_norm": 1.7369176612102415, "learning_rate": 4.9500719734527995e-05, "loss": 0.868, "step": 3097 }, { "epoch": 0.36736629906320406, "grad_norm": 1.5893430490275953, "learning_rate": 4.950024229620691e-05, "loss": 0.895, "step": 3098 }, { "epoch": 0.36748488082532904, "grad_norm": 1.7438388426276605, "learning_rate": 4.949976463202364e-05, "loss": 0.9188, "step": 3099 }, { "epoch": 0.367603462587454, "grad_norm": 1.8046485382079513, "learning_rate": 4.9499286741982583e-05, "loss": 0.9495, "step": 3100 }, { "epoch": 0.36772204434957906, "grad_norm": 1.5677319628989352, "learning_rate": 4.949880862608814e-05, "loss": 0.9387, "step": 3101 }, { "epoch": 0.36784062611170404, "grad_norm": 1.646655819356231, "learning_rate": 4.949833028434473e-05, "loss": 0.8451, "step": 3102 }, { "epoch": 0.367959207873829, "grad_norm": 1.7950707400402195, "learning_rate": 4.949785171675675e-05, "loss": 0.8476, "step": 3103 }, { "epoch": 0.368077789635954, "grad_norm": 1.3742436774358162, "learning_rate": 4.949737292332862e-05, "loss": 0.7807, "step": 3104 }, { "epoch": 0.368196371398079, "grad_norm": 1.7144668741626803, "learning_rate": 4.949689390406476e-05, "loss": 0.9348, "step": 3105 }, { "epoch": 0.36831495316020396, "grad_norm": 1.7201192604467652, "learning_rate": 4.949641465896957e-05, "loss": 0.9535, "step": 3106 }, { "epoch": 0.36843353492232894, "grad_norm": 1.5269513895875113, "learning_rate": 4.9495935188047485e-05, "loss": 0.7667, "step": 3107 }, { "epoch": 0.3685521166844539, "grad_norm": 1.4375063723949297, "learning_rate": 4.94954554913029e-05, "loss": 0.827, "step": 3108 }, { "epoch": 0.3686706984465789, "grad_norm": 1.4010376341069477, "learning_rate": 4.9494975568740264e-05, "loss": 0.7737, "step": 3109 }, { "epoch": 0.3687892802087039, "grad_norm": 1.506937707201339, "learning_rate": 4.9494495420364e-05, "loss": 0.9038, "step": 3110 }, { "epoch": 0.36890786197082887, "grad_norm": 1.4371368168437029, "learning_rate": 4.949401504617851e-05, "loss": 0.8653, "step": 3111 }, { "epoch": 0.36902644373295385, "grad_norm": 1.2752521208307115, "learning_rate": 4.949353444618825e-05, "loss": 0.748, "step": 3112 }, { "epoch": 0.3691450254950789, "grad_norm": 1.4067886922378956, "learning_rate": 4.949305362039763e-05, "loss": 0.8786, "step": 3113 }, { "epoch": 0.36926360725720386, "grad_norm": 1.5498589541877545, "learning_rate": 4.949257256881109e-05, "loss": 0.9742, "step": 3114 }, { "epoch": 0.36938218901932884, "grad_norm": 1.377755709501552, "learning_rate": 4.949209129143307e-05, "loss": 0.7635, "step": 3115 }, { "epoch": 0.3695007707814538, "grad_norm": 1.426476550326406, "learning_rate": 4.9491609788268e-05, "loss": 0.7045, "step": 3116 }, { "epoch": 0.3696193525435788, "grad_norm": 1.518607682738011, "learning_rate": 4.949112805932033e-05, "loss": 0.6794, "step": 3117 }, { "epoch": 0.3697379343057038, "grad_norm": 1.4301568253555041, "learning_rate": 4.9490646104594484e-05, "loss": 0.6418, "step": 3118 }, { "epoch": 0.36985651606782877, "grad_norm": 1.504160682879259, "learning_rate": 4.9490163924094915e-05, "loss": 0.8151, "step": 3119 }, { "epoch": 0.36997509782995375, "grad_norm": 1.4592808378482456, "learning_rate": 4.948968151782607e-05, "loss": 0.7903, "step": 3120 }, { "epoch": 0.37009367959207873, "grad_norm": 1.6521590023815609, "learning_rate": 4.9489198885792384e-05, "loss": 1.0075, "step": 3121 }, { "epoch": 0.3702122613542037, "grad_norm": 1.9312519868948987, "learning_rate": 4.948871602799832e-05, "loss": 1.0681, "step": 3122 }, { "epoch": 0.3703308431163287, "grad_norm": 1.738615563657647, "learning_rate": 4.948823294444832e-05, "loss": 0.9093, "step": 3123 }, { "epoch": 0.37044942487845367, "grad_norm": 1.8128934960529652, "learning_rate": 4.9487749635146854e-05, "loss": 0.9639, "step": 3124 }, { "epoch": 0.3705680066405787, "grad_norm": 1.3787411891821522, "learning_rate": 4.948726610009835e-05, "loss": 0.4826, "step": 3125 }, { "epoch": 0.3706865884027037, "grad_norm": 1.779818670080989, "learning_rate": 4.948678233930729e-05, "loss": 1.0307, "step": 3126 }, { "epoch": 0.37080517016482867, "grad_norm": 1.2947148610204073, "learning_rate": 4.948629835277812e-05, "loss": 0.7339, "step": 3127 }, { "epoch": 0.37092375192695365, "grad_norm": 1.4135005245490013, "learning_rate": 4.948581414051531e-05, "loss": 0.6742, "step": 3128 }, { "epoch": 0.37104233368907863, "grad_norm": 1.6987002172737578, "learning_rate": 4.948532970252332e-05, "loss": 0.917, "step": 3129 }, { "epoch": 0.3711609154512036, "grad_norm": 1.4449456552337652, "learning_rate": 4.948484503880662e-05, "loss": 0.7827, "step": 3130 }, { "epoch": 0.3712794972133286, "grad_norm": 1.4233277765094676, "learning_rate": 4.948436014936966e-05, "loss": 0.7593, "step": 3131 }, { "epoch": 0.37139807897545357, "grad_norm": 1.4465663303522114, "learning_rate": 4.948387503421693e-05, "loss": 1.0046, "step": 3132 }, { "epoch": 0.37151666073757855, "grad_norm": 1.4623876244790341, "learning_rate": 4.94833896933529e-05, "loss": 1.0284, "step": 3133 }, { "epoch": 0.37163524249970353, "grad_norm": 1.4300904266193268, "learning_rate": 4.948290412678204e-05, "loss": 0.6498, "step": 3134 }, { "epoch": 0.3717538242618285, "grad_norm": 1.4717989807132263, "learning_rate": 4.948241833450883e-05, "loss": 0.8905, "step": 3135 }, { "epoch": 0.3718724060239535, "grad_norm": 1.3262491376951644, "learning_rate": 4.9481932316537736e-05, "loss": 0.5522, "step": 3136 }, { "epoch": 0.3719909877860785, "grad_norm": 1.4481642258155922, "learning_rate": 4.948144607287326e-05, "loss": 0.9073, "step": 3137 }, { "epoch": 0.3721095695482035, "grad_norm": 1.4563062956790982, "learning_rate": 4.948095960351987e-05, "loss": 0.8969, "step": 3138 }, { "epoch": 0.3722281513103285, "grad_norm": 1.520419960996801, "learning_rate": 4.948047290848204e-05, "loss": 0.8179, "step": 3139 }, { "epoch": 0.3723467330724535, "grad_norm": 1.3450777803673666, "learning_rate": 4.947998598776428e-05, "loss": 0.8044, "step": 3140 }, { "epoch": 0.37246531483457845, "grad_norm": 1.5606968122080485, "learning_rate": 4.947949884137107e-05, "loss": 0.6789, "step": 3141 }, { "epoch": 0.37258389659670343, "grad_norm": 1.4062469307833254, "learning_rate": 4.9479011469306894e-05, "loss": 0.8931, "step": 3142 }, { "epoch": 0.3727024783588284, "grad_norm": 1.3980507100251707, "learning_rate": 4.9478523871576256e-05, "loss": 0.5461, "step": 3143 }, { "epoch": 0.3728210601209534, "grad_norm": 1.6939582675399725, "learning_rate": 4.947803604818364e-05, "loss": 0.7129, "step": 3144 }, { "epoch": 0.3729396418830784, "grad_norm": 1.4783008286089696, "learning_rate": 4.947754799913355e-05, "loss": 0.761, "step": 3145 }, { "epoch": 0.37305822364520336, "grad_norm": 1.5948483024844975, "learning_rate": 4.947705972443049e-05, "loss": 0.7131, "step": 3146 }, { "epoch": 0.37317680540732834, "grad_norm": 2.222387833273549, "learning_rate": 4.947657122407895e-05, "loss": 0.8595, "step": 3147 }, { "epoch": 0.3732953871694533, "grad_norm": 1.876941547816875, "learning_rate": 4.9476082498083436e-05, "loss": 0.9518, "step": 3148 }, { "epoch": 0.3734139689315783, "grad_norm": 1.5063571787320436, "learning_rate": 4.9475593546448464e-05, "loss": 0.7863, "step": 3149 }, { "epoch": 0.37353255069370334, "grad_norm": 1.5768965514697513, "learning_rate": 4.9475104369178525e-05, "loss": 0.6851, "step": 3150 }, { "epoch": 0.3736511324558283, "grad_norm": 1.4691952437462241, "learning_rate": 4.947461496627814e-05, "loss": 0.5351, "step": 3151 }, { "epoch": 0.3737697142179533, "grad_norm": 1.8114812930950623, "learning_rate": 4.9474125337751816e-05, "loss": 0.734, "step": 3152 }, { "epoch": 0.3738882959800783, "grad_norm": 1.4240446503931021, "learning_rate": 4.947363548360407e-05, "loss": 0.6649, "step": 3153 }, { "epoch": 0.37400687774220326, "grad_norm": 1.6531309937676175, "learning_rate": 4.947314540383942e-05, "loss": 0.7416, "step": 3154 }, { "epoch": 0.37412545950432824, "grad_norm": 1.4708664664760922, "learning_rate": 4.947265509846237e-05, "loss": 0.7357, "step": 3155 }, { "epoch": 0.3742440412664532, "grad_norm": 1.8048843383732813, "learning_rate": 4.947216456747747e-05, "loss": 1.0099, "step": 3156 }, { "epoch": 0.3743626230285782, "grad_norm": 1.5648079189735453, "learning_rate": 4.94716738108892e-05, "loss": 0.6917, "step": 3157 }, { "epoch": 0.3744812047907032, "grad_norm": 1.6302908568409067, "learning_rate": 4.9471182828702117e-05, "loss": 0.8869, "step": 3158 }, { "epoch": 0.37459978655282816, "grad_norm": 1.7228509019924438, "learning_rate": 4.947069162092074e-05, "loss": 0.6653, "step": 3159 }, { "epoch": 0.37471836831495314, "grad_norm": 1.4773590169339796, "learning_rate": 4.947020018754959e-05, "loss": 0.8771, "step": 3160 }, { "epoch": 0.3748369500770781, "grad_norm": 1.7114689799955096, "learning_rate": 4.9469708528593195e-05, "loss": 0.7831, "step": 3161 }, { "epoch": 0.3749555318392031, "grad_norm": 1.7996657150148339, "learning_rate": 4.9469216644056105e-05, "loss": 0.7412, "step": 3162 }, { "epoch": 0.37507411360132814, "grad_norm": 1.6191683638540606, "learning_rate": 4.946872453394284e-05, "loss": 0.7563, "step": 3163 }, { "epoch": 0.3751926953634531, "grad_norm": 1.3636154314902038, "learning_rate": 4.946823219825794e-05, "loss": 0.817, "step": 3164 }, { "epoch": 0.3753112771255781, "grad_norm": 1.5712974505498554, "learning_rate": 4.946773963700594e-05, "loss": 0.5072, "step": 3165 }, { "epoch": 0.3754298588877031, "grad_norm": 1.7789265812221249, "learning_rate": 4.9467246850191396e-05, "loss": 0.8249, "step": 3166 }, { "epoch": 0.37554844064982806, "grad_norm": 1.7366284351646062, "learning_rate": 4.946675383781883e-05, "loss": 0.8146, "step": 3167 }, { "epoch": 0.37566702241195304, "grad_norm": 1.6318880366672075, "learning_rate": 4.94662605998928e-05, "loss": 0.733, "step": 3168 }, { "epoch": 0.375785604174078, "grad_norm": 1.715867773946733, "learning_rate": 4.9465767136417854e-05, "loss": 0.9381, "step": 3169 }, { "epoch": 0.375904185936203, "grad_norm": 1.470873895639843, "learning_rate": 4.946527344739852e-05, "loss": 0.745, "step": 3170 }, { "epoch": 0.376022767698328, "grad_norm": 1.376725209381632, "learning_rate": 4.946477953283938e-05, "loss": 0.6302, "step": 3171 }, { "epoch": 0.37614134946045297, "grad_norm": 1.3574409045506295, "learning_rate": 4.946428539274497e-05, "loss": 0.6984, "step": 3172 }, { "epoch": 0.37625993122257795, "grad_norm": 1.6063654419338014, "learning_rate": 4.9463791027119855e-05, "loss": 0.8708, "step": 3173 }, { "epoch": 0.37637851298470293, "grad_norm": 1.675272102404614, "learning_rate": 4.946329643596859e-05, "loss": 0.9243, "step": 3174 }, { "epoch": 0.37649709474682796, "grad_norm": 1.300527882520946, "learning_rate": 4.946280161929572e-05, "loss": 0.5008, "step": 3175 }, { "epoch": 0.37661567650895295, "grad_norm": 1.4619334510515205, "learning_rate": 4.946230657710581e-05, "loss": 0.9637, "step": 3176 }, { "epoch": 0.3767342582710779, "grad_norm": 1.50147265079686, "learning_rate": 4.946181130940345e-05, "loss": 0.7774, "step": 3177 }, { "epoch": 0.3768528400332029, "grad_norm": 1.5529070294363772, "learning_rate": 4.946131581619318e-05, "loss": 0.7223, "step": 3178 }, { "epoch": 0.3769714217953279, "grad_norm": 1.537729678627995, "learning_rate": 4.946082009747957e-05, "loss": 0.9531, "step": 3179 }, { "epoch": 0.37709000355745287, "grad_norm": 1.3073230151784905, "learning_rate": 4.946032415326719e-05, "loss": 0.596, "step": 3180 }, { "epoch": 0.37720858531957785, "grad_norm": 1.574068515762463, "learning_rate": 4.945982798356062e-05, "loss": 0.7335, "step": 3181 }, { "epoch": 0.37732716708170283, "grad_norm": 1.5812123527383504, "learning_rate": 4.945933158836444e-05, "loss": 0.6088, "step": 3182 }, { "epoch": 0.3774457488438278, "grad_norm": 1.7007120737005263, "learning_rate": 4.945883496768321e-05, "loss": 1.0282, "step": 3183 }, { "epoch": 0.3775643306059528, "grad_norm": 1.5701105661628214, "learning_rate": 4.945833812152152e-05, "loss": 0.7584, "step": 3184 }, { "epoch": 0.37768291236807777, "grad_norm": 1.7147346433184019, "learning_rate": 4.945784104988394e-05, "loss": 0.8626, "step": 3185 }, { "epoch": 0.37780149413020275, "grad_norm": 1.5344752122503422, "learning_rate": 4.9457343752775056e-05, "loss": 0.7486, "step": 3186 }, { "epoch": 0.37792007589232773, "grad_norm": 1.5028270917437605, "learning_rate": 4.945684623019946e-05, "loss": 0.6737, "step": 3187 }, { "epoch": 0.37803865765445277, "grad_norm": 1.6534090106340509, "learning_rate": 4.945634848216173e-05, "loss": 0.7625, "step": 3188 }, { "epoch": 0.37815723941657775, "grad_norm": 1.6028359755042079, "learning_rate": 4.945585050866646e-05, "loss": 0.5606, "step": 3189 }, { "epoch": 0.37827582117870273, "grad_norm": 1.6952371649735012, "learning_rate": 4.945535230971823e-05, "loss": 0.9395, "step": 3190 }, { "epoch": 0.3783944029408277, "grad_norm": 1.7421054560489009, "learning_rate": 4.9454853885321646e-05, "loss": 0.8346, "step": 3191 }, { "epoch": 0.3785129847029527, "grad_norm": 1.5658393562438548, "learning_rate": 4.94543552354813e-05, "loss": 0.7322, "step": 3192 }, { "epoch": 0.3786315664650777, "grad_norm": 1.4965581241651604, "learning_rate": 4.945385636020178e-05, "loss": 0.7484, "step": 3193 }, { "epoch": 0.37875014822720265, "grad_norm": 1.6106757061715484, "learning_rate": 4.9453357259487695e-05, "loss": 0.9881, "step": 3194 }, { "epoch": 0.37886872998932764, "grad_norm": 1.9682943419689864, "learning_rate": 4.9452857933343644e-05, "loss": 0.9061, "step": 3195 }, { "epoch": 0.3789873117514526, "grad_norm": 1.7297291745174646, "learning_rate": 4.9452358381774235e-05, "loss": 0.4589, "step": 3196 }, { "epoch": 0.3791058935135776, "grad_norm": 1.5167427422582236, "learning_rate": 4.945185860478405e-05, "loss": 0.6759, "step": 3197 }, { "epoch": 0.3792244752757026, "grad_norm": 2.107242678501436, "learning_rate": 4.945135860237773e-05, "loss": 1.088, "step": 3198 }, { "epoch": 0.37934305703782756, "grad_norm": 1.7420784052269787, "learning_rate": 4.945085837455986e-05, "loss": 0.8665, "step": 3199 }, { "epoch": 0.3794616387999526, "grad_norm": 1.6553981378431084, "learning_rate": 4.945035792133507e-05, "loss": 0.8491, "step": 3200 }, { "epoch": 0.3795802205620776, "grad_norm": 1.3770407071189947, "learning_rate": 4.9449857242707945e-05, "loss": 0.6946, "step": 3201 }, { "epoch": 0.37969880232420256, "grad_norm": 1.546841518273277, "learning_rate": 4.944935633868313e-05, "loss": 0.8674, "step": 3202 }, { "epoch": 0.37981738408632754, "grad_norm": 1.5558363194793887, "learning_rate": 4.944885520926523e-05, "loss": 0.736, "step": 3203 }, { "epoch": 0.3799359658484525, "grad_norm": 1.2731785550420782, "learning_rate": 4.944835385445886e-05, "loss": 0.5703, "step": 3204 }, { "epoch": 0.3800545476105775, "grad_norm": 1.4275409889352497, "learning_rate": 4.944785227426866e-05, "loss": 0.7343, "step": 3205 }, { "epoch": 0.3801731293727025, "grad_norm": 1.628861859844902, "learning_rate": 4.944735046869924e-05, "loss": 0.5253, "step": 3206 }, { "epoch": 0.38029171113482746, "grad_norm": 1.4770288410451435, "learning_rate": 4.944684843775522e-05, "loss": 0.8356, "step": 3207 }, { "epoch": 0.38041029289695244, "grad_norm": 1.4984531193115744, "learning_rate": 4.944634618144124e-05, "loss": 0.7241, "step": 3208 }, { "epoch": 0.3805288746590774, "grad_norm": 1.7930841598970384, "learning_rate": 4.944584369976192e-05, "loss": 0.8751, "step": 3209 }, { "epoch": 0.3806474564212024, "grad_norm": 1.5391882347829557, "learning_rate": 4.944534099272191e-05, "loss": 0.6944, "step": 3210 }, { "epoch": 0.3807660381833274, "grad_norm": 1.4620488319081386, "learning_rate": 4.9444838060325824e-05, "loss": 0.5565, "step": 3211 }, { "epoch": 0.38088461994545236, "grad_norm": 1.7795897274277042, "learning_rate": 4.9444334902578315e-05, "loss": 0.6879, "step": 3212 }, { "epoch": 0.3810032017075774, "grad_norm": 2.0318057224496577, "learning_rate": 4.9443831519484006e-05, "loss": 1.0165, "step": 3213 }, { "epoch": 0.3811217834697024, "grad_norm": 1.9500451352331196, "learning_rate": 4.944332791104755e-05, "loss": 0.8578, "step": 3214 }, { "epoch": 0.38124036523182736, "grad_norm": 1.6877660051036747, "learning_rate": 4.944282407727359e-05, "loss": 0.7278, "step": 3215 }, { "epoch": 0.38135894699395234, "grad_norm": 1.4272935823812445, "learning_rate": 4.944232001816676e-05, "loss": 0.6532, "step": 3216 }, { "epoch": 0.3814775287560773, "grad_norm": 1.8710862340985495, "learning_rate": 4.944181573373171e-05, "loss": 1.0306, "step": 3217 }, { "epoch": 0.3815961105182023, "grad_norm": 1.4347736502535138, "learning_rate": 4.944131122397309e-05, "loss": 0.6131, "step": 3218 }, { "epoch": 0.3817146922803273, "grad_norm": 1.6325334188961806, "learning_rate": 4.944080648889556e-05, "loss": 0.8383, "step": 3219 }, { "epoch": 0.38183327404245226, "grad_norm": 1.3338105719880446, "learning_rate": 4.9440301528503766e-05, "loss": 0.6039, "step": 3220 }, { "epoch": 0.38195185580457724, "grad_norm": 1.4045480050218806, "learning_rate": 4.9439796342802355e-05, "loss": 0.64, "step": 3221 }, { "epoch": 0.3820704375667022, "grad_norm": 1.4466372993883514, "learning_rate": 4.943929093179599e-05, "loss": 0.7163, "step": 3222 }, { "epoch": 0.3821890193288272, "grad_norm": 1.5500648734178097, "learning_rate": 4.9438785295489345e-05, "loss": 0.734, "step": 3223 }, { "epoch": 0.3823076010909522, "grad_norm": 1.570605331982657, "learning_rate": 4.943827943388706e-05, "loss": 0.7607, "step": 3224 }, { "epoch": 0.3824261828530772, "grad_norm": 1.3646505079483133, "learning_rate": 4.94377733469938e-05, "loss": 0.8536, "step": 3225 }, { "epoch": 0.3825447646152022, "grad_norm": 1.4424544203799678, "learning_rate": 4.943726703481425e-05, "loss": 0.6229, "step": 3226 }, { "epoch": 0.3826633463773272, "grad_norm": 1.7042540177205865, "learning_rate": 4.943676049735306e-05, "loss": 0.764, "step": 3227 }, { "epoch": 0.38278192813945217, "grad_norm": 1.723461587132966, "learning_rate": 4.94362537346149e-05, "loss": 0.8225, "step": 3228 }, { "epoch": 0.38290050990157715, "grad_norm": 1.7172024465548814, "learning_rate": 4.943574674660445e-05, "loss": 0.854, "step": 3229 }, { "epoch": 0.3830190916637021, "grad_norm": 1.3082006367327952, "learning_rate": 4.9435239533326385e-05, "loss": 0.6813, "step": 3230 }, { "epoch": 0.3831376734258271, "grad_norm": 1.6411378008009438, "learning_rate": 4.943473209478537e-05, "loss": 0.797, "step": 3231 }, { "epoch": 0.3832562551879521, "grad_norm": 1.8692058454766995, "learning_rate": 4.9434224430986085e-05, "loss": 0.6085, "step": 3232 }, { "epoch": 0.38337483695007707, "grad_norm": 1.734893282235405, "learning_rate": 4.943371654193322e-05, "loss": 0.7662, "step": 3233 }, { "epoch": 0.38349341871220205, "grad_norm": 1.4542707627433027, "learning_rate": 4.943320842763145e-05, "loss": 0.6431, "step": 3234 }, { "epoch": 0.38361200047432703, "grad_norm": 1.5525541947145052, "learning_rate": 4.943270008808546e-05, "loss": 0.5759, "step": 3235 }, { "epoch": 0.383730582236452, "grad_norm": 1.7276720241415515, "learning_rate": 4.943219152329994e-05, "loss": 0.7173, "step": 3236 }, { "epoch": 0.383849163998577, "grad_norm": 1.6572381957177205, "learning_rate": 4.9431682733279574e-05, "loss": 0.7443, "step": 3237 }, { "epoch": 0.38396774576070203, "grad_norm": 1.8280189074308468, "learning_rate": 4.943117371802906e-05, "loss": 1.0962, "step": 3238 }, { "epoch": 0.384086327522827, "grad_norm": 1.7206032164545222, "learning_rate": 4.9430664477553065e-05, "loss": 0.8564, "step": 3239 }, { "epoch": 0.384204909284952, "grad_norm": 1.6109114175140071, "learning_rate": 4.943015501185632e-05, "loss": 0.689, "step": 3240 }, { "epoch": 0.38432349104707697, "grad_norm": 1.560493881262144, "learning_rate": 4.942964532094349e-05, "loss": 0.7849, "step": 3241 }, { "epoch": 0.38444207280920195, "grad_norm": 1.6714374623450774, "learning_rate": 4.942913540481929e-05, "loss": 0.8819, "step": 3242 }, { "epoch": 0.38456065457132693, "grad_norm": 1.3746458332764817, "learning_rate": 4.942862526348843e-05, "loss": 0.8442, "step": 3243 }, { "epoch": 0.3846792363334519, "grad_norm": 1.5135044505373825, "learning_rate": 4.942811489695559e-05, "loss": 0.5896, "step": 3244 }, { "epoch": 0.3847978180955769, "grad_norm": 1.5176651260072362, "learning_rate": 4.9427604305225495e-05, "loss": 0.9982, "step": 3245 }, { "epoch": 0.3849163998577019, "grad_norm": 1.6595143628642668, "learning_rate": 4.942709348830284e-05, "loss": 0.8944, "step": 3246 }, { "epoch": 0.38503498161982685, "grad_norm": 1.6771260511618922, "learning_rate": 4.9426582446192335e-05, "loss": 0.9621, "step": 3247 }, { "epoch": 0.38515356338195184, "grad_norm": 1.3944462078929023, "learning_rate": 4.942607117889869e-05, "loss": 0.7567, "step": 3248 }, { "epoch": 0.3852721451440768, "grad_norm": 1.6254229427188742, "learning_rate": 4.9425559686426635e-05, "loss": 0.9746, "step": 3249 }, { "epoch": 0.38539072690620185, "grad_norm": 1.662723354770451, "learning_rate": 4.9425047968780866e-05, "loss": 0.9029, "step": 3250 }, { "epoch": 0.38550930866832683, "grad_norm": 1.4871712832674107, "learning_rate": 4.9424536025966106e-05, "loss": 0.7813, "step": 3251 }, { "epoch": 0.3856278904304518, "grad_norm": 1.6183753481259633, "learning_rate": 4.9424023857987065e-05, "loss": 0.7846, "step": 3252 }, { "epoch": 0.3857464721925768, "grad_norm": 1.7266362277947056, "learning_rate": 4.942351146484849e-05, "loss": 0.85, "step": 3253 }, { "epoch": 0.3858650539547018, "grad_norm": 1.429297893713081, "learning_rate": 4.9422998846555084e-05, "loss": 0.8742, "step": 3254 }, { "epoch": 0.38598363571682676, "grad_norm": 1.5598735043186809, "learning_rate": 4.942248600311158e-05, "loss": 0.6862, "step": 3255 }, { "epoch": 0.38610221747895174, "grad_norm": 1.3599248605267584, "learning_rate": 4.9421972934522695e-05, "loss": 0.5351, "step": 3256 }, { "epoch": 0.3862207992410767, "grad_norm": 1.3516309865025078, "learning_rate": 4.942145964079318e-05, "loss": 0.8849, "step": 3257 }, { "epoch": 0.3863393810032017, "grad_norm": 1.6872664530749308, "learning_rate": 4.942094612192775e-05, "loss": 0.7987, "step": 3258 }, { "epoch": 0.3864579627653267, "grad_norm": 1.4703973708752998, "learning_rate": 4.942043237793114e-05, "loss": 0.8241, "step": 3259 }, { "epoch": 0.38657654452745166, "grad_norm": 1.7339408503007132, "learning_rate": 4.94199184088081e-05, "loss": 0.9833, "step": 3260 }, { "epoch": 0.38669512628957664, "grad_norm": 1.7571404510200175, "learning_rate": 4.9419404214563346e-05, "loss": 0.7578, "step": 3261 }, { "epoch": 0.3868137080517016, "grad_norm": 1.352606476313795, "learning_rate": 4.941888979520163e-05, "loss": 0.6468, "step": 3262 }, { "epoch": 0.38693228981382666, "grad_norm": 1.5753864298295939, "learning_rate": 4.94183751507277e-05, "loss": 0.7856, "step": 3263 }, { "epoch": 0.38705087157595164, "grad_norm": 1.3628070917687545, "learning_rate": 4.94178602811463e-05, "loss": 0.674, "step": 3264 }, { "epoch": 0.3871694533380766, "grad_norm": 1.4276681805990732, "learning_rate": 4.941734518646216e-05, "loss": 0.87, "step": 3265 }, { "epoch": 0.3872880351002016, "grad_norm": 1.447348059374916, "learning_rate": 4.941682986668005e-05, "loss": 0.8123, "step": 3266 }, { "epoch": 0.3874066168623266, "grad_norm": 1.577136083871205, "learning_rate": 4.9416314321804705e-05, "loss": 1.1176, "step": 3267 }, { "epoch": 0.38752519862445156, "grad_norm": 1.5432176237036788, "learning_rate": 4.9415798551840884e-05, "loss": 0.8998, "step": 3268 }, { "epoch": 0.38764378038657654, "grad_norm": 1.5614019493619116, "learning_rate": 4.941528255679334e-05, "loss": 0.8637, "step": 3269 }, { "epoch": 0.3877623621487015, "grad_norm": 1.2358149862891883, "learning_rate": 4.941476633666683e-05, "loss": 0.6726, "step": 3270 }, { "epoch": 0.3878809439108265, "grad_norm": 1.435991032563116, "learning_rate": 4.941424989146612e-05, "loss": 0.8389, "step": 3271 }, { "epoch": 0.3879995256729515, "grad_norm": 1.7090639872696989, "learning_rate": 4.9413733221195954e-05, "loss": 0.8458, "step": 3272 }, { "epoch": 0.38811810743507646, "grad_norm": 1.370839445725039, "learning_rate": 4.941321632586112e-05, "loss": 0.6159, "step": 3273 }, { "epoch": 0.38823668919720145, "grad_norm": 1.6330061753200962, "learning_rate": 4.941269920546636e-05, "loss": 0.873, "step": 3274 }, { "epoch": 0.3883552709593265, "grad_norm": 1.5734966018109338, "learning_rate": 4.941218186001645e-05, "loss": 0.9558, "step": 3275 }, { "epoch": 0.38847385272145146, "grad_norm": 1.708319256953241, "learning_rate": 4.9411664289516154e-05, "loss": 0.7515, "step": 3276 }, { "epoch": 0.38859243448357644, "grad_norm": 1.678446161771092, "learning_rate": 4.941114649397026e-05, "loss": 0.807, "step": 3277 }, { "epoch": 0.3887110162457014, "grad_norm": 2.0889158301984665, "learning_rate": 4.941062847338353e-05, "loss": 1.0104, "step": 3278 }, { "epoch": 0.3888295980078264, "grad_norm": 1.3491515048262477, "learning_rate": 4.941011022776073e-05, "loss": 0.6331, "step": 3279 }, { "epoch": 0.3889481797699514, "grad_norm": 1.8027892738195368, "learning_rate": 4.9409591757106655e-05, "loss": 0.8477, "step": 3280 }, { "epoch": 0.38906676153207637, "grad_norm": 1.6248770939591675, "learning_rate": 4.9409073061426084e-05, "loss": 1.0085, "step": 3281 }, { "epoch": 0.38918534329420135, "grad_norm": 1.5348868739595138, "learning_rate": 4.940855414072377e-05, "loss": 0.7302, "step": 3282 }, { "epoch": 0.3893039250563263, "grad_norm": 1.460753591592435, "learning_rate": 4.940803499500454e-05, "loss": 0.8878, "step": 3283 }, { "epoch": 0.3894225068184513, "grad_norm": 1.6395725770935132, "learning_rate": 4.940751562427315e-05, "loss": 0.7117, "step": 3284 }, { "epoch": 0.3895410885805763, "grad_norm": 1.7980389044425864, "learning_rate": 4.940699602853439e-05, "loss": 0.8298, "step": 3285 }, { "epoch": 0.38965967034270127, "grad_norm": 1.423547702493396, "learning_rate": 4.940647620779307e-05, "loss": 0.7047, "step": 3286 }, { "epoch": 0.38977825210482625, "grad_norm": 1.4263621663054926, "learning_rate": 4.940595616205396e-05, "loss": 0.8817, "step": 3287 }, { "epoch": 0.3898968338669513, "grad_norm": 1.422393723432806, "learning_rate": 4.940543589132186e-05, "loss": 0.7458, "step": 3288 }, { "epoch": 0.39001541562907627, "grad_norm": 1.4784881977921902, "learning_rate": 4.9404915395601574e-05, "loss": 0.6777, "step": 3289 }, { "epoch": 0.39013399739120125, "grad_norm": 1.5326485409600152, "learning_rate": 4.940439467489789e-05, "loss": 0.8657, "step": 3290 }, { "epoch": 0.39025257915332623, "grad_norm": 1.7660718154046016, "learning_rate": 4.940387372921562e-05, "loss": 0.9972, "step": 3291 }, { "epoch": 0.3903711609154512, "grad_norm": 1.7339433884538618, "learning_rate": 4.9403352558559564e-05, "loss": 0.793, "step": 3292 }, { "epoch": 0.3904897426775762, "grad_norm": 1.8361831142060703, "learning_rate": 4.940283116293452e-05, "loss": 0.7304, "step": 3293 }, { "epoch": 0.39060832443970117, "grad_norm": 1.6550205962358926, "learning_rate": 4.9402309542345294e-05, "loss": 0.8737, "step": 3294 }, { "epoch": 0.39072690620182615, "grad_norm": 1.9644551239812384, "learning_rate": 4.9401787696796695e-05, "loss": 0.7666, "step": 3295 }, { "epoch": 0.39084548796395113, "grad_norm": 1.7150059625474834, "learning_rate": 4.9401265626293534e-05, "loss": 0.8499, "step": 3296 }, { "epoch": 0.3909640697260761, "grad_norm": 1.5227352248686579, "learning_rate": 4.9400743330840634e-05, "loss": 0.8438, "step": 3297 }, { "epoch": 0.3910826514882011, "grad_norm": 1.578894035824458, "learning_rate": 4.940022081044281e-05, "loss": 0.9757, "step": 3298 }, { "epoch": 0.3912012332503261, "grad_norm": 1.6238206759532363, "learning_rate": 4.939969806510486e-05, "loss": 0.8863, "step": 3299 }, { "epoch": 0.3913198150124511, "grad_norm": 1.3440291685675794, "learning_rate": 4.939917509483162e-05, "loss": 0.7646, "step": 3300 }, { "epoch": 0.3914383967745761, "grad_norm": 1.5086282919821539, "learning_rate": 4.9398651899627904e-05, "loss": 0.6555, "step": 3301 }, { "epoch": 0.3915569785367011, "grad_norm": 1.1122777817209073, "learning_rate": 4.9398128479498535e-05, "loss": 0.4731, "step": 3302 }, { "epoch": 0.39167556029882605, "grad_norm": 1.4516725550872647, "learning_rate": 4.939760483444834e-05, "loss": 0.6484, "step": 3303 }, { "epoch": 0.39179414206095103, "grad_norm": 1.4504480110526297, "learning_rate": 4.939708096448216e-05, "loss": 0.8402, "step": 3304 }, { "epoch": 0.391912723823076, "grad_norm": 1.1918348332356694, "learning_rate": 4.939655686960479e-05, "loss": 0.6642, "step": 3305 }, { "epoch": 0.392031305585201, "grad_norm": 1.4163219298573644, "learning_rate": 4.93960325498211e-05, "loss": 0.7743, "step": 3306 }, { "epoch": 0.392149887347326, "grad_norm": 1.6273564640794183, "learning_rate": 4.939550800513589e-05, "loss": 0.8476, "step": 3307 }, { "epoch": 0.39226846910945096, "grad_norm": 1.5597893820936082, "learning_rate": 4.9394983235554026e-05, "loss": 0.7616, "step": 3308 }, { "epoch": 0.39238705087157594, "grad_norm": 1.5828427790084643, "learning_rate": 4.939445824108033e-05, "loss": 0.7975, "step": 3309 }, { "epoch": 0.3925056326337009, "grad_norm": 1.3681850527160384, "learning_rate": 4.939393302171964e-05, "loss": 0.7218, "step": 3310 }, { "epoch": 0.3926242143958259, "grad_norm": 1.6608662129483673, "learning_rate": 4.93934075774768e-05, "loss": 0.803, "step": 3311 }, { "epoch": 0.3927427961579509, "grad_norm": 1.4518046880463196, "learning_rate": 4.9392881908356656e-05, "loss": 0.7858, "step": 3312 }, { "epoch": 0.3928613779200759, "grad_norm": 1.7026444158450262, "learning_rate": 4.9392356014364064e-05, "loss": 0.8648, "step": 3313 }, { "epoch": 0.3929799596822009, "grad_norm": 1.3049209607878027, "learning_rate": 4.939182989550384e-05, "loss": 0.5429, "step": 3314 }, { "epoch": 0.3930985414443259, "grad_norm": 1.4527370492530713, "learning_rate": 4.9391303551780875e-05, "loss": 0.7274, "step": 3315 }, { "epoch": 0.39321712320645086, "grad_norm": 1.3896346423597878, "learning_rate": 4.9390776983199995e-05, "loss": 0.6508, "step": 3316 }, { "epoch": 0.39333570496857584, "grad_norm": 1.6249645979739245, "learning_rate": 4.939025018976606e-05, "loss": 0.7569, "step": 3317 }, { "epoch": 0.3934542867307008, "grad_norm": 1.5190156063919769, "learning_rate": 4.938972317148392e-05, "loss": 0.7588, "step": 3318 }, { "epoch": 0.3935728684928258, "grad_norm": 1.6685653326119232, "learning_rate": 4.9389195928358455e-05, "loss": 0.9356, "step": 3319 }, { "epoch": 0.3936914502549508, "grad_norm": 1.8987857422184213, "learning_rate": 4.93886684603945e-05, "loss": 0.8156, "step": 3320 }, { "epoch": 0.39381003201707576, "grad_norm": 1.5318532587333362, "learning_rate": 4.938814076759694e-05, "loss": 0.8025, "step": 3321 }, { "epoch": 0.39392861377920074, "grad_norm": 1.5395002117451884, "learning_rate": 4.938761284997062e-05, "loss": 0.8123, "step": 3322 }, { "epoch": 0.3940471955413257, "grad_norm": 1.5854882781999662, "learning_rate": 4.9387084707520424e-05, "loss": 0.6995, "step": 3323 }, { "epoch": 0.3941657773034507, "grad_norm": 1.7024722292091725, "learning_rate": 4.938655634025121e-05, "loss": 0.7811, "step": 3324 }, { "epoch": 0.39428435906557574, "grad_norm": 1.53892323711443, "learning_rate": 4.938602774816784e-05, "loss": 0.6055, "step": 3325 }, { "epoch": 0.3944029408277007, "grad_norm": 1.5910635459703626, "learning_rate": 4.938549893127522e-05, "loss": 0.6579, "step": 3326 }, { "epoch": 0.3945215225898257, "grad_norm": 1.4956944844954958, "learning_rate": 4.938496988957818e-05, "loss": 0.9684, "step": 3327 }, { "epoch": 0.3946401043519507, "grad_norm": 1.5579680423675262, "learning_rate": 4.9384440623081634e-05, "loss": 0.86, "step": 3328 }, { "epoch": 0.39475868611407566, "grad_norm": 1.6918983159602863, "learning_rate": 4.938391113179045e-05, "loss": 0.8824, "step": 3329 }, { "epoch": 0.39487726787620064, "grad_norm": 1.6496230498757027, "learning_rate": 4.938338141570949e-05, "loss": 0.7569, "step": 3330 }, { "epoch": 0.3949958496383256, "grad_norm": 1.6788619839830157, "learning_rate": 4.938285147484367e-05, "loss": 0.813, "step": 3331 }, { "epoch": 0.3951144314004506, "grad_norm": 1.4458562756329658, "learning_rate": 4.938232130919786e-05, "loss": 0.6295, "step": 3332 }, { "epoch": 0.3952330131625756, "grad_norm": 1.505330626191431, "learning_rate": 4.9381790918776944e-05, "loss": 0.7627, "step": 3333 }, { "epoch": 0.39535159492470057, "grad_norm": 1.4694504990661184, "learning_rate": 4.938126030358581e-05, "loss": 0.6246, "step": 3334 }, { "epoch": 0.39547017668682555, "grad_norm": 1.4346908739765836, "learning_rate": 4.938072946362936e-05, "loss": 0.7796, "step": 3335 }, { "epoch": 0.39558875844895053, "grad_norm": 1.293304158845047, "learning_rate": 4.9380198398912484e-05, "loss": 0.5415, "step": 3336 }, { "epoch": 0.3957073402110755, "grad_norm": 1.7584943751560544, "learning_rate": 4.9379667109440066e-05, "loss": 0.8135, "step": 3337 }, { "epoch": 0.39582592197320055, "grad_norm": 1.7645390824554963, "learning_rate": 4.937913559521702e-05, "loss": 0.8113, "step": 3338 }, { "epoch": 0.3959445037353255, "grad_norm": 1.4701346530124066, "learning_rate": 4.937860385624824e-05, "loss": 0.5539, "step": 3339 }, { "epoch": 0.3960630854974505, "grad_norm": 1.239994218604369, "learning_rate": 4.937807189253862e-05, "loss": 0.5161, "step": 3340 }, { "epoch": 0.3961816672595755, "grad_norm": 1.4749371391915223, "learning_rate": 4.937753970409308e-05, "loss": 0.7041, "step": 3341 }, { "epoch": 0.39630024902170047, "grad_norm": 2.29390332576408, "learning_rate": 4.937700729091652e-05, "loss": 1.0802, "step": 3342 }, { "epoch": 0.39641883078382545, "grad_norm": 2.0241809279902667, "learning_rate": 4.9376474653013836e-05, "loss": 1.0442, "step": 3343 }, { "epoch": 0.39653741254595043, "grad_norm": 1.647959462601484, "learning_rate": 4.937594179038996e-05, "loss": 0.8566, "step": 3344 }, { "epoch": 0.3966559943080754, "grad_norm": 1.5882247130810698, "learning_rate": 4.937540870304978e-05, "loss": 0.588, "step": 3345 }, { "epoch": 0.3967745760702004, "grad_norm": 1.4582133961969428, "learning_rate": 4.937487539099823e-05, "loss": 0.6158, "step": 3346 }, { "epoch": 0.39689315783232537, "grad_norm": 1.9055394927783285, "learning_rate": 4.937434185424021e-05, "loss": 0.9306, "step": 3347 }, { "epoch": 0.39701173959445035, "grad_norm": 1.6881088256853234, "learning_rate": 4.937380809278066e-05, "loss": 0.7005, "step": 3348 }, { "epoch": 0.39713032135657533, "grad_norm": 1.7204046429486144, "learning_rate": 4.9373274106624486e-05, "loss": 0.718, "step": 3349 }, { "epoch": 0.39724890311870037, "grad_norm": 1.813095108852824, "learning_rate": 4.937273989577661e-05, "loss": 0.7771, "step": 3350 }, { "epoch": 0.39736748488082535, "grad_norm": 1.6675996370654633, "learning_rate": 4.937220546024196e-05, "loss": 0.8399, "step": 3351 }, { "epoch": 0.39748606664295033, "grad_norm": 1.8288119795391033, "learning_rate": 4.937167080002546e-05, "loss": 0.9696, "step": 3352 }, { "epoch": 0.3976046484050753, "grad_norm": 1.6153792563060803, "learning_rate": 4.9371135915132046e-05, "loss": 0.7503, "step": 3353 }, { "epoch": 0.3977232301672003, "grad_norm": 1.2700027823378905, "learning_rate": 4.9370600805566644e-05, "loss": 0.82, "step": 3354 }, { "epoch": 0.3978418119293253, "grad_norm": 1.364088379972212, "learning_rate": 4.9370065471334195e-05, "loss": 0.6156, "step": 3355 }, { "epoch": 0.39796039369145025, "grad_norm": 1.503570511929708, "learning_rate": 4.936952991243961e-05, "loss": 0.8318, "step": 3356 }, { "epoch": 0.39807897545357523, "grad_norm": 1.4208174714595145, "learning_rate": 4.936899412888786e-05, "loss": 0.5889, "step": 3357 }, { "epoch": 0.3981975572157002, "grad_norm": 1.263580305900277, "learning_rate": 4.936845812068386e-05, "loss": 0.7254, "step": 3358 }, { "epoch": 0.3983161389778252, "grad_norm": 1.6364896714071715, "learning_rate": 4.9367921887832555e-05, "loss": 0.6205, "step": 3359 }, { "epoch": 0.3984347207399502, "grad_norm": 1.6880092895329795, "learning_rate": 4.93673854303389e-05, "loss": 0.7069, "step": 3360 }, { "epoch": 0.39855330250207516, "grad_norm": 1.7146483234076988, "learning_rate": 4.9366848748207825e-05, "loss": 0.9251, "step": 3361 }, { "epoch": 0.3986718842642002, "grad_norm": 1.2106349928792526, "learning_rate": 4.93663118414443e-05, "loss": 0.4962, "step": 3362 }, { "epoch": 0.3987904660263252, "grad_norm": 1.5855827639331197, "learning_rate": 4.936577471005325e-05, "loss": 0.892, "step": 3363 }, { "epoch": 0.39890904778845016, "grad_norm": 1.5816083378500552, "learning_rate": 4.936523735403963e-05, "loss": 0.6376, "step": 3364 }, { "epoch": 0.39902762955057514, "grad_norm": 1.4855177848230925, "learning_rate": 4.9364699773408406e-05, "loss": 0.6907, "step": 3365 }, { "epoch": 0.3991462113127001, "grad_norm": 1.3170190881884658, "learning_rate": 4.9364161968164524e-05, "loss": 0.6577, "step": 3366 }, { "epoch": 0.3992647930748251, "grad_norm": 1.6298589072215264, "learning_rate": 4.936362393831295e-05, "loss": 0.8367, "step": 3367 }, { "epoch": 0.3993833748369501, "grad_norm": 1.5387479753800952, "learning_rate": 4.9363085683858644e-05, "loss": 0.9639, "step": 3368 }, { "epoch": 0.39950195659907506, "grad_norm": 1.5261479950227446, "learning_rate": 4.936254720480656e-05, "loss": 0.7021, "step": 3369 }, { "epoch": 0.39962053836120004, "grad_norm": 1.9399829320051425, "learning_rate": 4.936200850116166e-05, "loss": 0.8731, "step": 3370 }, { "epoch": 0.399739120123325, "grad_norm": 1.4958363778256412, "learning_rate": 4.9361469572928925e-05, "loss": 0.6063, "step": 3371 }, { "epoch": 0.39985770188545, "grad_norm": 1.6236347143526728, "learning_rate": 4.936093042011331e-05, "loss": 0.9497, "step": 3372 }, { "epoch": 0.399976283647575, "grad_norm": 1.8810137861852714, "learning_rate": 4.9360391042719786e-05, "loss": 0.9255, "step": 3373 }, { "epoch": 0.40009486540969996, "grad_norm": 1.3787712495858466, "learning_rate": 4.935985144075334e-05, "loss": 0.6541, "step": 3374 }, { "epoch": 0.400213447171825, "grad_norm": 1.5274191748814043, "learning_rate": 4.9359311614218925e-05, "loss": 0.9342, "step": 3375 }, { "epoch": 0.40033202893395, "grad_norm": 1.344509935976214, "learning_rate": 4.9358771563121544e-05, "loss": 0.4745, "step": 3376 }, { "epoch": 0.40045061069607496, "grad_norm": 1.6576988718001862, "learning_rate": 4.935823128746615e-05, "loss": 0.8457, "step": 3377 }, { "epoch": 0.40056919245819994, "grad_norm": 1.5238953304913703, "learning_rate": 4.9357690787257727e-05, "loss": 0.8041, "step": 3378 }, { "epoch": 0.4006877742203249, "grad_norm": 1.405020032574401, "learning_rate": 4.935715006250127e-05, "loss": 0.6469, "step": 3379 }, { "epoch": 0.4008063559824499, "grad_norm": 1.5413253189765799, "learning_rate": 4.9356609113201765e-05, "loss": 0.8746, "step": 3380 }, { "epoch": 0.4009249377445749, "grad_norm": 1.5114623058462089, "learning_rate": 4.935606793936418e-05, "loss": 0.8747, "step": 3381 }, { "epoch": 0.40104351950669986, "grad_norm": 1.2867647258247454, "learning_rate": 4.935552654099352e-05, "loss": 0.8175, "step": 3382 }, { "epoch": 0.40116210126882484, "grad_norm": 1.3478712432482556, "learning_rate": 4.935498491809478e-05, "loss": 0.6771, "step": 3383 }, { "epoch": 0.4012806830309498, "grad_norm": 1.7330315836675476, "learning_rate": 4.935444307067294e-05, "loss": 0.7197, "step": 3384 }, { "epoch": 0.4013992647930748, "grad_norm": 1.4665619216901866, "learning_rate": 4.935390099873299e-05, "loss": 0.7805, "step": 3385 }, { "epoch": 0.4015178465551998, "grad_norm": 1.440689315931767, "learning_rate": 4.935335870227995e-05, "loss": 0.8184, "step": 3386 }, { "epoch": 0.4016364283173248, "grad_norm": 1.6097100683343846, "learning_rate": 4.93528161813188e-05, "loss": 0.8286, "step": 3387 }, { "epoch": 0.4017550100794498, "grad_norm": 1.5429713573431623, "learning_rate": 4.935227343585456e-05, "loss": 0.9283, "step": 3388 }, { "epoch": 0.4018735918415748, "grad_norm": 1.3851640255278754, "learning_rate": 4.935173046589221e-05, "loss": 0.8534, "step": 3389 }, { "epoch": 0.40199217360369977, "grad_norm": 1.5980185114582355, "learning_rate": 4.935118727143677e-05, "loss": 0.8216, "step": 3390 }, { "epoch": 0.40211075536582475, "grad_norm": 1.628011852553332, "learning_rate": 4.9350643852493246e-05, "loss": 0.9178, "step": 3391 }, { "epoch": 0.4022293371279497, "grad_norm": 1.2916267016545147, "learning_rate": 4.935010020906664e-05, "loss": 0.7533, "step": 3392 }, { "epoch": 0.4023479188900747, "grad_norm": 1.3867717963000399, "learning_rate": 4.934955634116198e-05, "loss": 0.5335, "step": 3393 }, { "epoch": 0.4024665006521997, "grad_norm": 1.5893106206145253, "learning_rate": 4.934901224878427e-05, "loss": 0.8569, "step": 3394 }, { "epoch": 0.40258508241432467, "grad_norm": 1.1778019412265597, "learning_rate": 4.9348467931938513e-05, "loss": 0.5039, "step": 3395 }, { "epoch": 0.40270366417644965, "grad_norm": 1.8783924132261551, "learning_rate": 4.9347923390629754e-05, "loss": 1.0726, "step": 3396 }, { "epoch": 0.40282224593857463, "grad_norm": 1.2022616088968383, "learning_rate": 4.9347378624862996e-05, "loss": 0.5291, "step": 3397 }, { "epoch": 0.4029408277006996, "grad_norm": 1.4995959226947364, "learning_rate": 4.9346833634643254e-05, "loss": 0.75, "step": 3398 }, { "epoch": 0.4030594094628246, "grad_norm": 1.8034446989076438, "learning_rate": 4.934628841997557e-05, "loss": 0.7166, "step": 3399 }, { "epoch": 0.40317799122494963, "grad_norm": 1.3233313178205286, "learning_rate": 4.9345742980864965e-05, "loss": 0.6449, "step": 3400 }, { "epoch": 0.4032965729870746, "grad_norm": 1.4255309221165924, "learning_rate": 4.934519731731645e-05, "loss": 0.7342, "step": 3401 }, { "epoch": 0.4034151547491996, "grad_norm": 1.5792627725796746, "learning_rate": 4.934465142933509e-05, "loss": 0.6658, "step": 3402 }, { "epoch": 0.40353373651132457, "grad_norm": 1.727653623241166, "learning_rate": 4.934410531692588e-05, "loss": 0.7189, "step": 3403 }, { "epoch": 0.40365231827344955, "grad_norm": 2.1020966399488366, "learning_rate": 4.9343558980093886e-05, "loss": 0.8857, "step": 3404 }, { "epoch": 0.40377090003557453, "grad_norm": 1.7388821362600073, "learning_rate": 4.934301241884412e-05, "loss": 0.9631, "step": 3405 }, { "epoch": 0.4038894817976995, "grad_norm": 1.4400083498352596, "learning_rate": 4.934246563318163e-05, "loss": 0.4774, "step": 3406 }, { "epoch": 0.4040080635598245, "grad_norm": 1.9420579513930956, "learning_rate": 4.9341918623111465e-05, "loss": 0.6372, "step": 3407 }, { "epoch": 0.4041266453219495, "grad_norm": 1.5099200276144495, "learning_rate": 4.9341371388638654e-05, "loss": 0.9154, "step": 3408 }, { "epoch": 0.40424522708407445, "grad_norm": 1.5452078716447772, "learning_rate": 4.9340823929768256e-05, "loss": 0.7251, "step": 3409 }, { "epoch": 0.40436380884619944, "grad_norm": 1.3661487184230467, "learning_rate": 4.9340276246505304e-05, "loss": 0.6121, "step": 3410 }, { "epoch": 0.4044823906083244, "grad_norm": 1.961458200481552, "learning_rate": 4.9339728338854854e-05, "loss": 1.0919, "step": 3411 }, { "epoch": 0.40460097237044945, "grad_norm": 1.5911094321446138, "learning_rate": 4.9339180206821955e-05, "loss": 0.7319, "step": 3412 }, { "epoch": 0.40471955413257443, "grad_norm": 1.7372527818237073, "learning_rate": 4.933863185041167e-05, "loss": 0.766, "step": 3413 }, { "epoch": 0.4048381358946994, "grad_norm": 1.4697016060135826, "learning_rate": 4.9338083269629033e-05, "loss": 0.7045, "step": 3414 }, { "epoch": 0.4049567176568244, "grad_norm": 1.3166802504153585, "learning_rate": 4.9337534464479115e-05, "loss": 0.6637, "step": 3415 }, { "epoch": 0.4050752994189494, "grad_norm": 1.277464774511906, "learning_rate": 4.9336985434966974e-05, "loss": 0.5968, "step": 3416 }, { "epoch": 0.40519388118107436, "grad_norm": 1.5931220291623915, "learning_rate": 4.933643618109768e-05, "loss": 0.9288, "step": 3417 }, { "epoch": 0.40531246294319934, "grad_norm": 1.2518560200232918, "learning_rate": 4.933588670287628e-05, "loss": 0.7393, "step": 3418 }, { "epoch": 0.4054310447053243, "grad_norm": 1.5079765308091393, "learning_rate": 4.933533700030785e-05, "loss": 0.6285, "step": 3419 }, { "epoch": 0.4055496264674493, "grad_norm": 1.6601107216223718, "learning_rate": 4.9334787073397457e-05, "loss": 0.5472, "step": 3420 }, { "epoch": 0.4056682082295743, "grad_norm": 1.3242192456250603, "learning_rate": 4.9334236922150165e-05, "loss": 0.6104, "step": 3421 }, { "epoch": 0.40578678999169926, "grad_norm": 1.7438692663798263, "learning_rate": 4.933368654657105e-05, "loss": 0.9018, "step": 3422 }, { "epoch": 0.40590537175382424, "grad_norm": 1.4367483782157127, "learning_rate": 4.9333135946665184e-05, "loss": 0.6488, "step": 3423 }, { "epoch": 0.4060239535159492, "grad_norm": 1.5978080753042538, "learning_rate": 4.9332585122437645e-05, "loss": 0.6762, "step": 3424 }, { "epoch": 0.40614253527807426, "grad_norm": 1.4488677975801816, "learning_rate": 4.933203407389351e-05, "loss": 0.5466, "step": 3425 }, { "epoch": 0.40626111704019924, "grad_norm": 1.5824385254384985, "learning_rate": 4.9331482801037856e-05, "loss": 0.7685, "step": 3426 }, { "epoch": 0.4063796988023242, "grad_norm": 1.9380282970927374, "learning_rate": 4.933093130387577e-05, "loss": 1.0251, "step": 3427 }, { "epoch": 0.4064982805644492, "grad_norm": 1.4500720640721474, "learning_rate": 4.933037958241233e-05, "loss": 0.6312, "step": 3428 }, { "epoch": 0.4066168623265742, "grad_norm": 1.5916545605778114, "learning_rate": 4.9329827636652634e-05, "loss": 0.7453, "step": 3429 }, { "epoch": 0.40673544408869916, "grad_norm": 1.6341541487652664, "learning_rate": 4.932927546660176e-05, "loss": 0.6945, "step": 3430 }, { "epoch": 0.40685402585082414, "grad_norm": 1.7196416855281644, "learning_rate": 4.9328723072264796e-05, "loss": 0.8219, "step": 3431 }, { "epoch": 0.4069726076129491, "grad_norm": 1.5825117829133402, "learning_rate": 4.932817045364684e-05, "loss": 0.7061, "step": 3432 }, { "epoch": 0.4070911893750741, "grad_norm": 1.5792277168757893, "learning_rate": 4.932761761075299e-05, "loss": 0.7178, "step": 3433 }, { "epoch": 0.4072097711371991, "grad_norm": 1.5135792919178124, "learning_rate": 4.932706454358834e-05, "loss": 0.7541, "step": 3434 }, { "epoch": 0.40732835289932406, "grad_norm": 1.44129597228733, "learning_rate": 4.932651125215798e-05, "loss": 0.5207, "step": 3435 }, { "epoch": 0.40744693466144905, "grad_norm": 1.418601843407384, "learning_rate": 4.9325957736467024e-05, "loss": 0.82, "step": 3436 }, { "epoch": 0.4075655164235741, "grad_norm": 1.6785701775057487, "learning_rate": 4.9325403996520556e-05, "loss": 0.8429, "step": 3437 }, { "epoch": 0.40768409818569906, "grad_norm": 1.5274176025786574, "learning_rate": 4.932485003232371e-05, "loss": 0.7568, "step": 3438 }, { "epoch": 0.40780267994782404, "grad_norm": 1.4593084696703231, "learning_rate": 4.932429584388156e-05, "loss": 0.8834, "step": 3439 }, { "epoch": 0.407921261709949, "grad_norm": 1.5031065216817876, "learning_rate": 4.932374143119924e-05, "loss": 0.6498, "step": 3440 }, { "epoch": 0.408039843472074, "grad_norm": 1.5074483218756476, "learning_rate": 4.9323186794281854e-05, "loss": 0.9409, "step": 3441 }, { "epoch": 0.408158425234199, "grad_norm": 1.3944935465291692, "learning_rate": 4.93226319331345e-05, "loss": 0.6616, "step": 3442 }, { "epoch": 0.40827700699632397, "grad_norm": 1.7637032208267687, "learning_rate": 4.9322076847762325e-05, "loss": 0.886, "step": 3443 }, { "epoch": 0.40839558875844895, "grad_norm": 1.5682612317748061, "learning_rate": 4.932152153817041e-05, "loss": 0.7478, "step": 3444 }, { "epoch": 0.4085141705205739, "grad_norm": 1.5443493968539377, "learning_rate": 4.93209660043639e-05, "loss": 0.8019, "step": 3445 }, { "epoch": 0.4086327522826989, "grad_norm": 1.5744348073970764, "learning_rate": 4.93204102463479e-05, "loss": 0.8721, "step": 3446 }, { "epoch": 0.4087513340448239, "grad_norm": 1.5048826586613848, "learning_rate": 4.931985426412755e-05, "loss": 0.7389, "step": 3447 }, { "epoch": 0.40886991580694887, "grad_norm": 1.6222487926822102, "learning_rate": 4.9319298057707963e-05, "loss": 0.7643, "step": 3448 }, { "epoch": 0.40898849756907385, "grad_norm": 1.3656604469741485, "learning_rate": 4.9318741627094274e-05, "loss": 0.6644, "step": 3449 }, { "epoch": 0.4091070793311989, "grad_norm": 1.6069248588623468, "learning_rate": 4.9318184972291605e-05, "loss": 0.8331, "step": 3450 }, { "epoch": 0.40922566109332387, "grad_norm": 1.6028723093677715, "learning_rate": 4.931762809330509e-05, "loss": 0.8465, "step": 3451 }, { "epoch": 0.40934424285544885, "grad_norm": 1.6336867309047571, "learning_rate": 4.931707099013987e-05, "loss": 0.7729, "step": 3452 }, { "epoch": 0.40946282461757383, "grad_norm": 1.5954690811340562, "learning_rate": 4.931651366280107e-05, "loss": 0.8472, "step": 3453 }, { "epoch": 0.4095814063796988, "grad_norm": 1.4966391219210247, "learning_rate": 4.9315956111293834e-05, "loss": 0.6397, "step": 3454 }, { "epoch": 0.4096999881418238, "grad_norm": 1.4440028027098661, "learning_rate": 4.93153983356233e-05, "loss": 0.7314, "step": 3455 }, { "epoch": 0.40981856990394877, "grad_norm": 1.5852238093115323, "learning_rate": 4.931484033579461e-05, "loss": 0.5464, "step": 3456 }, { "epoch": 0.40993715166607375, "grad_norm": 1.5729654258586305, "learning_rate": 4.931428211181292e-05, "loss": 0.7612, "step": 3457 }, { "epoch": 0.41005573342819873, "grad_norm": 1.3487204548532536, "learning_rate": 4.9313723663683347e-05, "loss": 0.4851, "step": 3458 }, { "epoch": 0.4101743151903237, "grad_norm": 1.4905212843137303, "learning_rate": 4.9313164991411064e-05, "loss": 0.6278, "step": 3459 }, { "epoch": 0.4102928969524487, "grad_norm": 1.3755600685344214, "learning_rate": 4.9312606095001226e-05, "loss": 0.7017, "step": 3460 }, { "epoch": 0.4104114787145737, "grad_norm": 1.8752606132270313, "learning_rate": 4.931204697445896e-05, "loss": 0.7082, "step": 3461 }, { "epoch": 0.4105300604766987, "grad_norm": 1.577139917113333, "learning_rate": 4.9311487629789435e-05, "loss": 0.6306, "step": 3462 }, { "epoch": 0.4106486422388237, "grad_norm": 1.5914606207220774, "learning_rate": 4.9310928060997816e-05, "loss": 0.8667, "step": 3463 }, { "epoch": 0.41076722400094867, "grad_norm": 1.7841461888097216, "learning_rate": 4.931036826808925e-05, "loss": 0.8442, "step": 3464 }, { "epoch": 0.41088580576307365, "grad_norm": 1.5518005588465547, "learning_rate": 4.930980825106889e-05, "loss": 0.6227, "step": 3465 }, { "epoch": 0.41100438752519863, "grad_norm": 1.7562414908245274, "learning_rate": 4.9309248009941914e-05, "loss": 0.725, "step": 3466 }, { "epoch": 0.4111229692873236, "grad_norm": 1.4494233795813607, "learning_rate": 4.930868754471348e-05, "loss": 0.6459, "step": 3467 }, { "epoch": 0.4112415510494486, "grad_norm": 1.442957451447209, "learning_rate": 4.9308126855388766e-05, "loss": 0.7303, "step": 3468 }, { "epoch": 0.4113601328115736, "grad_norm": 1.708564518054293, "learning_rate": 4.930756594197292e-05, "loss": 0.9952, "step": 3469 }, { "epoch": 0.41147871457369856, "grad_norm": 1.5289381108194489, "learning_rate": 4.930700480447113e-05, "loss": 0.5792, "step": 3470 }, { "epoch": 0.41159729633582354, "grad_norm": 1.439925454497166, "learning_rate": 4.930644344288856e-05, "loss": 0.5569, "step": 3471 }, { "epoch": 0.4117158780979485, "grad_norm": 1.617093750744133, "learning_rate": 4.930588185723039e-05, "loss": 0.9871, "step": 3472 }, { "epoch": 0.4118344598600735, "grad_norm": 1.4453161687567218, "learning_rate": 4.930532004750179e-05, "loss": 0.768, "step": 3473 }, { "epoch": 0.4119530416221985, "grad_norm": 1.4527607790123462, "learning_rate": 4.9304758013707955e-05, "loss": 0.7126, "step": 3474 }, { "epoch": 0.4120716233843235, "grad_norm": 1.5060731668936405, "learning_rate": 4.9304195755854046e-05, "loss": 0.7668, "step": 3475 }, { "epoch": 0.4121902051464485, "grad_norm": 1.5718959237329786, "learning_rate": 4.930363327394526e-05, "loss": 0.7469, "step": 3476 }, { "epoch": 0.4123087869085735, "grad_norm": 1.6925607504881741, "learning_rate": 4.9303070567986776e-05, "loss": 0.7178, "step": 3477 }, { "epoch": 0.41242736867069846, "grad_norm": 1.6654667133548968, "learning_rate": 4.930250763798378e-05, "loss": 0.8337, "step": 3478 }, { "epoch": 0.41254595043282344, "grad_norm": 1.6908092294152437, "learning_rate": 4.9301944483941477e-05, "loss": 0.8391, "step": 3479 }, { "epoch": 0.4126645321949484, "grad_norm": 1.5207017324353465, "learning_rate": 4.930138110586504e-05, "loss": 0.7875, "step": 3480 }, { "epoch": 0.4127831139570734, "grad_norm": 1.9390192450079304, "learning_rate": 4.9300817503759666e-05, "loss": 0.6412, "step": 3481 }, { "epoch": 0.4129016957191984, "grad_norm": 1.5247446077824394, "learning_rate": 4.930025367763056e-05, "loss": 0.7827, "step": 3482 }, { "epoch": 0.41302027748132336, "grad_norm": 1.4392877651827398, "learning_rate": 4.929968962748291e-05, "loss": 0.7235, "step": 3483 }, { "epoch": 0.41313885924344834, "grad_norm": 1.7384104651675722, "learning_rate": 4.929912535332192e-05, "loss": 0.8048, "step": 3484 }, { "epoch": 0.4132574410055733, "grad_norm": 1.2921828206814736, "learning_rate": 4.9298560855152795e-05, "loss": 0.467, "step": 3485 }, { "epoch": 0.4133760227676983, "grad_norm": 1.5098218903705045, "learning_rate": 4.9297996132980734e-05, "loss": 0.5982, "step": 3486 }, { "epoch": 0.41349460452982334, "grad_norm": 1.498529510436589, "learning_rate": 4.9297431186810946e-05, "loss": 0.6793, "step": 3487 }, { "epoch": 0.4136131862919483, "grad_norm": 2.012622463968173, "learning_rate": 4.9296866016648635e-05, "loss": 1.0554, "step": 3488 }, { "epoch": 0.4137317680540733, "grad_norm": 1.304370021550811, "learning_rate": 4.929630062249901e-05, "loss": 0.54, "step": 3489 }, { "epoch": 0.4138503498161983, "grad_norm": 1.5910058209386557, "learning_rate": 4.92957350043673e-05, "loss": 0.7642, "step": 3490 }, { "epoch": 0.41396893157832326, "grad_norm": 1.6771178219265093, "learning_rate": 4.9295169162258706e-05, "loss": 0.8248, "step": 3491 }, { "epoch": 0.41408751334044824, "grad_norm": 1.7524390891311574, "learning_rate": 4.929460309617844e-05, "loss": 0.7506, "step": 3492 }, { "epoch": 0.4142060951025732, "grad_norm": 1.3186552976242494, "learning_rate": 4.9294036806131716e-05, "loss": 0.4944, "step": 3493 }, { "epoch": 0.4143246768646982, "grad_norm": 1.5676041178828677, "learning_rate": 4.929347029212378e-05, "loss": 0.6818, "step": 3494 }, { "epoch": 0.4144432586268232, "grad_norm": 1.2629778387783006, "learning_rate": 4.9292903554159825e-05, "loss": 0.6744, "step": 3495 }, { "epoch": 0.41456184038894817, "grad_norm": 1.5543199898423563, "learning_rate": 4.929233659224509e-05, "loss": 0.9598, "step": 3496 }, { "epoch": 0.41468042215107315, "grad_norm": 1.392466191742914, "learning_rate": 4.9291769406384815e-05, "loss": 0.5856, "step": 3497 }, { "epoch": 0.4147990039131981, "grad_norm": 1.4195353735619303, "learning_rate": 4.92912019965842e-05, "loss": 0.6725, "step": 3498 }, { "epoch": 0.4149175856753231, "grad_norm": 1.7339981168627936, "learning_rate": 4.9290634362848496e-05, "loss": 0.8707, "step": 3499 }, { "epoch": 0.41503616743744814, "grad_norm": 1.6001785860816717, "learning_rate": 4.929006650518293e-05, "loss": 0.8114, "step": 3500 }, { "epoch": 0.4151547491995731, "grad_norm": 1.386350899960089, "learning_rate": 4.928949842359274e-05, "loss": 0.8131, "step": 3501 }, { "epoch": 0.4152733309616981, "grad_norm": 1.8402233478375718, "learning_rate": 4.928893011808316e-05, "loss": 0.7719, "step": 3502 }, { "epoch": 0.4153919127238231, "grad_norm": 1.5626656738683897, "learning_rate": 4.9288361588659425e-05, "loss": 0.961, "step": 3503 }, { "epoch": 0.41551049448594807, "grad_norm": 1.9040769285908825, "learning_rate": 4.928779283532679e-05, "loss": 1.0175, "step": 3504 }, { "epoch": 0.41562907624807305, "grad_norm": 1.5862661063074501, "learning_rate": 4.9287223858090475e-05, "loss": 0.6777, "step": 3505 }, { "epoch": 0.41574765801019803, "grad_norm": 1.4479039744577782, "learning_rate": 4.928665465695574e-05, "loss": 0.5448, "step": 3506 }, { "epoch": 0.415866239772323, "grad_norm": 1.8070495012197811, "learning_rate": 4.928608523192784e-05, "loss": 0.859, "step": 3507 }, { "epoch": 0.415984821534448, "grad_norm": 1.576782667520276, "learning_rate": 4.9285515583012014e-05, "loss": 0.7788, "step": 3508 }, { "epoch": 0.41610340329657297, "grad_norm": 1.564038324448268, "learning_rate": 4.9284945710213514e-05, "loss": 0.72, "step": 3509 }, { "epoch": 0.41622198505869795, "grad_norm": 1.637643711315041, "learning_rate": 4.928437561353759e-05, "loss": 0.6804, "step": 3510 }, { "epoch": 0.41634056682082293, "grad_norm": 1.393310972684991, "learning_rate": 4.9283805292989514e-05, "loss": 0.6632, "step": 3511 }, { "epoch": 0.41645914858294797, "grad_norm": 1.3798318232090196, "learning_rate": 4.9283234748574524e-05, "loss": 0.6344, "step": 3512 }, { "epoch": 0.41657773034507295, "grad_norm": 1.7109262826729672, "learning_rate": 4.928266398029789e-05, "loss": 0.8602, "step": 3513 }, { "epoch": 0.41669631210719793, "grad_norm": 1.5900011759965962, "learning_rate": 4.928209298816487e-05, "loss": 0.7648, "step": 3514 }, { "epoch": 0.4168148938693229, "grad_norm": 1.854129126487447, "learning_rate": 4.928152177218073e-05, "loss": 0.9259, "step": 3515 }, { "epoch": 0.4169334756314479, "grad_norm": 1.5047202853638824, "learning_rate": 4.9280950332350725e-05, "loss": 0.7767, "step": 3516 }, { "epoch": 0.4170520573935729, "grad_norm": 1.8199421049242315, "learning_rate": 4.928037866868015e-05, "loss": 0.9127, "step": 3517 }, { "epoch": 0.41717063915569785, "grad_norm": 1.4314315752243256, "learning_rate": 4.927980678117426e-05, "loss": 0.6471, "step": 3518 }, { "epoch": 0.41728922091782283, "grad_norm": 1.3300663970491629, "learning_rate": 4.9279234669838304e-05, "loss": 0.8425, "step": 3519 }, { "epoch": 0.4174078026799478, "grad_norm": 1.4923891540841085, "learning_rate": 4.9278662334677594e-05, "loss": 0.6898, "step": 3520 }, { "epoch": 0.4175263844420728, "grad_norm": 1.5828728769972298, "learning_rate": 4.9278089775697386e-05, "loss": 0.8851, "step": 3521 }, { "epoch": 0.4176449662041978, "grad_norm": 1.5270001649144636, "learning_rate": 4.927751699290296e-05, "loss": 0.5907, "step": 3522 }, { "epoch": 0.41776354796632276, "grad_norm": 1.43211114771798, "learning_rate": 4.92769439862996e-05, "loss": 0.5983, "step": 3523 }, { "epoch": 0.41788212972844774, "grad_norm": 1.7378783966698976, "learning_rate": 4.927637075589259e-05, "loss": 0.8189, "step": 3524 }, { "epoch": 0.4180007114905728, "grad_norm": 1.8966172742010539, "learning_rate": 4.92757973016872e-05, "loss": 0.9898, "step": 3525 }, { "epoch": 0.41811929325269775, "grad_norm": 1.3002902120509119, "learning_rate": 4.927522362368874e-05, "loss": 0.6857, "step": 3526 }, { "epoch": 0.41823787501482274, "grad_norm": 1.4700536867257123, "learning_rate": 4.9274649721902476e-05, "loss": 0.814, "step": 3527 }, { "epoch": 0.4183564567769477, "grad_norm": 1.6486968124252013, "learning_rate": 4.927407559633373e-05, "loss": 0.657, "step": 3528 }, { "epoch": 0.4184750385390727, "grad_norm": 1.784845725906869, "learning_rate": 4.927350124698775e-05, "loss": 0.7814, "step": 3529 }, { "epoch": 0.4185936203011977, "grad_norm": 1.3335203233829882, "learning_rate": 4.927292667386986e-05, "loss": 0.606, "step": 3530 }, { "epoch": 0.41871220206332266, "grad_norm": 1.2989491997378126, "learning_rate": 4.927235187698536e-05, "loss": 0.6775, "step": 3531 }, { "epoch": 0.41883078382544764, "grad_norm": 1.4535845474685962, "learning_rate": 4.9271776856339535e-05, "loss": 0.7103, "step": 3532 }, { "epoch": 0.4189493655875726, "grad_norm": 1.5546267145940014, "learning_rate": 4.927120161193769e-05, "loss": 0.7025, "step": 3533 }, { "epoch": 0.4190679473496976, "grad_norm": 1.6460273879388636, "learning_rate": 4.927062614378514e-05, "loss": 0.8167, "step": 3534 }, { "epoch": 0.4191865291118226, "grad_norm": 1.3792996867155898, "learning_rate": 4.927005045188716e-05, "loss": 0.555, "step": 3535 }, { "epoch": 0.41930511087394756, "grad_norm": 1.432479930635415, "learning_rate": 4.9269474536249094e-05, "loss": 0.5637, "step": 3536 }, { "epoch": 0.4194236926360726, "grad_norm": 1.3652992542784572, "learning_rate": 4.926889839687623e-05, "loss": 0.6221, "step": 3537 }, { "epoch": 0.4195422743981976, "grad_norm": 1.3187452075054733, "learning_rate": 4.926832203377388e-05, "loss": 0.6293, "step": 3538 }, { "epoch": 0.41966085616032256, "grad_norm": 1.6330179847261659, "learning_rate": 4.9267745446947356e-05, "loss": 0.6778, "step": 3539 }, { "epoch": 0.41977943792244754, "grad_norm": 1.975727614539572, "learning_rate": 4.926716863640198e-05, "loss": 1.0026, "step": 3540 }, { "epoch": 0.4198980196845725, "grad_norm": 1.958121796048007, "learning_rate": 4.926659160214307e-05, "loss": 0.7335, "step": 3541 }, { "epoch": 0.4200166014466975, "grad_norm": 1.6498080152457262, "learning_rate": 4.926601434417595e-05, "loss": 0.6311, "step": 3542 }, { "epoch": 0.4201351832088225, "grad_norm": 1.7130506452521574, "learning_rate": 4.926543686250593e-05, "loss": 0.8736, "step": 3543 }, { "epoch": 0.42025376497094746, "grad_norm": 1.5842018150112882, "learning_rate": 4.9264859157138345e-05, "loss": 0.8008, "step": 3544 }, { "epoch": 0.42037234673307244, "grad_norm": 1.8150917980430235, "learning_rate": 4.92642812280785e-05, "loss": 0.9648, "step": 3545 }, { "epoch": 0.4204909284951974, "grad_norm": 1.6833113126739736, "learning_rate": 4.9263703075331745e-05, "loss": 0.726, "step": 3546 }, { "epoch": 0.4206095102573224, "grad_norm": 1.6405329373035606, "learning_rate": 4.9263124698903404e-05, "loss": 0.6916, "step": 3547 }, { "epoch": 0.4207280920194474, "grad_norm": 1.423317558747031, "learning_rate": 4.92625460987988e-05, "loss": 0.7874, "step": 3548 }, { "epoch": 0.42084667378157237, "grad_norm": 1.9312037684789953, "learning_rate": 4.926196727502328e-05, "loss": 1.0744, "step": 3549 }, { "epoch": 0.4209652555436974, "grad_norm": 1.5607958927225387, "learning_rate": 4.9261388227582164e-05, "loss": 0.7294, "step": 3550 }, { "epoch": 0.4210838373058224, "grad_norm": 1.5326288238551535, "learning_rate": 4.926080895648081e-05, "loss": 0.5466, "step": 3551 }, { "epoch": 0.42120241906794736, "grad_norm": 1.4614499762077864, "learning_rate": 4.926022946172454e-05, "loss": 0.5313, "step": 3552 }, { "epoch": 0.42132100083007235, "grad_norm": 1.2185440339609745, "learning_rate": 4.9259649743318715e-05, "loss": 0.6272, "step": 3553 }, { "epoch": 0.4214395825921973, "grad_norm": 1.23296350371015, "learning_rate": 4.925906980126866e-05, "loss": 0.6447, "step": 3554 }, { "epoch": 0.4215581643543223, "grad_norm": 1.5527661833930966, "learning_rate": 4.925848963557973e-05, "loss": 0.5731, "step": 3555 }, { "epoch": 0.4216767461164473, "grad_norm": 1.416556079216272, "learning_rate": 4.925790924625727e-05, "loss": 0.7796, "step": 3556 }, { "epoch": 0.42179532787857227, "grad_norm": 1.4456527337256488, "learning_rate": 4.925732863330664e-05, "loss": 0.6367, "step": 3557 }, { "epoch": 0.42191390964069725, "grad_norm": 1.5393838260331123, "learning_rate": 4.925674779673319e-05, "loss": 0.4778, "step": 3558 }, { "epoch": 0.42203249140282223, "grad_norm": 1.1712302774523844, "learning_rate": 4.925616673654226e-05, "loss": 0.4387, "step": 3559 }, { "epoch": 0.4221510731649472, "grad_norm": 1.7376767364009615, "learning_rate": 4.925558545273923e-05, "loss": 0.8326, "step": 3560 }, { "epoch": 0.4222696549270722, "grad_norm": 1.781427047547457, "learning_rate": 4.925500394532944e-05, "loss": 0.785, "step": 3561 }, { "epoch": 0.4223882366891972, "grad_norm": 2.062542179565209, "learning_rate": 4.925442221431825e-05, "loss": 0.7379, "step": 3562 }, { "epoch": 0.4225068184513222, "grad_norm": 1.69686844221876, "learning_rate": 4.9253840259711044e-05, "loss": 0.7686, "step": 3563 }, { "epoch": 0.4226254002134472, "grad_norm": 1.6955283678200248, "learning_rate": 4.925325808151316e-05, "loss": 0.7466, "step": 3564 }, { "epoch": 0.42274398197557217, "grad_norm": 1.846277668698648, "learning_rate": 4.925267567972999e-05, "loss": 0.816, "step": 3565 }, { "epoch": 0.42286256373769715, "grad_norm": 1.8550378681862272, "learning_rate": 4.925209305436688e-05, "loss": 0.7164, "step": 3566 }, { "epoch": 0.42298114549982213, "grad_norm": 1.826908253972868, "learning_rate": 4.925151020542922e-05, "loss": 0.916, "step": 3567 }, { "epoch": 0.4230997272619471, "grad_norm": 1.3874577073278027, "learning_rate": 4.925092713292237e-05, "loss": 0.6935, "step": 3568 }, { "epoch": 0.4232183090240721, "grad_norm": 1.5609961314884795, "learning_rate": 4.925034383685171e-05, "loss": 0.7876, "step": 3569 }, { "epoch": 0.4233368907861971, "grad_norm": 1.4713092804102157, "learning_rate": 4.924976031722262e-05, "loss": 0.6411, "step": 3570 }, { "epoch": 0.42345547254832205, "grad_norm": 1.4704412599397514, "learning_rate": 4.924917657404048e-05, "loss": 0.7979, "step": 3571 }, { "epoch": 0.42357405431044703, "grad_norm": 1.520337509263915, "learning_rate": 4.924859260731066e-05, "loss": 0.7182, "step": 3572 }, { "epoch": 0.423692636072572, "grad_norm": 1.3856828313346083, "learning_rate": 4.924800841703856e-05, "loss": 0.6619, "step": 3573 }, { "epoch": 0.423811217834697, "grad_norm": 1.4983785203297317, "learning_rate": 4.9247424003229545e-05, "loss": 0.6431, "step": 3574 }, { "epoch": 0.42392979959682203, "grad_norm": 1.3017804331234106, "learning_rate": 4.924683936588903e-05, "loss": 0.6925, "step": 3575 }, { "epoch": 0.424048381358947, "grad_norm": 1.2734043362734362, "learning_rate": 4.9246254505022386e-05, "loss": 0.6866, "step": 3576 }, { "epoch": 0.424166963121072, "grad_norm": 1.6619665718376493, "learning_rate": 4.9245669420635e-05, "loss": 0.6716, "step": 3577 }, { "epoch": 0.424285544883197, "grad_norm": 1.486131225865649, "learning_rate": 4.9245084112732276e-05, "loss": 0.4649, "step": 3578 }, { "epoch": 0.42440412664532196, "grad_norm": 1.2932597737835567, "learning_rate": 4.924449858131961e-05, "loss": 0.5919, "step": 3579 }, { "epoch": 0.42452270840744694, "grad_norm": 2.2149386210146718, "learning_rate": 4.924391282640241e-05, "loss": 0.9465, "step": 3580 }, { "epoch": 0.4246412901695719, "grad_norm": 1.547804336265583, "learning_rate": 4.9243326847986043e-05, "loss": 0.7911, "step": 3581 }, { "epoch": 0.4247598719316969, "grad_norm": 1.4684751791866157, "learning_rate": 4.9242740646075944e-05, "loss": 0.5629, "step": 3582 }, { "epoch": 0.4248784536938219, "grad_norm": 2.00338335890547, "learning_rate": 4.9242154220677496e-05, "loss": 0.8294, "step": 3583 }, { "epoch": 0.42499703545594686, "grad_norm": 1.4541086795040787, "learning_rate": 4.924156757179612e-05, "loss": 0.785, "step": 3584 }, { "epoch": 0.42511561721807184, "grad_norm": 1.377526745544194, "learning_rate": 4.924098069943722e-05, "loss": 0.6608, "step": 3585 }, { "epoch": 0.4252341989801968, "grad_norm": 1.5842606218308786, "learning_rate": 4.92403936036062e-05, "loss": 0.7119, "step": 3586 }, { "epoch": 0.42535278074232186, "grad_norm": 1.4559660956518086, "learning_rate": 4.923980628430848e-05, "loss": 0.5201, "step": 3587 }, { "epoch": 0.42547136250444684, "grad_norm": 2.0140504689614858, "learning_rate": 4.9239218741549465e-05, "loss": 1.0032, "step": 3588 }, { "epoch": 0.4255899442665718, "grad_norm": 1.6600662389753644, "learning_rate": 4.923863097533458e-05, "loss": 0.7328, "step": 3589 }, { "epoch": 0.4257085260286968, "grad_norm": 1.7216566567609004, "learning_rate": 4.923804298566924e-05, "loss": 0.776, "step": 3590 }, { "epoch": 0.4258271077908218, "grad_norm": 1.6586163931785904, "learning_rate": 4.9237454772558875e-05, "loss": 0.768, "step": 3591 }, { "epoch": 0.42594568955294676, "grad_norm": 1.4797600851048218, "learning_rate": 4.923686633600889e-05, "loss": 0.6951, "step": 3592 }, { "epoch": 0.42606427131507174, "grad_norm": 1.9780805386359785, "learning_rate": 4.923627767602472e-05, "loss": 0.8154, "step": 3593 }, { "epoch": 0.4261828530771967, "grad_norm": 1.746785944833219, "learning_rate": 4.923568879261179e-05, "loss": 0.9425, "step": 3594 }, { "epoch": 0.4263014348393217, "grad_norm": 1.8735693465200096, "learning_rate": 4.923509968577553e-05, "loss": 0.7171, "step": 3595 }, { "epoch": 0.4264200166014467, "grad_norm": 1.3773955300560434, "learning_rate": 4.923451035552137e-05, "loss": 0.7619, "step": 3596 }, { "epoch": 0.42653859836357166, "grad_norm": 1.6528130108882608, "learning_rate": 4.9233920801854735e-05, "loss": 0.7374, "step": 3597 }, { "epoch": 0.42665718012569664, "grad_norm": 1.5627615246274467, "learning_rate": 4.923333102478108e-05, "loss": 0.6958, "step": 3598 }, { "epoch": 0.4267757618878217, "grad_norm": 1.1670083686722073, "learning_rate": 4.9232741024305827e-05, "loss": 0.5657, "step": 3599 }, { "epoch": 0.42689434364994666, "grad_norm": 1.2898307555228037, "learning_rate": 4.9232150800434415e-05, "loss": 0.5877, "step": 3600 }, { "epoch": 0.42701292541207164, "grad_norm": 1.6677942892979996, "learning_rate": 4.9231560353172293e-05, "loss": 1.029, "step": 3601 }, { "epoch": 0.4271315071741966, "grad_norm": 1.324527837267684, "learning_rate": 4.923096968252489e-05, "loss": 0.8308, "step": 3602 }, { "epoch": 0.4272500889363216, "grad_norm": 1.5354695954732414, "learning_rate": 4.923037878849767e-05, "loss": 0.7351, "step": 3603 }, { "epoch": 0.4273686706984466, "grad_norm": 1.2628997696640223, "learning_rate": 4.9229787671096064e-05, "loss": 0.5528, "step": 3604 }, { "epoch": 0.42748725246057157, "grad_norm": 1.475423898243588, "learning_rate": 4.922919633032553e-05, "loss": 0.6277, "step": 3605 }, { "epoch": 0.42760583422269655, "grad_norm": 1.5262853356718367, "learning_rate": 4.922860476619152e-05, "loss": 0.6486, "step": 3606 }, { "epoch": 0.4277244159848215, "grad_norm": 1.7899732784824407, "learning_rate": 4.9228012978699485e-05, "loss": 0.753, "step": 3607 }, { "epoch": 0.4278429977469465, "grad_norm": 1.152657026452765, "learning_rate": 4.9227420967854876e-05, "loss": 0.4617, "step": 3608 }, { "epoch": 0.4279615795090715, "grad_norm": 1.5508543915785118, "learning_rate": 4.922682873366316e-05, "loss": 0.6744, "step": 3609 }, { "epoch": 0.42808016127119647, "grad_norm": 2.041771537754529, "learning_rate": 4.922623627612979e-05, "loss": 0.8721, "step": 3610 }, { "epoch": 0.42819874303332145, "grad_norm": 1.5526423343742715, "learning_rate": 4.9225643595260226e-05, "loss": 0.6584, "step": 3611 }, { "epoch": 0.4283173247954465, "grad_norm": 1.429012045737173, "learning_rate": 4.922505069105995e-05, "loss": 0.6408, "step": 3612 }, { "epoch": 0.42843590655757147, "grad_norm": 1.6461667999253002, "learning_rate": 4.92244575635344e-05, "loss": 0.7386, "step": 3613 }, { "epoch": 0.42855448831969645, "grad_norm": 1.6958129230644987, "learning_rate": 4.922386421268906e-05, "loss": 0.7408, "step": 3614 }, { "epoch": 0.42867307008182143, "grad_norm": 1.3678971934138342, "learning_rate": 4.9223270638529395e-05, "loss": 0.6246, "step": 3615 }, { "epoch": 0.4287916518439464, "grad_norm": 2.2514740346975484, "learning_rate": 4.9222676841060884e-05, "loss": 0.9973, "step": 3616 }, { "epoch": 0.4289102336060714, "grad_norm": 1.7456557993212967, "learning_rate": 4.9222082820288995e-05, "loss": 0.6772, "step": 3617 }, { "epoch": 0.42902881536819637, "grad_norm": 1.5644205018426343, "learning_rate": 4.9221488576219204e-05, "loss": 0.7423, "step": 3618 }, { "epoch": 0.42914739713032135, "grad_norm": 1.5231342306745959, "learning_rate": 4.9220894108856993e-05, "loss": 0.7717, "step": 3619 }, { "epoch": 0.42926597889244633, "grad_norm": 1.5828451146834837, "learning_rate": 4.922029941820784e-05, "loss": 0.9748, "step": 3620 }, { "epoch": 0.4293845606545713, "grad_norm": 1.57544429361408, "learning_rate": 4.921970450427722e-05, "loss": 0.8111, "step": 3621 }, { "epoch": 0.4295031424166963, "grad_norm": 1.3832598400448304, "learning_rate": 4.9219109367070634e-05, "loss": 0.8215, "step": 3622 }, { "epoch": 0.4296217241788213, "grad_norm": 1.4107701692129464, "learning_rate": 4.921851400659355e-05, "loss": 0.8369, "step": 3623 }, { "epoch": 0.4297403059409463, "grad_norm": 1.5164034619063047, "learning_rate": 4.921791842285147e-05, "loss": 0.7799, "step": 3624 }, { "epoch": 0.4298588877030713, "grad_norm": 1.326331700257152, "learning_rate": 4.921732261584989e-05, "loss": 0.5584, "step": 3625 }, { "epoch": 0.42997746946519627, "grad_norm": 1.7467407500074026, "learning_rate": 4.921672658559428e-05, "loss": 1.0535, "step": 3626 }, { "epoch": 0.43009605122732125, "grad_norm": 1.472094091833865, "learning_rate": 4.921613033209015e-05, "loss": 0.6261, "step": 3627 }, { "epoch": 0.43021463298944623, "grad_norm": 1.5939412527840942, "learning_rate": 4.9215533855343e-05, "loss": 0.827, "step": 3628 }, { "epoch": 0.4303332147515712, "grad_norm": 1.4491678625367623, "learning_rate": 4.9214937155358314e-05, "loss": 0.7466, "step": 3629 }, { "epoch": 0.4304517965136962, "grad_norm": 1.7112870242884934, "learning_rate": 4.921434023214161e-05, "loss": 0.9231, "step": 3630 }, { "epoch": 0.4305703782758212, "grad_norm": 1.486700237721347, "learning_rate": 4.921374308569837e-05, "loss": 0.7307, "step": 3631 }, { "epoch": 0.43068896003794616, "grad_norm": 1.494024944603728, "learning_rate": 4.9213145716034126e-05, "loss": 0.5931, "step": 3632 }, { "epoch": 0.43080754180007114, "grad_norm": 1.400027914651356, "learning_rate": 4.921254812315437e-05, "loss": 0.6403, "step": 3633 }, { "epoch": 0.4309261235621961, "grad_norm": 1.9638210870232098, "learning_rate": 4.92119503070646e-05, "loss": 0.7431, "step": 3634 }, { "epoch": 0.4310447053243211, "grad_norm": 1.7555278779462034, "learning_rate": 4.921135226777035e-05, "loss": 0.743, "step": 3635 }, { "epoch": 0.4311632870864461, "grad_norm": 1.6976217148129749, "learning_rate": 4.921075400527712e-05, "loss": 0.714, "step": 3636 }, { "epoch": 0.4312818688485711, "grad_norm": 1.5782072039517103, "learning_rate": 4.9210155519590415e-05, "loss": 0.8035, "step": 3637 }, { "epoch": 0.4314004506106961, "grad_norm": 1.4685744763962134, "learning_rate": 4.9209556810715776e-05, "loss": 0.7166, "step": 3638 }, { "epoch": 0.4315190323728211, "grad_norm": 1.4634655088235458, "learning_rate": 4.920895787865871e-05, "loss": 0.8275, "step": 3639 }, { "epoch": 0.43163761413494606, "grad_norm": 1.3656673551094536, "learning_rate": 4.920835872342474e-05, "loss": 0.5801, "step": 3640 }, { "epoch": 0.43175619589707104, "grad_norm": 1.0789543302385225, "learning_rate": 4.920775934501939e-05, "loss": 0.4636, "step": 3641 }, { "epoch": 0.431874777659196, "grad_norm": 1.691919714558414, "learning_rate": 4.920715974344817e-05, "loss": 0.7791, "step": 3642 }, { "epoch": 0.431993359421321, "grad_norm": 1.7764887615934934, "learning_rate": 4.9206559918716646e-05, "loss": 0.8471, "step": 3643 }, { "epoch": 0.432111941183446, "grad_norm": 1.6564892670750262, "learning_rate": 4.92059598708303e-05, "loss": 0.6381, "step": 3644 }, { "epoch": 0.43223052294557096, "grad_norm": 1.4607827108712397, "learning_rate": 4.9205359599794705e-05, "loss": 0.7051, "step": 3645 }, { "epoch": 0.43234910470769594, "grad_norm": 1.462463283883136, "learning_rate": 4.9204759105615374e-05, "loss": 0.6712, "step": 3646 }, { "epoch": 0.4324676864698209, "grad_norm": 1.3440757466446647, "learning_rate": 4.920415838829784e-05, "loss": 0.8212, "step": 3647 }, { "epoch": 0.4325862682319459, "grad_norm": 1.5180572758932502, "learning_rate": 4.920355744784765e-05, "loss": 0.6539, "step": 3648 }, { "epoch": 0.43270484999407094, "grad_norm": 1.6734326087894855, "learning_rate": 4.920295628427034e-05, "loss": 0.707, "step": 3649 }, { "epoch": 0.4328234317561959, "grad_norm": 1.371012472460331, "learning_rate": 4.920235489757145e-05, "loss": 0.6526, "step": 3650 }, { "epoch": 0.4329420135183209, "grad_norm": 1.5815516194415, "learning_rate": 4.9201753287756525e-05, "loss": 0.7194, "step": 3651 }, { "epoch": 0.4330605952804459, "grad_norm": 1.4451949171689138, "learning_rate": 4.920115145483112e-05, "loss": 0.6913, "step": 3652 }, { "epoch": 0.43317917704257086, "grad_norm": 1.8469045899301022, "learning_rate": 4.9200549398800776e-05, "loss": 0.8504, "step": 3653 }, { "epoch": 0.43329775880469584, "grad_norm": 1.2330769137244062, "learning_rate": 4.919994711967104e-05, "loss": 0.4834, "step": 3654 }, { "epoch": 0.4334163405668208, "grad_norm": 1.6734947812265601, "learning_rate": 4.919934461744746e-05, "loss": 0.8655, "step": 3655 }, { "epoch": 0.4335349223289458, "grad_norm": 1.7832224715093994, "learning_rate": 4.9198741892135615e-05, "loss": 0.6894, "step": 3656 }, { "epoch": 0.4336535040910708, "grad_norm": 1.5120723118565969, "learning_rate": 4.919813894374103e-05, "loss": 0.7809, "step": 3657 }, { "epoch": 0.43377208585319577, "grad_norm": 1.4009153144787523, "learning_rate": 4.919753577226928e-05, "loss": 0.7459, "step": 3658 }, { "epoch": 0.43389066761532075, "grad_norm": 1.758782589767061, "learning_rate": 4.919693237772593e-05, "loss": 0.8602, "step": 3659 }, { "epoch": 0.4340092493774457, "grad_norm": 1.8162248823592884, "learning_rate": 4.919632876011653e-05, "loss": 0.851, "step": 3660 }, { "epoch": 0.4341278311395707, "grad_norm": 1.5342316444953457, "learning_rate": 4.9195724919446654e-05, "loss": 0.8674, "step": 3661 }, { "epoch": 0.43424641290169574, "grad_norm": 1.71097521152454, "learning_rate": 4.919512085572186e-05, "loss": 0.8472, "step": 3662 }, { "epoch": 0.4343649946638207, "grad_norm": 1.2693315980607447, "learning_rate": 4.919451656894772e-05, "loss": 0.6663, "step": 3663 }, { "epoch": 0.4344835764259457, "grad_norm": 1.500321649820223, "learning_rate": 4.9193912059129814e-05, "loss": 0.7773, "step": 3664 }, { "epoch": 0.4346021581880707, "grad_norm": 1.4185710089312993, "learning_rate": 4.9193307326273704e-05, "loss": 0.5322, "step": 3665 }, { "epoch": 0.43472073995019567, "grad_norm": 1.3746099979327262, "learning_rate": 4.919270237038497e-05, "loss": 0.702, "step": 3666 }, { "epoch": 0.43483932171232065, "grad_norm": 1.3613530508839398, "learning_rate": 4.919209719146919e-05, "loss": 0.8026, "step": 3667 }, { "epoch": 0.43495790347444563, "grad_norm": 1.4506416775954576, "learning_rate": 4.9191491789531926e-05, "loss": 0.6914, "step": 3668 }, { "epoch": 0.4350764852365706, "grad_norm": 1.3045897269564, "learning_rate": 4.9190886164578785e-05, "loss": 0.5917, "step": 3669 }, { "epoch": 0.4351950669986956, "grad_norm": 1.2444689054122857, "learning_rate": 4.9190280316615347e-05, "loss": 0.601, "step": 3670 }, { "epoch": 0.43531364876082057, "grad_norm": 1.4338860185182447, "learning_rate": 4.9189674245647174e-05, "loss": 0.6681, "step": 3671 }, { "epoch": 0.43543223052294555, "grad_norm": 1.8262187051994045, "learning_rate": 4.9189067951679866e-05, "loss": 0.8211, "step": 3672 }, { "epoch": 0.43555081228507053, "grad_norm": 1.5012674199011737, "learning_rate": 4.9188461434719025e-05, "loss": 0.5294, "step": 3673 }, { "epoch": 0.43566939404719557, "grad_norm": 1.8051003770663558, "learning_rate": 4.918785469477022e-05, "loss": 0.8733, "step": 3674 }, { "epoch": 0.43578797580932055, "grad_norm": 1.7767769820440422, "learning_rate": 4.918724773183907e-05, "loss": 0.7308, "step": 3675 }, { "epoch": 0.43590655757144553, "grad_norm": 1.659089147148862, "learning_rate": 4.918664054593114e-05, "loss": 0.5209, "step": 3676 }, { "epoch": 0.4360251393335705, "grad_norm": 1.4663770956953726, "learning_rate": 4.9186033137052055e-05, "loss": 0.593, "step": 3677 }, { "epoch": 0.4361437210956955, "grad_norm": 1.656563482684807, "learning_rate": 4.918542550520741e-05, "loss": 0.8125, "step": 3678 }, { "epoch": 0.43626230285782047, "grad_norm": 2.719523158507605, "learning_rate": 4.9184817650402784e-05, "loss": 0.7172, "step": 3679 }, { "epoch": 0.43638088461994545, "grad_norm": 1.5811272923199091, "learning_rate": 4.91842095726438e-05, "loss": 0.6138, "step": 3680 }, { "epoch": 0.43649946638207043, "grad_norm": 1.9327206016566538, "learning_rate": 4.918360127193606e-05, "loss": 1.0, "step": 3681 }, { "epoch": 0.4366180481441954, "grad_norm": 1.8397618233140558, "learning_rate": 4.918299274828519e-05, "loss": 0.8983, "step": 3682 }, { "epoch": 0.4367366299063204, "grad_norm": 1.4124432541445955, "learning_rate": 4.918238400169676e-05, "loss": 0.568, "step": 3683 }, { "epoch": 0.4368552116684454, "grad_norm": 1.327914249272055, "learning_rate": 4.918177503217641e-05, "loss": 0.4976, "step": 3684 }, { "epoch": 0.43697379343057036, "grad_norm": 1.7923800931396248, "learning_rate": 4.918116583972975e-05, "loss": 0.6769, "step": 3685 }, { "epoch": 0.43709237519269534, "grad_norm": 1.3925756345635616, "learning_rate": 4.9180556424362396e-05, "loss": 0.6912, "step": 3686 }, { "epoch": 0.4372109569548204, "grad_norm": 1.4603745488788336, "learning_rate": 4.917994678607996e-05, "loss": 0.855, "step": 3687 }, { "epoch": 0.43732953871694535, "grad_norm": 1.8095696079747157, "learning_rate": 4.917933692488807e-05, "loss": 0.7382, "step": 3688 }, { "epoch": 0.43744812047907033, "grad_norm": 1.849922042895717, "learning_rate": 4.917872684079234e-05, "loss": 0.9788, "step": 3689 }, { "epoch": 0.4375667022411953, "grad_norm": 1.7149869859652864, "learning_rate": 4.91781165337984e-05, "loss": 0.7345, "step": 3690 }, { "epoch": 0.4376852840033203, "grad_norm": 1.4496827083648698, "learning_rate": 4.917750600391188e-05, "loss": 0.583, "step": 3691 }, { "epoch": 0.4378038657654453, "grad_norm": 1.5589365568101325, "learning_rate": 4.917689525113839e-05, "loss": 0.753, "step": 3692 }, { "epoch": 0.43792244752757026, "grad_norm": 1.4597175602431152, "learning_rate": 4.917628427548358e-05, "loss": 0.6889, "step": 3693 }, { "epoch": 0.43804102928969524, "grad_norm": 1.823583848891648, "learning_rate": 4.9175673076953076e-05, "loss": 0.8768, "step": 3694 }, { "epoch": 0.4381596110518202, "grad_norm": 1.7123028027872849, "learning_rate": 4.917506165555252e-05, "loss": 0.6102, "step": 3695 }, { "epoch": 0.4382781928139452, "grad_norm": 1.4893953471738448, "learning_rate": 4.917445001128753e-05, "loss": 0.6914, "step": 3696 }, { "epoch": 0.4383967745760702, "grad_norm": 1.4332809003314921, "learning_rate": 4.9173838144163765e-05, "loss": 0.7672, "step": 3697 }, { "epoch": 0.43851535633819516, "grad_norm": 1.8586618413710596, "learning_rate": 4.917322605418685e-05, "loss": 0.9991, "step": 3698 }, { "epoch": 0.4386339381003202, "grad_norm": 1.440583183728854, "learning_rate": 4.9172613741362436e-05, "loss": 0.7482, "step": 3699 }, { "epoch": 0.4387525198624452, "grad_norm": 1.5492880173245118, "learning_rate": 4.917200120569616e-05, "loss": 0.9971, "step": 3700 }, { "epoch": 0.43887110162457016, "grad_norm": 1.3793141066878345, "learning_rate": 4.917138844719368e-05, "loss": 0.6898, "step": 3701 }, { "epoch": 0.43898968338669514, "grad_norm": 1.6170643774535185, "learning_rate": 4.917077546586064e-05, "loss": 0.9576, "step": 3702 }, { "epoch": 0.4391082651488201, "grad_norm": 1.3901001031585087, "learning_rate": 4.917016226170269e-05, "loss": 0.5633, "step": 3703 }, { "epoch": 0.4392268469109451, "grad_norm": 1.3449762708164168, "learning_rate": 4.9169548834725486e-05, "loss": 0.7096, "step": 3704 }, { "epoch": 0.4393454286730701, "grad_norm": 1.2260697050694453, "learning_rate": 4.9168935184934674e-05, "loss": 0.5112, "step": 3705 }, { "epoch": 0.43946401043519506, "grad_norm": 1.5825576765995726, "learning_rate": 4.916832131233592e-05, "loss": 0.7319, "step": 3706 }, { "epoch": 0.43958259219732004, "grad_norm": 1.5680591056232225, "learning_rate": 4.916770721693488e-05, "loss": 0.7233, "step": 3707 }, { "epoch": 0.439701173959445, "grad_norm": 1.4229518217731505, "learning_rate": 4.9167092898737224e-05, "loss": 0.5274, "step": 3708 }, { "epoch": 0.43981975572157, "grad_norm": 1.5300068456784883, "learning_rate": 4.91664783577486e-05, "loss": 0.7531, "step": 3709 }, { "epoch": 0.439938337483695, "grad_norm": 1.5310814384588105, "learning_rate": 4.916586359397468e-05, "loss": 0.6816, "step": 3710 }, { "epoch": 0.44005691924581997, "grad_norm": 2.1172809827624763, "learning_rate": 4.9165248607421136e-05, "loss": 1.1004, "step": 3711 }, { "epoch": 0.440175501007945, "grad_norm": 1.5464155690483254, "learning_rate": 4.9164633398093624e-05, "loss": 0.702, "step": 3712 }, { "epoch": 0.44029408277007, "grad_norm": 1.4943619429465729, "learning_rate": 4.9164017965997836e-05, "loss": 0.8011, "step": 3713 }, { "epoch": 0.44041266453219496, "grad_norm": 1.6845957672465786, "learning_rate": 4.916340231113942e-05, "loss": 0.7088, "step": 3714 }, { "epoch": 0.44053124629431994, "grad_norm": 2.0549469251025894, "learning_rate": 4.916278643352408e-05, "loss": 0.9282, "step": 3715 }, { "epoch": 0.4406498280564449, "grad_norm": 1.5611075286804772, "learning_rate": 4.916217033315747e-05, "loss": 0.7549, "step": 3716 }, { "epoch": 0.4407684098185699, "grad_norm": 1.4213731132756906, "learning_rate": 4.9161554010045283e-05, "loss": 0.4818, "step": 3717 }, { "epoch": 0.4408869915806949, "grad_norm": 1.6234472103234088, "learning_rate": 4.9160937464193205e-05, "loss": 0.9881, "step": 3718 }, { "epoch": 0.44100557334281987, "grad_norm": 1.5957563866931437, "learning_rate": 4.91603206956069e-05, "loss": 0.6846, "step": 3719 }, { "epoch": 0.44112415510494485, "grad_norm": 1.4734756467575763, "learning_rate": 4.9159703704292064e-05, "loss": 0.6638, "step": 3720 }, { "epoch": 0.44124273686706983, "grad_norm": 1.8775096623885879, "learning_rate": 4.915908649025439e-05, "loss": 0.9088, "step": 3721 }, { "epoch": 0.4413613186291948, "grad_norm": 1.4129643210765253, "learning_rate": 4.9158469053499565e-05, "loss": 0.55, "step": 3722 }, { "epoch": 0.4414799003913198, "grad_norm": 1.4467042404042083, "learning_rate": 4.915785139403328e-05, "loss": 0.5976, "step": 3723 }, { "epoch": 0.4415984821534448, "grad_norm": 1.8067560227426842, "learning_rate": 4.915723351186123e-05, "loss": 0.8322, "step": 3724 }, { "epoch": 0.4417170639155698, "grad_norm": 1.501098409605572, "learning_rate": 4.9156615406989117e-05, "loss": 0.6459, "step": 3725 }, { "epoch": 0.4418356456776948, "grad_norm": 1.6027400691424911, "learning_rate": 4.915599707942263e-05, "loss": 0.5494, "step": 3726 }, { "epoch": 0.44195422743981977, "grad_norm": 1.5644844153572834, "learning_rate": 4.915537852916747e-05, "loss": 0.9371, "step": 3727 }, { "epoch": 0.44207280920194475, "grad_norm": 1.8197042138895774, "learning_rate": 4.915475975622934e-05, "loss": 0.7445, "step": 3728 }, { "epoch": 0.44219139096406973, "grad_norm": 1.4482409800832963, "learning_rate": 4.9154140760613944e-05, "loss": 0.6838, "step": 3729 }, { "epoch": 0.4423099727261947, "grad_norm": 1.9586948516918967, "learning_rate": 4.9153521542326994e-05, "loss": 0.8686, "step": 3730 }, { "epoch": 0.4424285544883197, "grad_norm": 1.8051914067186734, "learning_rate": 4.9152902101374184e-05, "loss": 0.7008, "step": 3731 }, { "epoch": 0.4425471362504447, "grad_norm": 1.4752092220789619, "learning_rate": 4.915228243776125e-05, "loss": 0.5936, "step": 3732 }, { "epoch": 0.44266571801256965, "grad_norm": 1.4247274664317182, "learning_rate": 4.9151662551493884e-05, "loss": 0.5571, "step": 3733 }, { "epoch": 0.44278429977469463, "grad_norm": 2.1067707284390162, "learning_rate": 4.91510424425778e-05, "loss": 0.9149, "step": 3734 }, { "epoch": 0.4429028815368196, "grad_norm": 1.5361355697085894, "learning_rate": 4.915042211101872e-05, "loss": 0.7869, "step": 3735 }, { "epoch": 0.4430214632989446, "grad_norm": 1.3418294877099208, "learning_rate": 4.9149801556822374e-05, "loss": 0.5111, "step": 3736 }, { "epoch": 0.44314004506106963, "grad_norm": 1.5931091827568034, "learning_rate": 4.914918077999447e-05, "loss": 0.8624, "step": 3737 }, { "epoch": 0.4432586268231946, "grad_norm": 1.7827612958946482, "learning_rate": 4.9148559780540726e-05, "loss": 0.9248, "step": 3738 }, { "epoch": 0.4433772085853196, "grad_norm": 1.4053596753527213, "learning_rate": 4.9147938558466876e-05, "loss": 0.7461, "step": 3739 }, { "epoch": 0.4434957903474446, "grad_norm": 1.4329184616364687, "learning_rate": 4.9147317113778645e-05, "loss": 0.5927, "step": 3740 }, { "epoch": 0.44361437210956955, "grad_norm": 1.5630833810454374, "learning_rate": 4.914669544648177e-05, "loss": 0.5502, "step": 3741 }, { "epoch": 0.44373295387169454, "grad_norm": 1.56400966014751, "learning_rate": 4.914607355658196e-05, "loss": 0.6651, "step": 3742 }, { "epoch": 0.4438515356338195, "grad_norm": 1.7616046534451268, "learning_rate": 4.9145451444084966e-05, "loss": 0.6851, "step": 3743 }, { "epoch": 0.4439701173959445, "grad_norm": 1.5878650119468072, "learning_rate": 4.914482910899653e-05, "loss": 0.6144, "step": 3744 }, { "epoch": 0.4440886991580695, "grad_norm": 1.3360251556162615, "learning_rate": 4.914420655132236e-05, "loss": 0.587, "step": 3745 }, { "epoch": 0.44420728092019446, "grad_norm": 1.3908556014931897, "learning_rate": 4.914358377106822e-05, "loss": 0.6415, "step": 3746 }, { "epoch": 0.44432586268231944, "grad_norm": 1.431619376695129, "learning_rate": 4.914296076823985e-05, "loss": 0.5723, "step": 3747 }, { "epoch": 0.4444444444444444, "grad_norm": 1.556430548594164, "learning_rate": 4.9142337542842985e-05, "loss": 0.7124, "step": 3748 }, { "epoch": 0.44456302620656946, "grad_norm": 1.7201589634273486, "learning_rate": 4.914171409488337e-05, "loss": 0.7343, "step": 3749 }, { "epoch": 0.44468160796869444, "grad_norm": 1.8242733860975722, "learning_rate": 4.914109042436676e-05, "loss": 0.8909, "step": 3750 }, { "epoch": 0.4448001897308194, "grad_norm": 1.4112996987407562, "learning_rate": 4.9140466531298904e-05, "loss": 0.576, "step": 3751 }, { "epoch": 0.4449187714929444, "grad_norm": 1.630391042923391, "learning_rate": 4.913984241568554e-05, "loss": 0.84, "step": 3752 }, { "epoch": 0.4450373532550694, "grad_norm": 1.6409502129124651, "learning_rate": 4.913921807753244e-05, "loss": 0.7816, "step": 3753 }, { "epoch": 0.44515593501719436, "grad_norm": 1.414269556362842, "learning_rate": 4.913859351684534e-05, "loss": 0.5965, "step": 3754 }, { "epoch": 0.44527451677931934, "grad_norm": 1.6129567739004687, "learning_rate": 4.913796873363001e-05, "loss": 0.6209, "step": 3755 }, { "epoch": 0.4453930985414443, "grad_norm": 1.8511535227983973, "learning_rate": 4.913734372789221e-05, "loss": 0.9404, "step": 3756 }, { "epoch": 0.4455116803035693, "grad_norm": 1.707954855749132, "learning_rate": 4.91367184996377e-05, "loss": 0.6642, "step": 3757 }, { "epoch": 0.4456302620656943, "grad_norm": 1.818370375033754, "learning_rate": 4.913609304887225e-05, "loss": 0.6689, "step": 3758 }, { "epoch": 0.44574884382781926, "grad_norm": 1.6860975368572186, "learning_rate": 4.9135467375601616e-05, "loss": 0.7352, "step": 3759 }, { "epoch": 0.44586742558994424, "grad_norm": 2.0096452690905955, "learning_rate": 4.9134841479831576e-05, "loss": 0.9705, "step": 3760 }, { "epoch": 0.4459860073520692, "grad_norm": 1.535999674825247, "learning_rate": 4.913421536156788e-05, "loss": 0.6785, "step": 3761 }, { "epoch": 0.44610458911419426, "grad_norm": 1.2198690861992927, "learning_rate": 4.913358902081632e-05, "loss": 0.3961, "step": 3762 }, { "epoch": 0.44622317087631924, "grad_norm": 1.7567315605999472, "learning_rate": 4.913296245758266e-05, "loss": 0.7412, "step": 3763 }, { "epoch": 0.4463417526384442, "grad_norm": 1.6277791287368613, "learning_rate": 4.9132335671872686e-05, "loss": 0.7336, "step": 3764 }, { "epoch": 0.4464603344005692, "grad_norm": 1.4532483072853397, "learning_rate": 4.913170866369217e-05, "loss": 0.6846, "step": 3765 }, { "epoch": 0.4465789161626942, "grad_norm": 1.4885014287867977, "learning_rate": 4.913108143304689e-05, "loss": 0.5426, "step": 3766 }, { "epoch": 0.44669749792481916, "grad_norm": 1.5773922728543264, "learning_rate": 4.913045397994263e-05, "loss": 1.0181, "step": 3767 }, { "epoch": 0.44681607968694415, "grad_norm": 1.3610474505671297, "learning_rate": 4.912982630438518e-05, "loss": 0.6499, "step": 3768 }, { "epoch": 0.4469346614490691, "grad_norm": 1.651622434406524, "learning_rate": 4.912919840638032e-05, "loss": 0.8597, "step": 3769 }, { "epoch": 0.4470532432111941, "grad_norm": 1.0520033149010617, "learning_rate": 4.912857028593384e-05, "loss": 0.3987, "step": 3770 }, { "epoch": 0.4471718249733191, "grad_norm": 1.6425076495795228, "learning_rate": 4.9127941943051525e-05, "loss": 0.883, "step": 3771 }, { "epoch": 0.44729040673544407, "grad_norm": 1.2731972881379685, "learning_rate": 4.9127313377739176e-05, "loss": 0.589, "step": 3772 }, { "epoch": 0.44740898849756905, "grad_norm": 1.9231656365556318, "learning_rate": 4.912668459000258e-05, "loss": 0.985, "step": 3773 }, { "epoch": 0.4475275702596941, "grad_norm": 1.2825504668294465, "learning_rate": 4.9126055579847545e-05, "loss": 0.7065, "step": 3774 }, { "epoch": 0.44764615202181907, "grad_norm": 1.719422836285591, "learning_rate": 4.912542634727986e-05, "loss": 0.722, "step": 3775 }, { "epoch": 0.44776473378394405, "grad_norm": 1.4388996069150808, "learning_rate": 4.912479689230533e-05, "loss": 0.628, "step": 3776 }, { "epoch": 0.447883315546069, "grad_norm": 1.4902129687725527, "learning_rate": 4.9124167214929755e-05, "loss": 0.7974, "step": 3777 }, { "epoch": 0.448001897308194, "grad_norm": 1.4918084741995443, "learning_rate": 4.912353731515894e-05, "loss": 0.6942, "step": 3778 }, { "epoch": 0.448120479070319, "grad_norm": 1.729523899728431, "learning_rate": 4.91229071929987e-05, "loss": 0.7166, "step": 3779 }, { "epoch": 0.44823906083244397, "grad_norm": 1.623715462843089, "learning_rate": 4.912227684845483e-05, "loss": 0.6594, "step": 3780 }, { "epoch": 0.44835764259456895, "grad_norm": 1.7478286897809354, "learning_rate": 4.912164628153315e-05, "loss": 0.9286, "step": 3781 }, { "epoch": 0.44847622435669393, "grad_norm": 1.4604256847957344, "learning_rate": 4.9121015492239476e-05, "loss": 0.4092, "step": 3782 }, { "epoch": 0.4485948061188189, "grad_norm": 1.3089477128924802, "learning_rate": 4.912038448057961e-05, "loss": 0.673, "step": 3783 }, { "epoch": 0.4487133878809439, "grad_norm": 1.5961967442156921, "learning_rate": 4.911975324655939e-05, "loss": 0.7159, "step": 3784 }, { "epoch": 0.4488319696430689, "grad_norm": 1.3301222494624887, "learning_rate": 4.911912179018461e-05, "loss": 0.7359, "step": 3785 }, { "epoch": 0.44895055140519385, "grad_norm": 1.8420222578934395, "learning_rate": 4.91184901114611e-05, "loss": 1.0092, "step": 3786 }, { "epoch": 0.4490691331673189, "grad_norm": 1.378966185998779, "learning_rate": 4.9117858210394695e-05, "loss": 0.7268, "step": 3787 }, { "epoch": 0.44918771492944387, "grad_norm": 1.7787160039407819, "learning_rate": 4.911722608699122e-05, "loss": 0.7847, "step": 3788 }, { "epoch": 0.44930629669156885, "grad_norm": 1.3109885218541657, "learning_rate": 4.911659374125649e-05, "loss": 0.5242, "step": 3789 }, { "epoch": 0.44942487845369383, "grad_norm": 1.6146944022912781, "learning_rate": 4.911596117319633e-05, "loss": 0.7144, "step": 3790 }, { "epoch": 0.4495434602158188, "grad_norm": 1.607551373462184, "learning_rate": 4.911532838281659e-05, "loss": 0.7032, "step": 3791 }, { "epoch": 0.4496620419779438, "grad_norm": 1.5228137846995211, "learning_rate": 4.911469537012309e-05, "loss": 0.8923, "step": 3792 }, { "epoch": 0.4497806237400688, "grad_norm": 1.3007008424278421, "learning_rate": 4.911406213512167e-05, "loss": 0.6418, "step": 3793 }, { "epoch": 0.44989920550219376, "grad_norm": 1.2963399765415107, "learning_rate": 4.911342867781817e-05, "loss": 0.6311, "step": 3794 }, { "epoch": 0.45001778726431874, "grad_norm": 1.7411019417764901, "learning_rate": 4.9112794998218434e-05, "loss": 0.6924, "step": 3795 }, { "epoch": 0.4501363690264437, "grad_norm": 1.783734064891071, "learning_rate": 4.9112161096328294e-05, "loss": 0.7682, "step": 3796 }, { "epoch": 0.4502549507885687, "grad_norm": 1.293956454833335, "learning_rate": 4.911152697215359e-05, "loss": 0.5522, "step": 3797 }, { "epoch": 0.4503735325506937, "grad_norm": 1.535777114104377, "learning_rate": 4.911089262570018e-05, "loss": 0.5626, "step": 3798 }, { "epoch": 0.4504921143128187, "grad_norm": 1.2685280933203757, "learning_rate": 4.9110258056973904e-05, "loss": 0.5296, "step": 3799 }, { "epoch": 0.4506106960749437, "grad_norm": 1.2798759735181464, "learning_rate": 4.910962326598062e-05, "loss": 0.4881, "step": 3800 }, { "epoch": 0.4507292778370687, "grad_norm": 1.7206375722837797, "learning_rate": 4.9108988252726165e-05, "loss": 0.7156, "step": 3801 }, { "epoch": 0.45084785959919366, "grad_norm": 1.7938982466354982, "learning_rate": 4.9108353017216416e-05, "loss": 0.8591, "step": 3802 }, { "epoch": 0.45096644136131864, "grad_norm": 2.0662464710521116, "learning_rate": 4.91077175594572e-05, "loss": 0.8492, "step": 3803 }, { "epoch": 0.4510850231234436, "grad_norm": 1.2959847820625885, "learning_rate": 4.9107081879454405e-05, "loss": 0.5016, "step": 3804 }, { "epoch": 0.4512036048855686, "grad_norm": 1.843461402589745, "learning_rate": 4.9106445977213866e-05, "loss": 0.7092, "step": 3805 }, { "epoch": 0.4513221866476936, "grad_norm": 1.6160542140543694, "learning_rate": 4.9105809852741466e-05, "loss": 0.6072, "step": 3806 }, { "epoch": 0.45144076840981856, "grad_norm": 1.562064584699593, "learning_rate": 4.9105173506043054e-05, "loss": 0.5655, "step": 3807 }, { "epoch": 0.45155935017194354, "grad_norm": 1.7022415719479005, "learning_rate": 4.91045369371245e-05, "loss": 0.8045, "step": 3808 }, { "epoch": 0.4516779319340685, "grad_norm": 1.4270914321392234, "learning_rate": 4.910390014599168e-05, "loss": 0.5626, "step": 3809 }, { "epoch": 0.4517965136961935, "grad_norm": 1.313578023957952, "learning_rate": 4.9103263132650455e-05, "loss": 0.659, "step": 3810 }, { "epoch": 0.4519150954583185, "grad_norm": 1.3195594561473414, "learning_rate": 4.9102625897106704e-05, "loss": 0.3815, "step": 3811 }, { "epoch": 0.4520336772204435, "grad_norm": 1.6990982758180448, "learning_rate": 4.9101988439366295e-05, "loss": 0.6752, "step": 3812 }, { "epoch": 0.4521522589825685, "grad_norm": 1.4737468802858278, "learning_rate": 4.910135075943512e-05, "loss": 0.7983, "step": 3813 }, { "epoch": 0.4522708407446935, "grad_norm": 1.4934705597449243, "learning_rate": 4.910071285731903e-05, "loss": 0.833, "step": 3814 }, { "epoch": 0.45238942250681846, "grad_norm": 1.760011942289493, "learning_rate": 4.910007473302393e-05, "loss": 0.7476, "step": 3815 }, { "epoch": 0.45250800426894344, "grad_norm": 1.4224654529365122, "learning_rate": 4.9099436386555694e-05, "loss": 0.6646, "step": 3816 }, { "epoch": 0.4526265860310684, "grad_norm": 1.2455874492846077, "learning_rate": 4.9098797817920205e-05, "loss": 0.5943, "step": 3817 }, { "epoch": 0.4527451677931934, "grad_norm": 1.508674142573095, "learning_rate": 4.9098159027123355e-05, "loss": 0.7141, "step": 3818 }, { "epoch": 0.4528637495553184, "grad_norm": 1.4666608965950758, "learning_rate": 4.909752001417103e-05, "loss": 0.6025, "step": 3819 }, { "epoch": 0.45298233131744337, "grad_norm": 1.469878899838565, "learning_rate": 4.9096880779069124e-05, "loss": 0.6373, "step": 3820 }, { "epoch": 0.45310091307956835, "grad_norm": 1.4411637406241682, "learning_rate": 4.909624132182352e-05, "loss": 0.7271, "step": 3821 }, { "epoch": 0.4532194948416933, "grad_norm": 1.4021438941792483, "learning_rate": 4.9095601642440115e-05, "loss": 0.6757, "step": 3822 }, { "epoch": 0.4533380766038183, "grad_norm": 1.8623158577827397, "learning_rate": 4.9094961740924815e-05, "loss": 0.6938, "step": 3823 }, { "epoch": 0.45345665836594334, "grad_norm": 1.3933268720226462, "learning_rate": 4.909432161728352e-05, "loss": 0.6903, "step": 3824 }, { "epoch": 0.4535752401280683, "grad_norm": 1.6096553276358954, "learning_rate": 4.909368127152213e-05, "loss": 0.6633, "step": 3825 }, { "epoch": 0.4536938218901933, "grad_norm": 1.7989893279415112, "learning_rate": 4.9093040703646534e-05, "loss": 0.8226, "step": 3826 }, { "epoch": 0.4538124036523183, "grad_norm": 1.5464451450628212, "learning_rate": 4.9092399913662654e-05, "loss": 0.5812, "step": 3827 }, { "epoch": 0.45393098541444327, "grad_norm": 1.617298821479472, "learning_rate": 4.909175890157638e-05, "loss": 0.6264, "step": 3828 }, { "epoch": 0.45404956717656825, "grad_norm": 1.6430659180240612, "learning_rate": 4.909111766739365e-05, "loss": 0.8252, "step": 3829 }, { "epoch": 0.45416814893869323, "grad_norm": 1.4731128455602542, "learning_rate": 4.909047621112035e-05, "loss": 0.7514, "step": 3830 }, { "epoch": 0.4542867307008182, "grad_norm": 1.7273395973357997, "learning_rate": 4.908983453276239e-05, "loss": 0.6379, "step": 3831 }, { "epoch": 0.4544053124629432, "grad_norm": 1.467912585798283, "learning_rate": 4.908919263232571e-05, "loss": 0.5499, "step": 3832 }, { "epoch": 0.45452389422506817, "grad_norm": 1.7255056888954539, "learning_rate": 4.908855050981621e-05, "loss": 0.8297, "step": 3833 }, { "epoch": 0.45464247598719315, "grad_norm": 1.6881986314049335, "learning_rate": 4.9087908165239814e-05, "loss": 0.7899, "step": 3834 }, { "epoch": 0.45476105774931813, "grad_norm": 1.5771814855496442, "learning_rate": 4.908726559860244e-05, "loss": 0.8075, "step": 3835 }, { "epoch": 0.4548796395114431, "grad_norm": 1.373764834619931, "learning_rate": 4.908662280991002e-05, "loss": 0.5749, "step": 3836 }, { "epoch": 0.45499822127356815, "grad_norm": 1.474062270759114, "learning_rate": 4.908597979916847e-05, "loss": 0.5688, "step": 3837 }, { "epoch": 0.45511680303569313, "grad_norm": 2.1548988058717793, "learning_rate": 4.908533656638372e-05, "loss": 1.0108, "step": 3838 }, { "epoch": 0.4552353847978181, "grad_norm": 1.402959183715603, "learning_rate": 4.908469311156171e-05, "loss": 0.8226, "step": 3839 }, { "epoch": 0.4553539665599431, "grad_norm": 1.2477160800837894, "learning_rate": 4.908404943470836e-05, "loss": 0.5615, "step": 3840 }, { "epoch": 0.45547254832206807, "grad_norm": 1.3527597071957347, "learning_rate": 4.9083405535829606e-05, "loss": 0.7796, "step": 3841 }, { "epoch": 0.45559113008419305, "grad_norm": 1.7408160138616826, "learning_rate": 4.908276141493139e-05, "loss": 0.9229, "step": 3842 }, { "epoch": 0.45570971184631803, "grad_norm": 1.784155513595179, "learning_rate": 4.908211707201965e-05, "loss": 0.8841, "step": 3843 }, { "epoch": 0.455828293608443, "grad_norm": 1.7403243643152146, "learning_rate": 4.908147250710032e-05, "loss": 0.7747, "step": 3844 }, { "epoch": 0.455946875370568, "grad_norm": 1.3585062648161286, "learning_rate": 4.9080827720179335e-05, "loss": 0.6185, "step": 3845 }, { "epoch": 0.456065457132693, "grad_norm": 1.3027210885049758, "learning_rate": 4.9080182711262655e-05, "loss": 0.5389, "step": 3846 }, { "epoch": 0.45618403889481796, "grad_norm": 1.5884605082388368, "learning_rate": 4.907953748035622e-05, "loss": 0.8242, "step": 3847 }, { "epoch": 0.45630262065694294, "grad_norm": 1.4418144374708464, "learning_rate": 4.907889202746598e-05, "loss": 0.8891, "step": 3848 }, { "epoch": 0.456421202419068, "grad_norm": 1.2886277141789806, "learning_rate": 4.907824635259788e-05, "loss": 0.7018, "step": 3849 }, { "epoch": 0.45653978418119295, "grad_norm": 1.6661091468174534, "learning_rate": 4.9077600455757874e-05, "loss": 0.8326, "step": 3850 }, { "epoch": 0.45665836594331793, "grad_norm": 1.099852964184064, "learning_rate": 4.907695433695192e-05, "loss": 0.482, "step": 3851 }, { "epoch": 0.4567769477054429, "grad_norm": 1.5194879750727894, "learning_rate": 4.9076307996185965e-05, "loss": 0.8902, "step": 3852 }, { "epoch": 0.4568955294675679, "grad_norm": 1.3209386539182897, "learning_rate": 4.907566143346598e-05, "loss": 0.5603, "step": 3853 }, { "epoch": 0.4570141112296929, "grad_norm": 1.2800404838749628, "learning_rate": 4.907501464879792e-05, "loss": 0.5532, "step": 3854 }, { "epoch": 0.45713269299181786, "grad_norm": 1.7332696694044751, "learning_rate": 4.9074367642187755e-05, "loss": 0.8841, "step": 3855 }, { "epoch": 0.45725127475394284, "grad_norm": 1.8382919453597066, "learning_rate": 4.907372041364143e-05, "loss": 0.8905, "step": 3856 }, { "epoch": 0.4573698565160678, "grad_norm": 1.6235708588779727, "learning_rate": 4.907307296316493e-05, "loss": 0.8045, "step": 3857 }, { "epoch": 0.4574884382781928, "grad_norm": 1.8703965672329288, "learning_rate": 4.9072425290764215e-05, "loss": 0.7944, "step": 3858 }, { "epoch": 0.4576070200403178, "grad_norm": 1.4852276869980678, "learning_rate": 4.9071777396445264e-05, "loss": 0.6896, "step": 3859 }, { "epoch": 0.45772560180244276, "grad_norm": 1.4395871313724196, "learning_rate": 4.907112928021404e-05, "loss": 0.564, "step": 3860 }, { "epoch": 0.4578441835645678, "grad_norm": 1.4192767665836947, "learning_rate": 4.9070480942076525e-05, "loss": 0.6113, "step": 3861 }, { "epoch": 0.4579627653266928, "grad_norm": 1.5545815853504679, "learning_rate": 4.906983238203869e-05, "loss": 0.6205, "step": 3862 }, { "epoch": 0.45808134708881776, "grad_norm": 1.6435934277835695, "learning_rate": 4.9069183600106514e-05, "loss": 0.9305, "step": 3863 }, { "epoch": 0.45819992885094274, "grad_norm": 1.2115130005462942, "learning_rate": 4.9068534596285986e-05, "loss": 0.3547, "step": 3864 }, { "epoch": 0.4583185106130677, "grad_norm": 1.7952156415594516, "learning_rate": 4.906788537058308e-05, "loss": 0.6765, "step": 3865 }, { "epoch": 0.4584370923751927, "grad_norm": 1.6301437198739848, "learning_rate": 4.9067235923003785e-05, "loss": 0.5921, "step": 3866 }, { "epoch": 0.4585556741373177, "grad_norm": 1.478657463941508, "learning_rate": 4.906658625355409e-05, "loss": 0.4462, "step": 3867 }, { "epoch": 0.45867425589944266, "grad_norm": 1.6473083149437346, "learning_rate": 4.906593636223998e-05, "loss": 0.6595, "step": 3868 }, { "epoch": 0.45879283766156764, "grad_norm": 1.8653975638249014, "learning_rate": 4.906528624906746e-05, "loss": 0.9367, "step": 3869 }, { "epoch": 0.4589114194236926, "grad_norm": 1.831296404398805, "learning_rate": 4.9064635914042496e-05, "loss": 0.8648, "step": 3870 }, { "epoch": 0.4590300011858176, "grad_norm": 1.74845795477681, "learning_rate": 4.90639853571711e-05, "loss": 0.647, "step": 3871 }, { "epoch": 0.4591485829479426, "grad_norm": 1.5669897049683417, "learning_rate": 4.906333457845927e-05, "loss": 0.7053, "step": 3872 }, { "epoch": 0.45926716471006757, "grad_norm": 1.7906588997586064, "learning_rate": 4.9062683577913016e-05, "loss": 0.9526, "step": 3873 }, { "epoch": 0.4593857464721926, "grad_norm": 1.531394444625228, "learning_rate": 4.906203235553831e-05, "loss": 1.0061, "step": 3874 }, { "epoch": 0.4595043282343176, "grad_norm": 1.3768643370734988, "learning_rate": 4.906138091134118e-05, "loss": 0.5589, "step": 3875 }, { "epoch": 0.45962290999644256, "grad_norm": 1.4626533863096403, "learning_rate": 4.906072924532763e-05, "loss": 0.7018, "step": 3876 }, { "epoch": 0.45974149175856754, "grad_norm": 1.4904213663766408, "learning_rate": 4.906007735750365e-05, "loss": 0.851, "step": 3877 }, { "epoch": 0.4598600735206925, "grad_norm": 1.537247342365679, "learning_rate": 4.905942524787527e-05, "loss": 0.6715, "step": 3878 }, { "epoch": 0.4599786552828175, "grad_norm": 1.5440679214942663, "learning_rate": 4.905877291644849e-05, "loss": 0.5307, "step": 3879 }, { "epoch": 0.4600972370449425, "grad_norm": 1.5552460485875799, "learning_rate": 4.905812036322933e-05, "loss": 0.8204, "step": 3880 }, { "epoch": 0.46021581880706747, "grad_norm": 1.526570790249855, "learning_rate": 4.905746758822379e-05, "loss": 0.65, "step": 3881 }, { "epoch": 0.46033440056919245, "grad_norm": 1.7120137627091885, "learning_rate": 4.9056814591437915e-05, "loss": 0.7747, "step": 3882 }, { "epoch": 0.46045298233131743, "grad_norm": 1.171639455548391, "learning_rate": 4.90561613728777e-05, "loss": 0.5756, "step": 3883 }, { "epoch": 0.4605715640934424, "grad_norm": 1.3705261990298039, "learning_rate": 4.905550793254918e-05, "loss": 0.6278, "step": 3884 }, { "epoch": 0.4606901458555674, "grad_norm": 1.4925580393884696, "learning_rate": 4.905485427045838e-05, "loss": 0.8162, "step": 3885 }, { "epoch": 0.4608087276176924, "grad_norm": 1.7788135537556984, "learning_rate": 4.905420038661132e-05, "loss": 0.8564, "step": 3886 }, { "epoch": 0.4609273093798174, "grad_norm": 1.6144537417747584, "learning_rate": 4.9053546281014015e-05, "loss": 0.7105, "step": 3887 }, { "epoch": 0.4610458911419424, "grad_norm": 1.4439448607274257, "learning_rate": 4.905289195367253e-05, "loss": 0.6366, "step": 3888 }, { "epoch": 0.46116447290406737, "grad_norm": 1.5405235151430978, "learning_rate": 4.905223740459287e-05, "loss": 0.6678, "step": 3889 }, { "epoch": 0.46128305466619235, "grad_norm": 1.7674029583984499, "learning_rate": 4.905158263378107e-05, "loss": 0.6945, "step": 3890 }, { "epoch": 0.46140163642831733, "grad_norm": 1.3702347820301166, "learning_rate": 4.905092764124318e-05, "loss": 0.4349, "step": 3891 }, { "epoch": 0.4615202181904423, "grad_norm": 1.5827924155064585, "learning_rate": 4.905027242698522e-05, "loss": 0.6405, "step": 3892 }, { "epoch": 0.4616387999525673, "grad_norm": 1.9073888376252277, "learning_rate": 4.904961699101325e-05, "loss": 0.6366, "step": 3893 }, { "epoch": 0.46175738171469227, "grad_norm": 1.7506606794365636, "learning_rate": 4.90489613333333e-05, "loss": 0.9597, "step": 3894 }, { "epoch": 0.46187596347681725, "grad_norm": 1.3437260359346264, "learning_rate": 4.904830545395142e-05, "loss": 0.5944, "step": 3895 }, { "epoch": 0.46199454523894223, "grad_norm": 1.600913318675501, "learning_rate": 4.904764935287365e-05, "loss": 0.8692, "step": 3896 }, { "epoch": 0.4621131270010672, "grad_norm": 1.7949099350667155, "learning_rate": 4.9046993030106045e-05, "loss": 0.7092, "step": 3897 }, { "epoch": 0.4622317087631922, "grad_norm": 1.4142425472326396, "learning_rate": 4.904633648565465e-05, "loss": 0.5069, "step": 3898 }, { "epoch": 0.46235029052531723, "grad_norm": 1.3599823223103784, "learning_rate": 4.904567971952552e-05, "loss": 0.4836, "step": 3899 }, { "epoch": 0.4624688722874422, "grad_norm": 1.6288667276987718, "learning_rate": 4.904502273172471e-05, "loss": 0.9368, "step": 3900 }, { "epoch": 0.4625874540495672, "grad_norm": 1.6049253877551126, "learning_rate": 4.904436552225829e-05, "loss": 0.8813, "step": 3901 }, { "epoch": 0.4627060358116922, "grad_norm": 1.6354578853495267, "learning_rate": 4.9043708091132276e-05, "loss": 0.7031, "step": 3902 }, { "epoch": 0.46282461757381715, "grad_norm": 1.6208141587150502, "learning_rate": 4.904305043835278e-05, "loss": 0.78, "step": 3903 }, { "epoch": 0.46294319933594213, "grad_norm": 1.4385332523695364, "learning_rate": 4.904239256392584e-05, "loss": 0.55, "step": 3904 }, { "epoch": 0.4630617810980671, "grad_norm": 1.3830612769863857, "learning_rate": 4.904173446785751e-05, "loss": 0.6586, "step": 3905 }, { "epoch": 0.4631803628601921, "grad_norm": 1.5659664568763982, "learning_rate": 4.9041076150153884e-05, "loss": 0.7248, "step": 3906 }, { "epoch": 0.4632989446223171, "grad_norm": 1.3295450119027863, "learning_rate": 4.9040417610821024e-05, "loss": 0.7001, "step": 3907 }, { "epoch": 0.46341752638444206, "grad_norm": 1.4114730771787645, "learning_rate": 4.9039758849864974e-05, "loss": 0.6531, "step": 3908 }, { "epoch": 0.46353610814656704, "grad_norm": 1.4185807252531328, "learning_rate": 4.903909986729184e-05, "loss": 0.609, "step": 3909 }, { "epoch": 0.463654689908692, "grad_norm": 1.5495475489397543, "learning_rate": 4.9038440663107675e-05, "loss": 0.7076, "step": 3910 }, { "epoch": 0.46377327167081706, "grad_norm": 1.5026385015757315, "learning_rate": 4.903778123731857e-05, "loss": 0.6378, "step": 3911 }, { "epoch": 0.46389185343294204, "grad_norm": 1.9550129098826767, "learning_rate": 4.90371215899306e-05, "loss": 0.7823, "step": 3912 }, { "epoch": 0.464010435195067, "grad_norm": 1.6753413200228116, "learning_rate": 4.903646172094985e-05, "loss": 0.7834, "step": 3913 }, { "epoch": 0.464129016957192, "grad_norm": 1.637177941525017, "learning_rate": 4.903580163038239e-05, "loss": 0.6965, "step": 3914 }, { "epoch": 0.464247598719317, "grad_norm": 1.735417630666785, "learning_rate": 4.903514131823431e-05, "loss": 0.8469, "step": 3915 }, { "epoch": 0.46436618048144196, "grad_norm": 1.5843422197656436, "learning_rate": 4.9034480784511716e-05, "loss": 0.5814, "step": 3916 }, { "epoch": 0.46448476224356694, "grad_norm": 1.430894230393008, "learning_rate": 4.9033820029220664e-05, "loss": 0.6954, "step": 3917 }, { "epoch": 0.4646033440056919, "grad_norm": 1.4931532965248684, "learning_rate": 4.903315905236728e-05, "loss": 0.6591, "step": 3918 }, { "epoch": 0.4647219257678169, "grad_norm": 1.3851523589471468, "learning_rate": 4.903249785395763e-05, "loss": 0.6661, "step": 3919 }, { "epoch": 0.4648405075299419, "grad_norm": 1.4100291446072577, "learning_rate": 4.903183643399783e-05, "loss": 0.7618, "step": 3920 }, { "epoch": 0.46495908929206686, "grad_norm": 1.476628845485094, "learning_rate": 4.903117479249396e-05, "loss": 0.7454, "step": 3921 }, { "epoch": 0.46507767105419184, "grad_norm": 1.30179915827753, "learning_rate": 4.903051292945213e-05, "loss": 0.607, "step": 3922 }, { "epoch": 0.4651962528163168, "grad_norm": 1.4579507027328646, "learning_rate": 4.9029850844878434e-05, "loss": 0.5942, "step": 3923 }, { "epoch": 0.46531483457844186, "grad_norm": 1.2556975059942581, "learning_rate": 4.9029188538778984e-05, "loss": 0.5013, "step": 3924 }, { "epoch": 0.46543341634056684, "grad_norm": 1.5365495039134514, "learning_rate": 4.902852601115989e-05, "loss": 0.737, "step": 3925 }, { "epoch": 0.4655519981026918, "grad_norm": 1.4403506397512933, "learning_rate": 4.902786326202724e-05, "loss": 0.6221, "step": 3926 }, { "epoch": 0.4656705798648168, "grad_norm": 1.4277837516883107, "learning_rate": 4.902720029138716e-05, "loss": 0.6272, "step": 3927 }, { "epoch": 0.4657891616269418, "grad_norm": 1.3315725734733932, "learning_rate": 4.902653709924576e-05, "loss": 0.6386, "step": 3928 }, { "epoch": 0.46590774338906676, "grad_norm": 1.40808178588933, "learning_rate": 4.902587368560915e-05, "loss": 0.5094, "step": 3929 }, { "epoch": 0.46602632515119174, "grad_norm": 1.4438750135640057, "learning_rate": 4.902521005048344e-05, "loss": 0.4953, "step": 3930 }, { "epoch": 0.4661449069133167, "grad_norm": 1.938640573701615, "learning_rate": 4.9024546193874766e-05, "loss": 0.9957, "step": 3931 }, { "epoch": 0.4662634886754417, "grad_norm": 1.5640975204949963, "learning_rate": 4.9023882115789235e-05, "loss": 0.6202, "step": 3932 }, { "epoch": 0.4663820704375667, "grad_norm": 1.374178567427156, "learning_rate": 4.902321781623297e-05, "loss": 0.5316, "step": 3933 }, { "epoch": 0.46650065219969167, "grad_norm": 1.8903055510531646, "learning_rate": 4.9022553295212096e-05, "loss": 0.7963, "step": 3934 }, { "epoch": 0.46661923396181665, "grad_norm": 1.751624954078587, "learning_rate": 4.902188855273273e-05, "loss": 0.6553, "step": 3935 }, { "epoch": 0.4667378157239417, "grad_norm": 2.1899938311854474, "learning_rate": 4.9021223588801015e-05, "loss": 0.8237, "step": 3936 }, { "epoch": 0.46685639748606667, "grad_norm": 1.5922649326482845, "learning_rate": 4.902055840342308e-05, "loss": 0.8273, "step": 3937 }, { "epoch": 0.46697497924819165, "grad_norm": 1.7151532308645714, "learning_rate": 4.901989299660505e-05, "loss": 0.7354, "step": 3938 }, { "epoch": 0.4670935610103166, "grad_norm": 1.7443760513214397, "learning_rate": 4.901922736835306e-05, "loss": 0.5804, "step": 3939 }, { "epoch": 0.4672121427724416, "grad_norm": 1.751310645529247, "learning_rate": 4.9018561518673244e-05, "loss": 0.504, "step": 3940 }, { "epoch": 0.4673307245345666, "grad_norm": 1.7285568000634297, "learning_rate": 4.901789544757175e-05, "loss": 0.8114, "step": 3941 }, { "epoch": 0.46744930629669157, "grad_norm": 1.5402565423477548, "learning_rate": 4.901722915505471e-05, "loss": 0.6299, "step": 3942 }, { "epoch": 0.46756788805881655, "grad_norm": 1.3436366313004255, "learning_rate": 4.9016562641128274e-05, "loss": 0.5137, "step": 3943 }, { "epoch": 0.46768646982094153, "grad_norm": 1.3481116358833505, "learning_rate": 4.901589590579858e-05, "loss": 0.5649, "step": 3944 }, { "epoch": 0.4678050515830665, "grad_norm": 1.6811926161648858, "learning_rate": 4.9015228949071775e-05, "loss": 0.7793, "step": 3945 }, { "epoch": 0.4679236333451915, "grad_norm": 1.4634823852735208, "learning_rate": 4.9014561770954e-05, "loss": 0.597, "step": 3946 }, { "epoch": 0.4680422151073165, "grad_norm": 1.4135062307625486, "learning_rate": 4.901389437145143e-05, "loss": 0.684, "step": 3947 }, { "epoch": 0.46816079686944145, "grad_norm": 1.682643521684982, "learning_rate": 4.901322675057019e-05, "loss": 0.7634, "step": 3948 }, { "epoch": 0.4682793786315665, "grad_norm": 1.5275828697094942, "learning_rate": 4.9012558908316456e-05, "loss": 0.8555, "step": 3949 }, { "epoch": 0.46839796039369147, "grad_norm": 1.7360246657558485, "learning_rate": 4.901189084469636e-05, "loss": 0.9502, "step": 3950 }, { "epoch": 0.46851654215581645, "grad_norm": 1.6032419262345101, "learning_rate": 4.901122255971609e-05, "loss": 0.6202, "step": 3951 }, { "epoch": 0.46863512391794143, "grad_norm": 1.7432053249266153, "learning_rate": 4.901055405338179e-05, "loss": 0.7833, "step": 3952 }, { "epoch": 0.4687537056800664, "grad_norm": 1.4496754789134678, "learning_rate": 4.900988532569962e-05, "loss": 0.571, "step": 3953 }, { "epoch": 0.4688722874421914, "grad_norm": 1.314564641626623, "learning_rate": 4.900921637667575e-05, "loss": 0.6157, "step": 3954 }, { "epoch": 0.4689908692043164, "grad_norm": 1.7614749590123622, "learning_rate": 4.900854720631635e-05, "loss": 0.6339, "step": 3955 }, { "epoch": 0.46910945096644135, "grad_norm": 1.5547691706950737, "learning_rate": 4.900787781462759e-05, "loss": 0.6558, "step": 3956 }, { "epoch": 0.46922803272856634, "grad_norm": 2.055557692717598, "learning_rate": 4.900720820161563e-05, "loss": 1.0625, "step": 3957 }, { "epoch": 0.4693466144906913, "grad_norm": 1.4193368362369563, "learning_rate": 4.900653836728665e-05, "loss": 0.6164, "step": 3958 }, { "epoch": 0.4694651962528163, "grad_norm": 1.318129806750987, "learning_rate": 4.900586831164683e-05, "loss": 0.5594, "step": 3959 }, { "epoch": 0.4695837780149413, "grad_norm": 1.0981519983463217, "learning_rate": 4.9005198034702334e-05, "loss": 0.4395, "step": 3960 }, { "epoch": 0.4697023597770663, "grad_norm": 1.594499940337011, "learning_rate": 4.900452753645936e-05, "loss": 0.6927, "step": 3961 }, { "epoch": 0.4698209415391913, "grad_norm": 1.4850699575153201, "learning_rate": 4.900385681692408e-05, "loss": 0.6133, "step": 3962 }, { "epoch": 0.4699395233013163, "grad_norm": 1.4197672629729599, "learning_rate": 4.900318587610266e-05, "loss": 0.6068, "step": 3963 }, { "epoch": 0.47005810506344126, "grad_norm": 1.3895682920916126, "learning_rate": 4.9002514714001316e-05, "loss": 0.5485, "step": 3964 }, { "epoch": 0.47017668682556624, "grad_norm": 1.4942194581011752, "learning_rate": 4.9001843330626206e-05, "loss": 0.755, "step": 3965 }, { "epoch": 0.4702952685876912, "grad_norm": 1.4330464968844026, "learning_rate": 4.900117172598354e-05, "loss": 0.6341, "step": 3966 }, { "epoch": 0.4704138503498162, "grad_norm": 1.4457597417394499, "learning_rate": 4.9000499900079514e-05, "loss": 0.5418, "step": 3967 }, { "epoch": 0.4705324321119412, "grad_norm": 2.019120421486503, "learning_rate": 4.89998278529203e-05, "loss": 0.7049, "step": 3968 }, { "epoch": 0.47065101387406616, "grad_norm": 1.4719025360944575, "learning_rate": 4.89991555845121e-05, "loss": 0.6184, "step": 3969 }, { "epoch": 0.47076959563619114, "grad_norm": 1.3611737649430142, "learning_rate": 4.899848309486112e-05, "loss": 0.4403, "step": 3970 }, { "epoch": 0.4708881773983161, "grad_norm": 1.419523577489354, "learning_rate": 4.8997810383973555e-05, "loss": 0.4889, "step": 3971 }, { "epoch": 0.4710067591604411, "grad_norm": 1.5603446084461012, "learning_rate": 4.89971374518556e-05, "loss": 0.625, "step": 3972 }, { "epoch": 0.4711253409225661, "grad_norm": 1.8143458039256453, "learning_rate": 4.899646429851348e-05, "loss": 0.7447, "step": 3973 }, { "epoch": 0.4712439226846911, "grad_norm": 2.1663887676376348, "learning_rate": 4.899579092395337e-05, "loss": 0.8614, "step": 3974 }, { "epoch": 0.4713625044468161, "grad_norm": 1.5155045664094833, "learning_rate": 4.89951173281815e-05, "loss": 0.7454, "step": 3975 }, { "epoch": 0.4714810862089411, "grad_norm": 1.5983429094696837, "learning_rate": 4.8994443511204066e-05, "loss": 0.7299, "step": 3976 }, { "epoch": 0.47159966797106606, "grad_norm": 1.3322413839941973, "learning_rate": 4.89937694730273e-05, "loss": 0.5266, "step": 3977 }, { "epoch": 0.47171824973319104, "grad_norm": 1.656333853396519, "learning_rate": 4.899309521365739e-05, "loss": 0.7718, "step": 3978 }, { "epoch": 0.471836831495316, "grad_norm": 1.5622427901651401, "learning_rate": 4.8992420733100575e-05, "loss": 0.9564, "step": 3979 }, { "epoch": 0.471955413257441, "grad_norm": 1.521875384698585, "learning_rate": 4.899174603136306e-05, "loss": 0.6321, "step": 3980 }, { "epoch": 0.472073995019566, "grad_norm": 1.3876440854518506, "learning_rate": 4.899107110845106e-05, "loss": 0.523, "step": 3981 }, { "epoch": 0.47219257678169096, "grad_norm": 1.484977238624346, "learning_rate": 4.899039596437081e-05, "loss": 0.6178, "step": 3982 }, { "epoch": 0.47231115854381595, "grad_norm": 1.5739026044610969, "learning_rate": 4.898972059912853e-05, "loss": 0.8231, "step": 3983 }, { "epoch": 0.4724297403059409, "grad_norm": 1.3631026074105321, "learning_rate": 4.898904501273044e-05, "loss": 0.7308, "step": 3984 }, { "epoch": 0.4725483220680659, "grad_norm": 1.3962003734040087, "learning_rate": 4.898836920518277e-05, "loss": 0.6757, "step": 3985 }, { "epoch": 0.47266690383019094, "grad_norm": 1.4260222496595076, "learning_rate": 4.8987693176491755e-05, "loss": 0.6364, "step": 3986 }, { "epoch": 0.4727854855923159, "grad_norm": 1.7031115856236982, "learning_rate": 4.8987016926663634e-05, "loss": 0.7404, "step": 3987 }, { "epoch": 0.4729040673544409, "grad_norm": 1.4270711873264215, "learning_rate": 4.898634045570462e-05, "loss": 0.8067, "step": 3988 }, { "epoch": 0.4730226491165659, "grad_norm": 1.3196458940706794, "learning_rate": 4.898566376362096e-05, "loss": 0.5122, "step": 3989 }, { "epoch": 0.47314123087869087, "grad_norm": 1.5540140525155384, "learning_rate": 4.89849868504189e-05, "loss": 0.7291, "step": 3990 }, { "epoch": 0.47325981264081585, "grad_norm": 1.5398439188462036, "learning_rate": 4.898430971610467e-05, "loss": 0.8767, "step": 3991 }, { "epoch": 0.4733783944029408, "grad_norm": 1.5213517661258038, "learning_rate": 4.8983632360684515e-05, "loss": 0.587, "step": 3992 }, { "epoch": 0.4734969761650658, "grad_norm": 1.544424965668842, "learning_rate": 4.898295478416469e-05, "loss": 0.591, "step": 3993 }, { "epoch": 0.4736155579271908, "grad_norm": 1.7192561139355211, "learning_rate": 4.8982276986551425e-05, "loss": 0.7074, "step": 3994 }, { "epoch": 0.47373413968931577, "grad_norm": 2.1650992817174517, "learning_rate": 4.8981598967850976e-05, "loss": 1.085, "step": 3995 }, { "epoch": 0.47385272145144075, "grad_norm": 1.3411474107828578, "learning_rate": 4.898092072806959e-05, "loss": 0.5091, "step": 3996 }, { "epoch": 0.47397130321356573, "grad_norm": 1.3029771241772945, "learning_rate": 4.8980242267213515e-05, "loss": 0.5191, "step": 3997 }, { "epoch": 0.4740898849756907, "grad_norm": 1.8634113149361473, "learning_rate": 4.8979563585289026e-05, "loss": 0.852, "step": 3998 }, { "epoch": 0.47420846673781575, "grad_norm": 1.3602374025023698, "learning_rate": 4.897888468230236e-05, "loss": 0.6197, "step": 3999 }, { "epoch": 0.47432704849994073, "grad_norm": 1.6569787616312923, "learning_rate": 4.897820555825978e-05, "loss": 0.6495, "step": 4000 }, { "epoch": 0.4744456302620657, "grad_norm": 1.9807303477404288, "learning_rate": 4.897752621316756e-05, "loss": 0.7887, "step": 4001 }, { "epoch": 0.4745642120241907, "grad_norm": 1.2689286980761965, "learning_rate": 4.8976846647031935e-05, "loss": 0.3509, "step": 4002 }, { "epoch": 0.47468279378631567, "grad_norm": 1.4730594431095392, "learning_rate": 4.8976166859859195e-05, "loss": 0.5824, "step": 4003 }, { "epoch": 0.47480137554844065, "grad_norm": 1.443382242028131, "learning_rate": 4.89754868516556e-05, "loss": 0.6988, "step": 4004 }, { "epoch": 0.47491995731056563, "grad_norm": 1.4456194629572676, "learning_rate": 4.897480662242742e-05, "loss": 0.6312, "step": 4005 }, { "epoch": 0.4750385390726906, "grad_norm": 1.7066445885245023, "learning_rate": 4.8974126172180914e-05, "loss": 0.7714, "step": 4006 }, { "epoch": 0.4751571208348156, "grad_norm": 1.7300554750289905, "learning_rate": 4.897344550092237e-05, "loss": 0.8768, "step": 4007 }, { "epoch": 0.4752757025969406, "grad_norm": 1.6961087913619637, "learning_rate": 4.897276460865805e-05, "loss": 0.7856, "step": 4008 }, { "epoch": 0.47539428435906556, "grad_norm": 1.3678737900741764, "learning_rate": 4.8972083495394245e-05, "loss": 0.5315, "step": 4009 }, { "epoch": 0.47551286612119054, "grad_norm": 1.5793692209423693, "learning_rate": 4.8971402161137226e-05, "loss": 0.6267, "step": 4010 }, { "epoch": 0.4756314478833156, "grad_norm": 1.588296401621017, "learning_rate": 4.8970720605893275e-05, "loss": 0.7175, "step": 4011 }, { "epoch": 0.47575002964544055, "grad_norm": 1.5988067220892, "learning_rate": 4.897003882966866e-05, "loss": 0.6675, "step": 4012 }, { "epoch": 0.47586861140756553, "grad_norm": 1.397453887214475, "learning_rate": 4.89693568324697e-05, "loss": 0.6756, "step": 4013 }, { "epoch": 0.4759871931696905, "grad_norm": 1.8477954354125279, "learning_rate": 4.8968674614302656e-05, "loss": 0.8347, "step": 4014 }, { "epoch": 0.4761057749318155, "grad_norm": 1.4740456338380772, "learning_rate": 4.896799217517383e-05, "loss": 0.7265, "step": 4015 }, { "epoch": 0.4762243566939405, "grad_norm": 1.4444323844621767, "learning_rate": 4.8967309515089496e-05, "loss": 0.7475, "step": 4016 }, { "epoch": 0.47634293845606546, "grad_norm": 1.5344883361660566, "learning_rate": 4.896662663405597e-05, "loss": 0.654, "step": 4017 }, { "epoch": 0.47646152021819044, "grad_norm": 1.7910780444585894, "learning_rate": 4.896594353207953e-05, "loss": 0.6878, "step": 4018 }, { "epoch": 0.4765801019803154, "grad_norm": 1.6883597187225852, "learning_rate": 4.896526020916647e-05, "loss": 0.813, "step": 4019 }, { "epoch": 0.4766986837424404, "grad_norm": 1.2489307469669835, "learning_rate": 4.896457666532311e-05, "loss": 0.3902, "step": 4020 }, { "epoch": 0.4768172655045654, "grad_norm": 1.529896729236191, "learning_rate": 4.8963892900555745e-05, "loss": 0.6819, "step": 4021 }, { "epoch": 0.47693584726669036, "grad_norm": 1.4559016496272676, "learning_rate": 4.896320891487067e-05, "loss": 0.508, "step": 4022 }, { "epoch": 0.47705442902881534, "grad_norm": 1.1620507816987244, "learning_rate": 4.8962524708274195e-05, "loss": 0.5299, "step": 4023 }, { "epoch": 0.4771730107909404, "grad_norm": 1.4597397148806852, "learning_rate": 4.896184028077263e-05, "loss": 0.5113, "step": 4024 }, { "epoch": 0.47729159255306536, "grad_norm": 2.019835618225478, "learning_rate": 4.896115563237227e-05, "loss": 0.8913, "step": 4025 }, { "epoch": 0.47741017431519034, "grad_norm": 1.7249381078122055, "learning_rate": 4.8960470763079444e-05, "loss": 0.6928, "step": 4026 }, { "epoch": 0.4775287560773153, "grad_norm": 1.452990692402297, "learning_rate": 4.895978567290047e-05, "loss": 0.5525, "step": 4027 }, { "epoch": 0.4776473378394403, "grad_norm": 1.3474797999461565, "learning_rate": 4.8959100361841643e-05, "loss": 0.8029, "step": 4028 }, { "epoch": 0.4777659196015653, "grad_norm": 1.7018900648720616, "learning_rate": 4.8958414829909296e-05, "loss": 0.7859, "step": 4029 }, { "epoch": 0.47788450136369026, "grad_norm": 1.6926426370638796, "learning_rate": 4.895772907710974e-05, "loss": 0.6492, "step": 4030 }, { "epoch": 0.47800308312581524, "grad_norm": 1.7433949304626866, "learning_rate": 4.8957043103449296e-05, "loss": 0.4681, "step": 4031 }, { "epoch": 0.4781216648879402, "grad_norm": 1.477728800187049, "learning_rate": 4.89563569089343e-05, "loss": 0.7129, "step": 4032 }, { "epoch": 0.4782402466500652, "grad_norm": 1.3005606221716723, "learning_rate": 4.895567049357107e-05, "loss": 0.6076, "step": 4033 }, { "epoch": 0.4783588284121902, "grad_norm": 1.5038771543549763, "learning_rate": 4.895498385736593e-05, "loss": 0.734, "step": 4034 }, { "epoch": 0.47847741017431517, "grad_norm": 1.6848296533218543, "learning_rate": 4.8954297000325225e-05, "loss": 0.8285, "step": 4035 }, { "epoch": 0.4785959919364402, "grad_norm": 1.6794823041534357, "learning_rate": 4.895360992245527e-05, "loss": 0.6397, "step": 4036 }, { "epoch": 0.4787145736985652, "grad_norm": 1.7476007210168303, "learning_rate": 4.89529226237624e-05, "loss": 0.7239, "step": 4037 }, { "epoch": 0.47883315546069016, "grad_norm": 1.5703064005891074, "learning_rate": 4.895223510425296e-05, "loss": 0.6421, "step": 4038 }, { "epoch": 0.47895173722281514, "grad_norm": 1.87443774142814, "learning_rate": 4.895154736393329e-05, "loss": 0.9704, "step": 4039 }, { "epoch": 0.4790703189849401, "grad_norm": 1.5653107126365349, "learning_rate": 4.8950859402809724e-05, "loss": 0.6423, "step": 4040 }, { "epoch": 0.4791889007470651, "grad_norm": 1.617704406298519, "learning_rate": 4.89501712208886e-05, "loss": 0.6999, "step": 4041 }, { "epoch": 0.4793074825091901, "grad_norm": 1.5171819855324005, "learning_rate": 4.894948281817626e-05, "loss": 0.8446, "step": 4042 }, { "epoch": 0.47942606427131507, "grad_norm": 1.3941988529859095, "learning_rate": 4.894879419467907e-05, "loss": 0.5789, "step": 4043 }, { "epoch": 0.47954464603344005, "grad_norm": 1.298817209928704, "learning_rate": 4.894810535040336e-05, "loss": 0.4436, "step": 4044 }, { "epoch": 0.47966322779556503, "grad_norm": 1.671129374601477, "learning_rate": 4.894741628535549e-05, "loss": 0.6212, "step": 4045 }, { "epoch": 0.47978180955769, "grad_norm": 1.6626633351931168, "learning_rate": 4.89467269995418e-05, "loss": 0.6574, "step": 4046 }, { "epoch": 0.479900391319815, "grad_norm": 1.7987262732440326, "learning_rate": 4.894603749296866e-05, "loss": 0.6805, "step": 4047 }, { "epoch": 0.48001897308193997, "grad_norm": 1.8410941049156422, "learning_rate": 4.8945347765642414e-05, "loss": 0.7511, "step": 4048 }, { "epoch": 0.480137554844065, "grad_norm": 1.7156379650873421, "learning_rate": 4.894465781756943e-05, "loss": 0.5072, "step": 4049 }, { "epoch": 0.48025613660619, "grad_norm": 1.4723610685674535, "learning_rate": 4.894396764875606e-05, "loss": 0.588, "step": 4050 }, { "epoch": 0.48037471836831497, "grad_norm": 1.6741522949163534, "learning_rate": 4.894327725920868e-05, "loss": 0.8677, "step": 4051 }, { "epoch": 0.48049330013043995, "grad_norm": 1.4864454634420534, "learning_rate": 4.894258664893363e-05, "loss": 0.7783, "step": 4052 }, { "epoch": 0.48061188189256493, "grad_norm": 1.6999465145214452, "learning_rate": 4.89418958179373e-05, "loss": 0.811, "step": 4053 }, { "epoch": 0.4807304636546899, "grad_norm": 1.516233615843892, "learning_rate": 4.894120476622605e-05, "loss": 0.4956, "step": 4054 }, { "epoch": 0.4808490454168149, "grad_norm": 1.5693543010984206, "learning_rate": 4.894051349380624e-05, "loss": 0.6724, "step": 4055 }, { "epoch": 0.48096762717893987, "grad_norm": 1.6279231489096926, "learning_rate": 4.893982200068426e-05, "loss": 0.8112, "step": 4056 }, { "epoch": 0.48108620894106485, "grad_norm": 1.706148097966865, "learning_rate": 4.893913028686649e-05, "loss": 0.5358, "step": 4057 }, { "epoch": 0.48120479070318983, "grad_norm": 1.1372754006512094, "learning_rate": 4.893843835235928e-05, "loss": 0.4509, "step": 4058 }, { "epoch": 0.4813233724653148, "grad_norm": 1.6060460644852042, "learning_rate": 4.8937746197169026e-05, "loss": 0.5663, "step": 4059 }, { "epoch": 0.4814419542274398, "grad_norm": 1.8041408234356868, "learning_rate": 4.893705382130211e-05, "loss": 0.711, "step": 4060 }, { "epoch": 0.48156053598956483, "grad_norm": 1.5240559669729792, "learning_rate": 4.893636122476491e-05, "loss": 0.8108, "step": 4061 }, { "epoch": 0.4816791177516898, "grad_norm": 1.4873789488663596, "learning_rate": 4.893566840756382e-05, "loss": 0.7501, "step": 4062 }, { "epoch": 0.4817976995138148, "grad_norm": 1.5210579920654985, "learning_rate": 4.8934975369705206e-05, "loss": 0.5496, "step": 4063 }, { "epoch": 0.4819162812759398, "grad_norm": 1.4447483588927672, "learning_rate": 4.893428211119547e-05, "loss": 0.553, "step": 4064 }, { "epoch": 0.48203486303806475, "grad_norm": 1.6740466848860047, "learning_rate": 4.893358863204102e-05, "loss": 0.7344, "step": 4065 }, { "epoch": 0.48215344480018973, "grad_norm": 1.6292526941699856, "learning_rate": 4.893289493224821e-05, "loss": 0.5869, "step": 4066 }, { "epoch": 0.4822720265623147, "grad_norm": 1.6136912403114538, "learning_rate": 4.893220101182348e-05, "loss": 0.8246, "step": 4067 }, { "epoch": 0.4823906083244397, "grad_norm": 1.6158382972926673, "learning_rate": 4.893150687077319e-05, "loss": 0.5992, "step": 4068 }, { "epoch": 0.4825091900865647, "grad_norm": 1.546305554071709, "learning_rate": 4.8930812509103754e-05, "loss": 0.7114, "step": 4069 }, { "epoch": 0.48262777184868966, "grad_norm": 1.6256671101198086, "learning_rate": 4.8930117926821575e-05, "loss": 0.6918, "step": 4070 }, { "epoch": 0.48274635361081464, "grad_norm": 1.537034421437289, "learning_rate": 4.892942312393305e-05, "loss": 0.6889, "step": 4071 }, { "epoch": 0.4828649353729396, "grad_norm": 1.2500742039316979, "learning_rate": 4.892872810044459e-05, "loss": 0.5916, "step": 4072 }, { "epoch": 0.4829835171350646, "grad_norm": 1.6405389578015364, "learning_rate": 4.8928032856362606e-05, "loss": 0.6848, "step": 4073 }, { "epoch": 0.48310209889718964, "grad_norm": 1.6960840410535822, "learning_rate": 4.8927337391693496e-05, "loss": 0.7937, "step": 4074 }, { "epoch": 0.4832206806593146, "grad_norm": 1.9319655568202767, "learning_rate": 4.892664170644368e-05, "loss": 0.6337, "step": 4075 }, { "epoch": 0.4833392624214396, "grad_norm": 1.4384555355081734, "learning_rate": 4.892594580061956e-05, "loss": 0.5111, "step": 4076 }, { "epoch": 0.4834578441835646, "grad_norm": 1.39379814598019, "learning_rate": 4.892524967422757e-05, "loss": 0.5648, "step": 4077 }, { "epoch": 0.48357642594568956, "grad_norm": 1.7211400203628826, "learning_rate": 4.892455332727411e-05, "loss": 0.5459, "step": 4078 }, { "epoch": 0.48369500770781454, "grad_norm": 1.6520252665648292, "learning_rate": 4.892385675976561e-05, "loss": 0.7197, "step": 4079 }, { "epoch": 0.4838135894699395, "grad_norm": 1.5109557819232557, "learning_rate": 4.8923159971708486e-05, "loss": 0.7451, "step": 4080 }, { "epoch": 0.4839321712320645, "grad_norm": 1.6118726597728827, "learning_rate": 4.8922462963109164e-05, "loss": 0.7617, "step": 4081 }, { "epoch": 0.4840507529941895, "grad_norm": 1.7294407075149008, "learning_rate": 4.892176573397407e-05, "loss": 0.9104, "step": 4082 }, { "epoch": 0.48416933475631446, "grad_norm": 1.6104780839581314, "learning_rate": 4.892106828430963e-05, "loss": 0.6309, "step": 4083 }, { "epoch": 0.48428791651843944, "grad_norm": 1.8083183201491937, "learning_rate": 4.8920370614122276e-05, "loss": 0.8299, "step": 4084 }, { "epoch": 0.4844064982805644, "grad_norm": 1.4528208733084937, "learning_rate": 4.8919672723418445e-05, "loss": 0.6245, "step": 4085 }, { "epoch": 0.48452508004268946, "grad_norm": 1.265037789273784, "learning_rate": 4.891897461220455e-05, "loss": 0.5078, "step": 4086 }, { "epoch": 0.48464366180481444, "grad_norm": 1.7523662108839682, "learning_rate": 4.891827628048705e-05, "loss": 0.8144, "step": 4087 }, { "epoch": 0.4847622435669394, "grad_norm": 1.940696520113745, "learning_rate": 4.8917577728272366e-05, "loss": 0.9523, "step": 4088 }, { "epoch": 0.4848808253290644, "grad_norm": 1.4980767489192377, "learning_rate": 4.8916878955566944e-05, "loss": 0.611, "step": 4089 }, { "epoch": 0.4849994070911894, "grad_norm": 1.8046204333877207, "learning_rate": 4.891617996237724e-05, "loss": 0.7514, "step": 4090 }, { "epoch": 0.48511798885331436, "grad_norm": 1.32352084124772, "learning_rate": 4.8915480748709676e-05, "loss": 0.6854, "step": 4091 }, { "epoch": 0.48523657061543934, "grad_norm": 1.5245406008052478, "learning_rate": 4.89147813145707e-05, "loss": 0.7774, "step": 4092 }, { "epoch": 0.4853551523775643, "grad_norm": 1.5098346099708075, "learning_rate": 4.891408165996678e-05, "loss": 0.8574, "step": 4093 }, { "epoch": 0.4854737341396893, "grad_norm": 1.4580324273644576, "learning_rate": 4.891338178490435e-05, "loss": 0.5788, "step": 4094 }, { "epoch": 0.4855923159018143, "grad_norm": 1.6065984179723092, "learning_rate": 4.891268168938985e-05, "loss": 0.793, "step": 4095 }, { "epoch": 0.48571089766393927, "grad_norm": 1.3341504926731984, "learning_rate": 4.891198137342976e-05, "loss": 0.5563, "step": 4096 }, { "epoch": 0.48582947942606425, "grad_norm": 1.4323232108624768, "learning_rate": 4.8911280837030524e-05, "loss": 0.5846, "step": 4097 }, { "epoch": 0.4859480611881893, "grad_norm": 1.7164217240185693, "learning_rate": 4.8910580080198595e-05, "loss": 0.5719, "step": 4098 }, { "epoch": 0.48606664295031426, "grad_norm": 1.5967946442701428, "learning_rate": 4.890987910294045e-05, "loss": 0.6094, "step": 4099 }, { "epoch": 0.48618522471243925, "grad_norm": 1.4920737327854732, "learning_rate": 4.890917790526254e-05, "loss": 0.7785, "step": 4100 }, { "epoch": 0.4863038064745642, "grad_norm": 1.5188401832717104, "learning_rate": 4.890847648717132e-05, "loss": 0.8239, "step": 4101 }, { "epoch": 0.4864223882366892, "grad_norm": 1.5397230860448081, "learning_rate": 4.8907774848673266e-05, "loss": 0.5947, "step": 4102 }, { "epoch": 0.4865409699988142, "grad_norm": 1.4253714923762693, "learning_rate": 4.8907072989774846e-05, "loss": 0.5785, "step": 4103 }, { "epoch": 0.48665955176093917, "grad_norm": 1.4849977100756124, "learning_rate": 4.890637091048253e-05, "loss": 0.554, "step": 4104 }, { "epoch": 0.48677813352306415, "grad_norm": 2.130122589854132, "learning_rate": 4.890566861080279e-05, "loss": 0.8115, "step": 4105 }, { "epoch": 0.48689671528518913, "grad_norm": 1.5268531439250852, "learning_rate": 4.8904966090742105e-05, "loss": 0.6931, "step": 4106 }, { "epoch": 0.4870152970473141, "grad_norm": 1.7458415197480863, "learning_rate": 4.8904263350306946e-05, "loss": 0.6334, "step": 4107 }, { "epoch": 0.4871338788094391, "grad_norm": 1.5900275340024916, "learning_rate": 4.8903560389503785e-05, "loss": 0.701, "step": 4108 }, { "epoch": 0.48725246057156407, "grad_norm": 1.5468873138125376, "learning_rate": 4.8902857208339115e-05, "loss": 0.6714, "step": 4109 }, { "epoch": 0.48737104233368905, "grad_norm": 1.6606342853677105, "learning_rate": 4.890215380681942e-05, "loss": 0.6874, "step": 4110 }, { "epoch": 0.4874896240958141, "grad_norm": 1.5581855374473612, "learning_rate": 4.890145018495117e-05, "loss": 0.7354, "step": 4111 }, { "epoch": 0.48760820585793907, "grad_norm": 2.102185665828938, "learning_rate": 4.8900746342740854e-05, "loss": 0.9245, "step": 4112 }, { "epoch": 0.48772678762006405, "grad_norm": 1.6588631539365102, "learning_rate": 4.890004228019497e-05, "loss": 0.6962, "step": 4113 }, { "epoch": 0.48784536938218903, "grad_norm": 1.6970211134979376, "learning_rate": 4.889933799732001e-05, "loss": 0.8241, "step": 4114 }, { "epoch": 0.487963951144314, "grad_norm": 1.6667289133910457, "learning_rate": 4.889863349412246e-05, "loss": 0.6617, "step": 4115 }, { "epoch": 0.488082532906439, "grad_norm": 1.253757782531047, "learning_rate": 4.889792877060881e-05, "loss": 0.5734, "step": 4116 }, { "epoch": 0.488201114668564, "grad_norm": 1.55631041443559, "learning_rate": 4.8897223826785566e-05, "loss": 0.6085, "step": 4117 }, { "epoch": 0.48831969643068895, "grad_norm": 1.6873564117248379, "learning_rate": 4.8896518662659216e-05, "loss": 0.669, "step": 4118 }, { "epoch": 0.48843827819281393, "grad_norm": 2.4422979425445517, "learning_rate": 4.889581327823628e-05, "loss": 0.7793, "step": 4119 }, { "epoch": 0.4885568599549389, "grad_norm": 1.503631848742357, "learning_rate": 4.889510767352325e-05, "loss": 0.7079, "step": 4120 }, { "epoch": 0.4886754417170639, "grad_norm": 1.4481281580581187, "learning_rate": 4.889440184852661e-05, "loss": 0.6951, "step": 4121 }, { "epoch": 0.4887940234791889, "grad_norm": 1.3778356376513048, "learning_rate": 4.8893695803252906e-05, "loss": 0.5633, "step": 4122 }, { "epoch": 0.4889126052413139, "grad_norm": 1.6849604465025894, "learning_rate": 4.889298953770861e-05, "loss": 0.7162, "step": 4123 }, { "epoch": 0.4890311870034389, "grad_norm": 1.5255935518260273, "learning_rate": 4.889228305190026e-05, "loss": 0.6022, "step": 4124 }, { "epoch": 0.4891497687655639, "grad_norm": 1.4820373294779996, "learning_rate": 4.8891576345834356e-05, "loss": 0.7192, "step": 4125 }, { "epoch": 0.48926835052768886, "grad_norm": 1.8751054113127694, "learning_rate": 4.889086941951742e-05, "loss": 0.8899, "step": 4126 }, { "epoch": 0.48938693228981384, "grad_norm": 1.24773879548174, "learning_rate": 4.8890162272955965e-05, "loss": 0.5906, "step": 4127 }, { "epoch": 0.4895055140519388, "grad_norm": 1.704567914020501, "learning_rate": 4.8889454906156505e-05, "loss": 0.6906, "step": 4128 }, { "epoch": 0.4896240958140638, "grad_norm": 2.0079580171076024, "learning_rate": 4.888874731912557e-05, "loss": 0.7996, "step": 4129 }, { "epoch": 0.4897426775761888, "grad_norm": 1.805758434603456, "learning_rate": 4.888803951186968e-05, "loss": 0.8334, "step": 4130 }, { "epoch": 0.48986125933831376, "grad_norm": 1.6825341754045586, "learning_rate": 4.888733148439535e-05, "loss": 0.5857, "step": 4131 }, { "epoch": 0.48997984110043874, "grad_norm": 1.5677698233854147, "learning_rate": 4.888662323670913e-05, "loss": 0.4552, "step": 4132 }, { "epoch": 0.4900984228625637, "grad_norm": 1.6767162847898258, "learning_rate": 4.888591476881752e-05, "loss": 0.773, "step": 4133 }, { "epoch": 0.4902170046246887, "grad_norm": 1.5594819633686676, "learning_rate": 4.888520608072707e-05, "loss": 0.7898, "step": 4134 }, { "epoch": 0.4903355863868137, "grad_norm": 1.5421139748801498, "learning_rate": 4.888449717244432e-05, "loss": 0.4921, "step": 4135 }, { "epoch": 0.4904541681489387, "grad_norm": 1.34077188423937, "learning_rate": 4.88837880439758e-05, "loss": 0.4277, "step": 4136 }, { "epoch": 0.4905727499110637, "grad_norm": 1.561133768175664, "learning_rate": 4.8883078695328024e-05, "loss": 0.8, "step": 4137 }, { "epoch": 0.4906913316731887, "grad_norm": 1.6404591085136184, "learning_rate": 4.888236912650756e-05, "loss": 0.6402, "step": 4138 }, { "epoch": 0.49080991343531366, "grad_norm": 1.417770694364946, "learning_rate": 4.8881659337520936e-05, "loss": 0.6099, "step": 4139 }, { "epoch": 0.49092849519743864, "grad_norm": 1.6909666240490429, "learning_rate": 4.8880949328374706e-05, "loss": 0.5555, "step": 4140 }, { "epoch": 0.4910470769595636, "grad_norm": 1.245433334589589, "learning_rate": 4.88802390990754e-05, "loss": 0.5044, "step": 4141 }, { "epoch": 0.4911656587216886, "grad_norm": 1.8321746367368652, "learning_rate": 4.8879528649629576e-05, "loss": 0.9042, "step": 4142 }, { "epoch": 0.4912842404838136, "grad_norm": 1.305741089603041, "learning_rate": 4.887881798004378e-05, "loss": 0.4757, "step": 4143 }, { "epoch": 0.49140282224593856, "grad_norm": 1.3943127188450002, "learning_rate": 4.8878107090324566e-05, "loss": 0.5234, "step": 4144 }, { "epoch": 0.49152140400806354, "grad_norm": 1.6508612661443025, "learning_rate": 4.887739598047848e-05, "loss": 0.6387, "step": 4145 }, { "epoch": 0.4916399857701885, "grad_norm": 2.0678054677163806, "learning_rate": 4.887668465051209e-05, "loss": 0.9759, "step": 4146 }, { "epoch": 0.4917585675323135, "grad_norm": 1.5928100850259528, "learning_rate": 4.887597310043194e-05, "loss": 0.6206, "step": 4147 }, { "epoch": 0.49187714929443854, "grad_norm": 1.8251664810641781, "learning_rate": 4.88752613302446e-05, "loss": 0.8042, "step": 4148 }, { "epoch": 0.4919957310565635, "grad_norm": 1.699626363261998, "learning_rate": 4.8874549339956634e-05, "loss": 0.5784, "step": 4149 }, { "epoch": 0.4921143128186885, "grad_norm": 1.7501436979317118, "learning_rate": 4.8873837129574595e-05, "loss": 0.6352, "step": 4150 }, { "epoch": 0.4922328945808135, "grad_norm": 1.3134200765468103, "learning_rate": 4.887312469910506e-05, "loss": 0.517, "step": 4151 }, { "epoch": 0.49235147634293847, "grad_norm": 1.4403613011533414, "learning_rate": 4.887241204855458e-05, "loss": 0.753, "step": 4152 }, { "epoch": 0.49247005810506345, "grad_norm": 1.5750643229769377, "learning_rate": 4.887169917792974e-05, "loss": 0.7375, "step": 4153 }, { "epoch": 0.4925886398671884, "grad_norm": 1.7407713983302688, "learning_rate": 4.887098608723711e-05, "loss": 0.7501, "step": 4154 }, { "epoch": 0.4927072216293134, "grad_norm": 1.2546618616730723, "learning_rate": 4.8870272776483263e-05, "loss": 0.559, "step": 4155 }, { "epoch": 0.4928258033914384, "grad_norm": 1.5695898163434259, "learning_rate": 4.886955924567476e-05, "loss": 0.6674, "step": 4156 }, { "epoch": 0.49294438515356337, "grad_norm": 1.3865669006483519, "learning_rate": 4.886884549481821e-05, "loss": 0.5162, "step": 4157 }, { "epoch": 0.49306296691568835, "grad_norm": 1.5574696071485596, "learning_rate": 4.886813152392016e-05, "loss": 0.5564, "step": 4158 }, { "epoch": 0.49318154867781333, "grad_norm": 1.50565383158194, "learning_rate": 4.886741733298721e-05, "loss": 0.6875, "step": 4159 }, { "epoch": 0.4933001304399383, "grad_norm": 1.174068859498064, "learning_rate": 4.8866702922025934e-05, "loss": 0.4053, "step": 4160 }, { "epoch": 0.49341871220206335, "grad_norm": 1.477747394461469, "learning_rate": 4.886598829104293e-05, "loss": 0.5704, "step": 4161 }, { "epoch": 0.49353729396418833, "grad_norm": 1.6593838599138053, "learning_rate": 4.8865273440044784e-05, "loss": 0.6486, "step": 4162 }, { "epoch": 0.4936558757263133, "grad_norm": 1.257871526941035, "learning_rate": 4.8864558369038084e-05, "loss": 0.6281, "step": 4163 }, { "epoch": 0.4937744574884383, "grad_norm": 1.4212857980846925, "learning_rate": 4.8863843078029416e-05, "loss": 0.3895, "step": 4164 }, { "epoch": 0.49389303925056327, "grad_norm": 1.6167045505101534, "learning_rate": 4.886312756702537e-05, "loss": 0.6028, "step": 4165 }, { "epoch": 0.49401162101268825, "grad_norm": 1.5976736478926108, "learning_rate": 4.886241183603256e-05, "loss": 0.5893, "step": 4166 }, { "epoch": 0.49413020277481323, "grad_norm": 1.8155228437003454, "learning_rate": 4.886169588505758e-05, "loss": 0.7451, "step": 4167 }, { "epoch": 0.4942487845369382, "grad_norm": 1.8801691852585987, "learning_rate": 4.886097971410701e-05, "loss": 0.588, "step": 4168 }, { "epoch": 0.4943673662990632, "grad_norm": 1.897293570669478, "learning_rate": 4.8860263323187484e-05, "loss": 0.6171, "step": 4169 }, { "epoch": 0.4944859480611882, "grad_norm": 2.014350674090454, "learning_rate": 4.885954671230558e-05, "loss": 0.838, "step": 4170 }, { "epoch": 0.49460452982331315, "grad_norm": 1.4582943101312837, "learning_rate": 4.885882988146791e-05, "loss": 0.6183, "step": 4171 }, { "epoch": 0.49472311158543814, "grad_norm": 1.534112061742086, "learning_rate": 4.8858112830681103e-05, "loss": 0.5194, "step": 4172 }, { "epoch": 0.49484169334756317, "grad_norm": 1.477448897460018, "learning_rate": 4.885739555995174e-05, "loss": 0.4589, "step": 4173 }, { "epoch": 0.49496027510968815, "grad_norm": 1.6422734516772388, "learning_rate": 4.8856678069286444e-05, "loss": 0.657, "step": 4174 }, { "epoch": 0.49507885687181313, "grad_norm": 1.4317073687155395, "learning_rate": 4.885596035869184e-05, "loss": 0.511, "step": 4175 }, { "epoch": 0.4951974386339381, "grad_norm": 1.1763927505840408, "learning_rate": 4.885524242817453e-05, "loss": 0.4551, "step": 4176 }, { "epoch": 0.4953160203960631, "grad_norm": 1.411962298666528, "learning_rate": 4.885452427774114e-05, "loss": 0.7799, "step": 4177 }, { "epoch": 0.4954346021581881, "grad_norm": 1.5564797788444686, "learning_rate": 4.8853805907398285e-05, "loss": 0.7688, "step": 4178 }, { "epoch": 0.49555318392031306, "grad_norm": 1.5399659255151243, "learning_rate": 4.885308731715259e-05, "loss": 0.5685, "step": 4179 }, { "epoch": 0.49567176568243804, "grad_norm": 1.4136804299191332, "learning_rate": 4.885236850701068e-05, "loss": 0.5345, "step": 4180 }, { "epoch": 0.495790347444563, "grad_norm": 1.6974687606245338, "learning_rate": 4.8851649476979186e-05, "loss": 0.7021, "step": 4181 }, { "epoch": 0.495908929206688, "grad_norm": 1.479164042532574, "learning_rate": 4.885093022706474e-05, "loss": 0.6877, "step": 4182 }, { "epoch": 0.496027510968813, "grad_norm": 1.7268811040623062, "learning_rate": 4.885021075727395e-05, "loss": 0.7108, "step": 4183 }, { "epoch": 0.49614609273093796, "grad_norm": 1.7785163771219035, "learning_rate": 4.884949106761347e-05, "loss": 0.8364, "step": 4184 }, { "epoch": 0.49626467449306294, "grad_norm": 1.6849634177784654, "learning_rate": 4.884877115808994e-05, "loss": 0.8014, "step": 4185 }, { "epoch": 0.496383256255188, "grad_norm": 1.5578554150730428, "learning_rate": 4.884805102870997e-05, "loss": 0.789, "step": 4186 }, { "epoch": 0.49650183801731296, "grad_norm": 1.1898274144184005, "learning_rate": 4.884733067948022e-05, "loss": 0.3891, "step": 4187 }, { "epoch": 0.49662041977943794, "grad_norm": 1.6693449916857581, "learning_rate": 4.884661011040732e-05, "loss": 0.7627, "step": 4188 }, { "epoch": 0.4967390015415629, "grad_norm": 1.5523424827098533, "learning_rate": 4.8845889321497924e-05, "loss": 0.6997, "step": 4189 }, { "epoch": 0.4968575833036879, "grad_norm": 1.3891164936677896, "learning_rate": 4.8845168312758666e-05, "loss": 0.5352, "step": 4190 }, { "epoch": 0.4969761650658129, "grad_norm": 1.6715473176601217, "learning_rate": 4.88444470841962e-05, "loss": 0.7345, "step": 4191 }, { "epoch": 0.49709474682793786, "grad_norm": 1.7407339993306394, "learning_rate": 4.884372563581717e-05, "loss": 0.8162, "step": 4192 }, { "epoch": 0.49721332859006284, "grad_norm": 1.4784768258988155, "learning_rate": 4.884300396762823e-05, "loss": 0.5728, "step": 4193 }, { "epoch": 0.4973319103521878, "grad_norm": 1.489602126228985, "learning_rate": 4.884228207963603e-05, "loss": 0.6675, "step": 4194 }, { "epoch": 0.4974504921143128, "grad_norm": 1.6384792962331787, "learning_rate": 4.884155997184722e-05, "loss": 0.7756, "step": 4195 }, { "epoch": 0.4975690738764378, "grad_norm": 1.861307279641469, "learning_rate": 4.8840837644268475e-05, "loss": 0.6619, "step": 4196 }, { "epoch": 0.49768765563856276, "grad_norm": 1.473318276156382, "learning_rate": 4.884011509690644e-05, "loss": 0.6956, "step": 4197 }, { "epoch": 0.4978062374006878, "grad_norm": 1.061196047377696, "learning_rate": 4.883939232976777e-05, "loss": 0.371, "step": 4198 }, { "epoch": 0.4979248191628128, "grad_norm": 1.6278635107735988, "learning_rate": 4.883866934285914e-05, "loss": 0.754, "step": 4199 }, { "epoch": 0.49804340092493776, "grad_norm": 1.5076234190528521, "learning_rate": 4.883794613618722e-05, "loss": 0.7768, "step": 4200 }, { "epoch": 0.49816198268706274, "grad_norm": 1.872306013792876, "learning_rate": 4.883722270975867e-05, "loss": 0.9127, "step": 4201 }, { "epoch": 0.4982805644491877, "grad_norm": 1.4074893571222526, "learning_rate": 4.8836499063580146e-05, "loss": 0.5926, "step": 4202 }, { "epoch": 0.4983991462113127, "grad_norm": 1.5216243608245041, "learning_rate": 4.883577519765833e-05, "loss": 0.7436, "step": 4203 }, { "epoch": 0.4985177279734377, "grad_norm": 1.4405062317053228, "learning_rate": 4.883505111199991e-05, "loss": 0.7063, "step": 4204 }, { "epoch": 0.49863630973556267, "grad_norm": 1.779269922314004, "learning_rate": 4.883432680661154e-05, "loss": 0.8743, "step": 4205 }, { "epoch": 0.49875489149768765, "grad_norm": 1.4789425910200547, "learning_rate": 4.88336022814999e-05, "loss": 0.7553, "step": 4206 }, { "epoch": 0.4988734732598126, "grad_norm": 1.7794041384353698, "learning_rate": 4.883287753667168e-05, "loss": 0.619, "step": 4207 }, { "epoch": 0.4989920550219376, "grad_norm": 1.5933419171339571, "learning_rate": 4.883215257213355e-05, "loss": 0.7724, "step": 4208 }, { "epoch": 0.4991106367840626, "grad_norm": 1.4844177478767082, "learning_rate": 4.88314273878922e-05, "loss": 0.5486, "step": 4209 }, { "epoch": 0.49922921854618757, "grad_norm": 1.1878547377186868, "learning_rate": 4.8830701983954315e-05, "loss": 0.5809, "step": 4210 }, { "epoch": 0.4993478003083126, "grad_norm": 1.3873467405899866, "learning_rate": 4.882997636032658e-05, "loss": 0.702, "step": 4211 }, { "epoch": 0.4994663820704376, "grad_norm": 1.6044905952323638, "learning_rate": 4.8829250517015684e-05, "loss": 0.7674, "step": 4212 }, { "epoch": 0.49958496383256257, "grad_norm": 1.2884582427232414, "learning_rate": 4.8828524454028316e-05, "loss": 0.5025, "step": 4213 }, { "epoch": 0.49970354559468755, "grad_norm": 1.4136532623261207, "learning_rate": 4.882779817137118e-05, "loss": 0.7417, "step": 4214 }, { "epoch": 0.49982212735681253, "grad_norm": 1.3912320284418225, "learning_rate": 4.882707166905096e-05, "loss": 0.5058, "step": 4215 }, { "epoch": 0.4999407091189375, "grad_norm": 1.3825235372558804, "learning_rate": 4.8826344947074366e-05, "loss": 0.6605, "step": 4216 }, { "epoch": 0.5000592908810625, "grad_norm": 1.3774562264883885, "learning_rate": 4.882561800544808e-05, "loss": 0.5235, "step": 4217 }, { "epoch": 0.5001778726431875, "grad_norm": 1.801641450892196, "learning_rate": 4.882489084417882e-05, "loss": 0.8543, "step": 4218 }, { "epoch": 0.5002964544053125, "grad_norm": 1.7233188633556076, "learning_rate": 4.882416346327328e-05, "loss": 0.7866, "step": 4219 }, { "epoch": 0.5004150361674374, "grad_norm": 1.5515120599469079, "learning_rate": 4.8823435862738164e-05, "loss": 0.8005, "step": 4220 }, { "epoch": 0.5005336179295624, "grad_norm": 1.5546421398652037, "learning_rate": 4.882270804258018e-05, "loss": 0.7, "step": 4221 }, { "epoch": 0.5006521996916874, "grad_norm": 1.3279820447450426, "learning_rate": 4.882198000280605e-05, "loss": 0.5414, "step": 4222 }, { "epoch": 0.5007707814538124, "grad_norm": 1.6117348888181129, "learning_rate": 4.882125174342247e-05, "loss": 0.7985, "step": 4223 }, { "epoch": 0.5008893632159374, "grad_norm": 1.3126769099056539, "learning_rate": 4.882052326443617e-05, "loss": 0.5891, "step": 4224 }, { "epoch": 0.5010079449780623, "grad_norm": 1.2497594271467203, "learning_rate": 4.881979456585385e-05, "loss": 0.6086, "step": 4225 }, { "epoch": 0.5011265267401873, "grad_norm": 1.4286193726464136, "learning_rate": 4.881906564768224e-05, "loss": 0.4473, "step": 4226 }, { "epoch": 0.5012451085023123, "grad_norm": 1.514045026920415, "learning_rate": 4.881833650992804e-05, "loss": 0.4986, "step": 4227 }, { "epoch": 0.5013636902644373, "grad_norm": 1.2983605260850395, "learning_rate": 4.8817607152598e-05, "loss": 0.5017, "step": 4228 }, { "epoch": 0.5014822720265624, "grad_norm": 1.5663459204185326, "learning_rate": 4.881687757569882e-05, "loss": 0.5535, "step": 4229 }, { "epoch": 0.5016008537886874, "grad_norm": 1.5590348755485475, "learning_rate": 4.8816147779237233e-05, "loss": 0.616, "step": 4230 }, { "epoch": 0.5017194355508123, "grad_norm": 1.6849064477247209, "learning_rate": 4.881541776321997e-05, "loss": 0.5921, "step": 4231 }, { "epoch": 0.5018380173129373, "grad_norm": 1.3703863095399684, "learning_rate": 4.881468752765377e-05, "loss": 0.5513, "step": 4232 }, { "epoch": 0.5019565990750623, "grad_norm": 1.7246021852827493, "learning_rate": 4.881395707254535e-05, "loss": 0.5612, "step": 4233 }, { "epoch": 0.5020751808371873, "grad_norm": 1.6816272680633655, "learning_rate": 4.8813226397901445e-05, "loss": 0.5198, "step": 4234 }, { "epoch": 0.5021937625993123, "grad_norm": 1.356148412104441, "learning_rate": 4.881249550372879e-05, "loss": 0.5843, "step": 4235 }, { "epoch": 0.5023123443614372, "grad_norm": 1.996944208667317, "learning_rate": 4.8811764390034136e-05, "loss": 0.9025, "step": 4236 }, { "epoch": 0.5024309261235622, "grad_norm": 1.6982715690074102, "learning_rate": 4.881103305682421e-05, "loss": 0.6618, "step": 4237 }, { "epoch": 0.5025495078856872, "grad_norm": 1.6151975918804486, "learning_rate": 4.881030150410576e-05, "loss": 0.7062, "step": 4238 }, { "epoch": 0.5026680896478122, "grad_norm": 1.8491658047589286, "learning_rate": 4.880956973188553e-05, "loss": 0.5378, "step": 4239 }, { "epoch": 0.5027866714099372, "grad_norm": 1.4313157141007176, "learning_rate": 4.8808837740170255e-05, "loss": 0.6601, "step": 4240 }, { "epoch": 0.5029052531720621, "grad_norm": 1.740632426834221, "learning_rate": 4.88081055289667e-05, "loss": 0.5644, "step": 4241 }, { "epoch": 0.5030238349341871, "grad_norm": 1.7504321603976716, "learning_rate": 4.880737309828161e-05, "loss": 0.8082, "step": 4242 }, { "epoch": 0.5031424166963121, "grad_norm": 1.4353924714650723, "learning_rate": 4.8806640448121734e-05, "loss": 0.5907, "step": 4243 }, { "epoch": 0.5032609984584371, "grad_norm": 1.6107280250245621, "learning_rate": 4.8805907578493815e-05, "loss": 0.6974, "step": 4244 }, { "epoch": 0.5033795802205621, "grad_norm": 1.41990326212491, "learning_rate": 4.880517448940463e-05, "loss": 0.4632, "step": 4245 }, { "epoch": 0.503498161982687, "grad_norm": 1.5788742408691216, "learning_rate": 4.880444118086093e-05, "loss": 0.6662, "step": 4246 }, { "epoch": 0.503616743744812, "grad_norm": 1.6118784305872118, "learning_rate": 4.880370765286947e-05, "loss": 0.5502, "step": 4247 }, { "epoch": 0.503735325506937, "grad_norm": 2.1054420277646564, "learning_rate": 4.8802973905437014e-05, "loss": 1.0403, "step": 4248 }, { "epoch": 0.503853907269062, "grad_norm": 1.4594871542109626, "learning_rate": 4.880223993857033e-05, "loss": 0.7616, "step": 4249 }, { "epoch": 0.503972489031187, "grad_norm": 1.462582950889475, "learning_rate": 4.880150575227619e-05, "loss": 0.6323, "step": 4250 }, { "epoch": 0.504091070793312, "grad_norm": 1.7575824499643078, "learning_rate": 4.8800771346561344e-05, "loss": 0.7085, "step": 4251 }, { "epoch": 0.5042096525554369, "grad_norm": 1.3885744676303227, "learning_rate": 4.8800036721432576e-05, "loss": 0.5506, "step": 4252 }, { "epoch": 0.5043282343175619, "grad_norm": 1.573913101757252, "learning_rate": 4.8799301876896654e-05, "loss": 0.5521, "step": 4253 }, { "epoch": 0.504446816079687, "grad_norm": 1.6100738796254352, "learning_rate": 4.879856681296035e-05, "loss": 0.7821, "step": 4254 }, { "epoch": 0.504565397841812, "grad_norm": 1.2580862955276049, "learning_rate": 4.8797831529630444e-05, "loss": 0.5561, "step": 4255 }, { "epoch": 0.504683979603937, "grad_norm": 1.2241080064586294, "learning_rate": 4.8797096026913716e-05, "loss": 0.4425, "step": 4256 }, { "epoch": 0.5048025613660619, "grad_norm": 1.7110876613388555, "learning_rate": 4.8796360304816946e-05, "loss": 0.6743, "step": 4257 }, { "epoch": 0.5049211431281869, "grad_norm": 1.357456952015221, "learning_rate": 4.8795624363346915e-05, "loss": 0.7113, "step": 4258 }, { "epoch": 0.5050397248903119, "grad_norm": 1.0427786329694098, "learning_rate": 4.879488820251041e-05, "loss": 0.4747, "step": 4259 }, { "epoch": 0.5051583066524369, "grad_norm": 1.1352901158948334, "learning_rate": 4.879415182231421e-05, "loss": 0.4775, "step": 4260 }, { "epoch": 0.5052768884145619, "grad_norm": 1.721441822370237, "learning_rate": 4.879341522276512e-05, "loss": 0.6987, "step": 4261 }, { "epoch": 0.5053954701766868, "grad_norm": 1.5464069681153891, "learning_rate": 4.87926784038699e-05, "loss": 0.5903, "step": 4262 }, { "epoch": 0.5055140519388118, "grad_norm": 1.5314737576847943, "learning_rate": 4.879194136563537e-05, "loss": 0.6618, "step": 4263 }, { "epoch": 0.5056326337009368, "grad_norm": 1.4400615373627441, "learning_rate": 4.8791204108068325e-05, "loss": 0.5703, "step": 4264 }, { "epoch": 0.5057512154630618, "grad_norm": 1.5214081666390176, "learning_rate": 4.879046663117554e-05, "loss": 0.6375, "step": 4265 }, { "epoch": 0.5058697972251868, "grad_norm": 1.4304312983777487, "learning_rate": 4.878972893496383e-05, "loss": 0.4897, "step": 4266 }, { "epoch": 0.5059883789873117, "grad_norm": 1.525342411241944, "learning_rate": 4.8788991019439994e-05, "loss": 0.4666, "step": 4267 }, { "epoch": 0.5061069607494367, "grad_norm": 1.6547747495172347, "learning_rate": 4.878825288461083e-05, "loss": 0.8045, "step": 4268 }, { "epoch": 0.5062255425115617, "grad_norm": 1.7061044359475626, "learning_rate": 4.8787514530483154e-05, "loss": 0.8173, "step": 4269 }, { "epoch": 0.5063441242736867, "grad_norm": 1.6459805379134642, "learning_rate": 4.878677595706376e-05, "loss": 0.6463, "step": 4270 }, { "epoch": 0.5064627060358117, "grad_norm": 1.5001298552477893, "learning_rate": 4.878603716435946e-05, "loss": 0.431, "step": 4271 }, { "epoch": 0.5065812877979367, "grad_norm": 1.5647085445165811, "learning_rate": 4.8785298152377056e-05, "loss": 0.7245, "step": 4272 }, { "epoch": 0.5066998695600616, "grad_norm": 2.014055464191367, "learning_rate": 4.878455892112338e-05, "loss": 0.6241, "step": 4273 }, { "epoch": 0.5068184513221866, "grad_norm": 1.6431332446322693, "learning_rate": 4.878381947060524e-05, "loss": 0.7089, "step": 4274 }, { "epoch": 0.5069370330843116, "grad_norm": 1.5592536658261191, "learning_rate": 4.878307980082945e-05, "loss": 0.6915, "step": 4275 }, { "epoch": 0.5070556148464366, "grad_norm": 1.3562566241530667, "learning_rate": 4.878233991180282e-05, "loss": 0.5154, "step": 4276 }, { "epoch": 0.5071741966085616, "grad_norm": 1.795781477147339, "learning_rate": 4.8781599803532186e-05, "loss": 0.6748, "step": 4277 }, { "epoch": 0.5072927783706865, "grad_norm": 1.2232488770553969, "learning_rate": 4.8780859476024365e-05, "loss": 0.4768, "step": 4278 }, { "epoch": 0.5074113601328116, "grad_norm": 1.5798784371533379, "learning_rate": 4.878011892928618e-05, "loss": 0.5479, "step": 4279 }, { "epoch": 0.5075299418949366, "grad_norm": 1.5229794354372765, "learning_rate": 4.877937816332446e-05, "loss": 0.6582, "step": 4280 }, { "epoch": 0.5076485236570616, "grad_norm": 1.5747052056397377, "learning_rate": 4.8778637178146034e-05, "loss": 0.7234, "step": 4281 }, { "epoch": 0.5077671054191866, "grad_norm": 1.6317492098254287, "learning_rate": 4.8777895973757735e-05, "loss": 0.6669, "step": 4282 }, { "epoch": 0.5078856871813116, "grad_norm": 1.4086505952961939, "learning_rate": 4.877715455016639e-05, "loss": 0.4567, "step": 4283 }, { "epoch": 0.5080042689434365, "grad_norm": 1.7872303841028734, "learning_rate": 4.877641290737884e-05, "loss": 0.7972, "step": 4284 }, { "epoch": 0.5081228507055615, "grad_norm": 1.839904312015274, "learning_rate": 4.8775671045401917e-05, "loss": 0.7262, "step": 4285 }, { "epoch": 0.5082414324676865, "grad_norm": 1.7398210483128675, "learning_rate": 4.877492896424247e-05, "loss": 0.9163, "step": 4286 }, { "epoch": 0.5083600142298115, "grad_norm": 1.2991097468156927, "learning_rate": 4.8774186663907324e-05, "loss": 0.454, "step": 4287 }, { "epoch": 0.5084785959919365, "grad_norm": 1.448745979056906, "learning_rate": 4.877344414440333e-05, "loss": 0.6695, "step": 4288 }, { "epoch": 0.5085971777540614, "grad_norm": 1.204904170374811, "learning_rate": 4.8772701405737344e-05, "loss": 0.4157, "step": 4289 }, { "epoch": 0.5087157595161864, "grad_norm": 1.7133064179963, "learning_rate": 4.877195844791619e-05, "loss": 0.7037, "step": 4290 }, { "epoch": 0.5088343412783114, "grad_norm": 1.9414203542059185, "learning_rate": 4.877121527094674e-05, "loss": 0.6867, "step": 4291 }, { "epoch": 0.5089529230404364, "grad_norm": 1.7554749595806534, "learning_rate": 4.877047187483583e-05, "loss": 0.6805, "step": 4292 }, { "epoch": 0.5090715048025614, "grad_norm": 1.3328127396353469, "learning_rate": 4.876972825959032e-05, "loss": 0.5189, "step": 4293 }, { "epoch": 0.5091900865646863, "grad_norm": 1.5364637849081937, "learning_rate": 4.876898442521707e-05, "loss": 0.5138, "step": 4294 }, { "epoch": 0.5093086683268113, "grad_norm": 1.67316819522203, "learning_rate": 4.876824037172292e-05, "loss": 0.7078, "step": 4295 }, { "epoch": 0.5094272500889363, "grad_norm": 1.8755695795416718, "learning_rate": 4.876749609911475e-05, "loss": 0.6817, "step": 4296 }, { "epoch": 0.5095458318510613, "grad_norm": 1.6341065759798845, "learning_rate": 4.8766751607399406e-05, "loss": 0.7713, "step": 4297 }, { "epoch": 0.5096644136131863, "grad_norm": 1.9452685899135145, "learning_rate": 4.876600689658376e-05, "loss": 0.7185, "step": 4298 }, { "epoch": 0.5097829953753112, "grad_norm": 1.5140360276676623, "learning_rate": 4.876526196667467e-05, "loss": 0.9291, "step": 4299 }, { "epoch": 0.5099015771374362, "grad_norm": 1.4011209200325194, "learning_rate": 4.876451681767901e-05, "loss": 0.6767, "step": 4300 }, { "epoch": 0.5100201588995612, "grad_norm": 1.1976750679777761, "learning_rate": 4.876377144960365e-05, "loss": 0.4699, "step": 4301 }, { "epoch": 0.5101387406616862, "grad_norm": 1.490329002873059, "learning_rate": 4.876302586245545e-05, "loss": 0.5952, "step": 4302 }, { "epoch": 0.5102573224238112, "grad_norm": 1.2275232555015385, "learning_rate": 4.876228005624129e-05, "loss": 0.5118, "step": 4303 }, { "epoch": 0.5103759041859363, "grad_norm": 1.3172748855868155, "learning_rate": 4.876153403096806e-05, "loss": 0.6384, "step": 4304 }, { "epoch": 0.5104944859480612, "grad_norm": 1.336336256458007, "learning_rate": 4.876078778664262e-05, "loss": 0.5737, "step": 4305 }, { "epoch": 0.5106130677101862, "grad_norm": 1.729024748724994, "learning_rate": 4.876004132327185e-05, "loss": 0.8029, "step": 4306 }, { "epoch": 0.5107316494723112, "grad_norm": 1.3874971652252845, "learning_rate": 4.8759294640862644e-05, "loss": 0.537, "step": 4307 }, { "epoch": 0.5108502312344362, "grad_norm": 1.7491461912373207, "learning_rate": 4.875854773942187e-05, "loss": 0.8612, "step": 4308 }, { "epoch": 0.5109688129965612, "grad_norm": 1.882271306943973, "learning_rate": 4.875780061895642e-05, "loss": 0.8454, "step": 4309 }, { "epoch": 0.5110873947586861, "grad_norm": 1.3244913070521658, "learning_rate": 4.875705327947319e-05, "loss": 0.5742, "step": 4310 }, { "epoch": 0.5112059765208111, "grad_norm": 1.4003872733668077, "learning_rate": 4.875630572097904e-05, "loss": 0.5193, "step": 4311 }, { "epoch": 0.5113245582829361, "grad_norm": 1.5406857483333913, "learning_rate": 4.87555579434809e-05, "loss": 0.7657, "step": 4312 }, { "epoch": 0.5114431400450611, "grad_norm": 1.4874976336138959, "learning_rate": 4.8754809946985655e-05, "loss": 0.6416, "step": 4313 }, { "epoch": 0.5115617218071861, "grad_norm": 1.669455380926477, "learning_rate": 4.8754061731500175e-05, "loss": 0.68, "step": 4314 }, { "epoch": 0.511680303569311, "grad_norm": 1.661920317808168, "learning_rate": 4.875331329703139e-05, "loss": 0.825, "step": 4315 }, { "epoch": 0.511798885331436, "grad_norm": 1.7875666062831155, "learning_rate": 4.875256464358618e-05, "loss": 0.7185, "step": 4316 }, { "epoch": 0.511917467093561, "grad_norm": 1.3440039791447256, "learning_rate": 4.875181577117144e-05, "loss": 0.53, "step": 4317 }, { "epoch": 0.512036048855686, "grad_norm": 1.6952333574965321, "learning_rate": 4.87510666797941e-05, "loss": 0.8539, "step": 4318 }, { "epoch": 0.512154630617811, "grad_norm": 1.694943913024686, "learning_rate": 4.875031736946104e-05, "loss": 0.6808, "step": 4319 }, { "epoch": 0.512273212379936, "grad_norm": 1.4261773998506648, "learning_rate": 4.8749567840179185e-05, "loss": 0.4835, "step": 4320 }, { "epoch": 0.5123917941420609, "grad_norm": 1.7274971706842293, "learning_rate": 4.8748818091955434e-05, "loss": 0.6794, "step": 4321 }, { "epoch": 0.5125103759041859, "grad_norm": 1.795531929554698, "learning_rate": 4.8748068124796706e-05, "loss": 0.8452, "step": 4322 }, { "epoch": 0.5126289576663109, "grad_norm": 1.407552464228538, "learning_rate": 4.8747317938709905e-05, "loss": 0.6288, "step": 4323 }, { "epoch": 0.5127475394284359, "grad_norm": 1.6506024859839095, "learning_rate": 4.8746567533701957e-05, "loss": 0.6969, "step": 4324 }, { "epoch": 0.5128661211905609, "grad_norm": 1.2598084894610087, "learning_rate": 4.8745816909779786e-05, "loss": 0.5747, "step": 4325 }, { "epoch": 0.5129847029526858, "grad_norm": 1.2269588253904922, "learning_rate": 4.874506606695029e-05, "loss": 0.4926, "step": 4326 }, { "epoch": 0.5131032847148108, "grad_norm": 1.7530810964390664, "learning_rate": 4.874431500522041e-05, "loss": 0.7086, "step": 4327 }, { "epoch": 0.5132218664769358, "grad_norm": 1.3065926719476002, "learning_rate": 4.8743563724597056e-05, "loss": 0.5385, "step": 4328 }, { "epoch": 0.5133404482390609, "grad_norm": 1.2566848999354667, "learning_rate": 4.8742812225087164e-05, "loss": 0.3724, "step": 4329 }, { "epoch": 0.5134590300011859, "grad_norm": 1.8048932031989933, "learning_rate": 4.874206050669766e-05, "loss": 0.5741, "step": 4330 }, { "epoch": 0.5135776117633108, "grad_norm": 1.4327277197720851, "learning_rate": 4.874130856943547e-05, "loss": 0.4936, "step": 4331 }, { "epoch": 0.5136961935254358, "grad_norm": 2.0533666083397892, "learning_rate": 4.8740556413307536e-05, "loss": 0.6106, "step": 4332 }, { "epoch": 0.5138147752875608, "grad_norm": 1.7843774945246158, "learning_rate": 4.873980403832078e-05, "loss": 0.7307, "step": 4333 }, { "epoch": 0.5139333570496858, "grad_norm": 1.4792530379858109, "learning_rate": 4.873905144448214e-05, "loss": 0.6169, "step": 4334 }, { "epoch": 0.5140519388118108, "grad_norm": 1.835368481503766, "learning_rate": 4.873829863179856e-05, "loss": 0.7943, "step": 4335 }, { "epoch": 0.5141705205739358, "grad_norm": 1.7910123407167908, "learning_rate": 4.873754560027697e-05, "loss": 0.4681, "step": 4336 }, { "epoch": 0.5142891023360607, "grad_norm": 1.6340724223139678, "learning_rate": 4.873679234992432e-05, "loss": 0.7023, "step": 4337 }, { "epoch": 0.5144076840981857, "grad_norm": 1.5833296410382676, "learning_rate": 4.873603888074756e-05, "loss": 0.613, "step": 4338 }, { "epoch": 0.5145262658603107, "grad_norm": 1.18941440568035, "learning_rate": 4.873528519275362e-05, "loss": 0.4358, "step": 4339 }, { "epoch": 0.5146448476224357, "grad_norm": 1.239768102946135, "learning_rate": 4.8734531285949464e-05, "loss": 0.5141, "step": 4340 }, { "epoch": 0.5147634293845607, "grad_norm": 1.3547619010597018, "learning_rate": 4.873377716034203e-05, "loss": 0.6689, "step": 4341 }, { "epoch": 0.5148820111466856, "grad_norm": 1.513421177250175, "learning_rate": 4.8733022815938276e-05, "loss": 0.7213, "step": 4342 }, { "epoch": 0.5150005929088106, "grad_norm": 1.452987781700197, "learning_rate": 4.873226825274516e-05, "loss": 0.6429, "step": 4343 }, { "epoch": 0.5151191746709356, "grad_norm": 1.479006782101966, "learning_rate": 4.8731513470769624e-05, "loss": 0.7134, "step": 4344 }, { "epoch": 0.5152377564330606, "grad_norm": 1.4103549864026725, "learning_rate": 4.873075847001864e-05, "loss": 0.451, "step": 4345 }, { "epoch": 0.5153563381951856, "grad_norm": 1.3421869823528818, "learning_rate": 4.873000325049916e-05, "loss": 0.4948, "step": 4346 }, { "epoch": 0.5154749199573105, "grad_norm": 1.212312606478388, "learning_rate": 4.8729247812218154e-05, "loss": 0.4737, "step": 4347 }, { "epoch": 0.5155935017194355, "grad_norm": 1.423683265756171, "learning_rate": 4.872849215518258e-05, "loss": 0.637, "step": 4348 }, { "epoch": 0.5157120834815605, "grad_norm": 2.2363116010565225, "learning_rate": 4.8727736279399404e-05, "loss": 0.7408, "step": 4349 }, { "epoch": 0.5158306652436855, "grad_norm": 1.2298470032179871, "learning_rate": 4.87269801848756e-05, "loss": 0.5297, "step": 4350 }, { "epoch": 0.5159492470058105, "grad_norm": 1.7169089715602739, "learning_rate": 4.872622387161814e-05, "loss": 0.7142, "step": 4351 }, { "epoch": 0.5160678287679354, "grad_norm": 1.8538833576270843, "learning_rate": 4.872546733963398e-05, "loss": 0.5517, "step": 4352 }, { "epoch": 0.5161864105300604, "grad_norm": 2.055110615270066, "learning_rate": 4.8724710588930114e-05, "loss": 0.7678, "step": 4353 }, { "epoch": 0.5163049922921855, "grad_norm": 1.7505100473095558, "learning_rate": 4.87239536195135e-05, "loss": 0.6489, "step": 4354 }, { "epoch": 0.5164235740543105, "grad_norm": 1.719659719884238, "learning_rate": 4.872319643139113e-05, "loss": 0.5574, "step": 4355 }, { "epoch": 0.5165421558164355, "grad_norm": 1.2815322830499518, "learning_rate": 4.8722439024569986e-05, "loss": 0.5784, "step": 4356 }, { "epoch": 0.5166607375785605, "grad_norm": 1.4048186273630272, "learning_rate": 4.872168139905704e-05, "loss": 0.6516, "step": 4357 }, { "epoch": 0.5167793193406854, "grad_norm": 1.4863377727791536, "learning_rate": 4.872092355485928e-05, "loss": 0.7097, "step": 4358 }, { "epoch": 0.5168979011028104, "grad_norm": 1.402777851749483, "learning_rate": 4.872016549198369e-05, "loss": 0.5733, "step": 4359 }, { "epoch": 0.5170164828649354, "grad_norm": 1.4310797835071725, "learning_rate": 4.871940721043727e-05, "loss": 0.7479, "step": 4360 }, { "epoch": 0.5171350646270604, "grad_norm": 1.4056118446343133, "learning_rate": 4.8718648710227e-05, "loss": 0.5605, "step": 4361 }, { "epoch": 0.5172536463891854, "grad_norm": 1.6034416104079923, "learning_rate": 4.8717889991359873e-05, "loss": 0.599, "step": 4362 }, { "epoch": 0.5173722281513103, "grad_norm": 1.421445906078631, "learning_rate": 4.871713105384288e-05, "loss": 0.5901, "step": 4363 }, { "epoch": 0.5174908099134353, "grad_norm": 1.733640423948289, "learning_rate": 4.871637189768303e-05, "loss": 0.6949, "step": 4364 }, { "epoch": 0.5176093916755603, "grad_norm": 1.2853813523999746, "learning_rate": 4.8715612522887314e-05, "loss": 0.5228, "step": 4365 }, { "epoch": 0.5177279734376853, "grad_norm": 1.8764146874812087, "learning_rate": 4.871485292946273e-05, "loss": 0.6983, "step": 4366 }, { "epoch": 0.5178465551998103, "grad_norm": 1.2681443587754253, "learning_rate": 4.871409311741628e-05, "loss": 0.4408, "step": 4367 }, { "epoch": 0.5179651369619352, "grad_norm": 1.6384801968529032, "learning_rate": 4.8713333086754975e-05, "loss": 0.6928, "step": 4368 }, { "epoch": 0.5180837187240602, "grad_norm": 1.3750824198102838, "learning_rate": 4.871257283748582e-05, "loss": 0.5717, "step": 4369 }, { "epoch": 0.5182023004861852, "grad_norm": 1.5850602281076362, "learning_rate": 4.871181236961582e-05, "loss": 0.6826, "step": 4370 }, { "epoch": 0.5183208822483102, "grad_norm": 1.615717861638507, "learning_rate": 4.871105168315199e-05, "loss": 0.5857, "step": 4371 }, { "epoch": 0.5184394640104352, "grad_norm": 1.6014217047581945, "learning_rate": 4.871029077810133e-05, "loss": 0.6551, "step": 4372 }, { "epoch": 0.5185580457725601, "grad_norm": 1.596441921016555, "learning_rate": 4.8709529654470874e-05, "loss": 0.6224, "step": 4373 }, { "epoch": 0.5186766275346851, "grad_norm": 1.6422005677022933, "learning_rate": 4.870876831226763e-05, "loss": 0.6878, "step": 4374 }, { "epoch": 0.5187952092968101, "grad_norm": 1.1944226113604948, "learning_rate": 4.8708006751498607e-05, "loss": 0.4179, "step": 4375 }, { "epoch": 0.5189137910589351, "grad_norm": 1.7688920948212643, "learning_rate": 4.870724497217084e-05, "loss": 0.757, "step": 4376 }, { "epoch": 0.5190323728210601, "grad_norm": 1.567790241085552, "learning_rate": 4.870648297429134e-05, "loss": 0.6371, "step": 4377 }, { "epoch": 0.519150954583185, "grad_norm": 1.3964084687908715, "learning_rate": 4.870572075786715e-05, "loss": 0.5215, "step": 4378 }, { "epoch": 0.5192695363453101, "grad_norm": 1.5268418666301682, "learning_rate": 4.8704958322905284e-05, "loss": 0.653, "step": 4379 }, { "epoch": 0.5193881181074351, "grad_norm": 1.4719155273625835, "learning_rate": 4.8704195669412754e-05, "loss": 0.5743, "step": 4380 }, { "epoch": 0.5195066998695601, "grad_norm": 1.4249703400761256, "learning_rate": 4.8703432797396626e-05, "loss": 0.6077, "step": 4381 }, { "epoch": 0.5196252816316851, "grad_norm": 1.584314998404907, "learning_rate": 4.870266970686391e-05, "loss": 0.7967, "step": 4382 }, { "epoch": 0.5197438633938101, "grad_norm": 1.5201360748304293, "learning_rate": 4.870190639782164e-05, "loss": 0.6638, "step": 4383 }, { "epoch": 0.519862445155935, "grad_norm": 1.5792573939940566, "learning_rate": 4.870114287027686e-05, "loss": 0.6002, "step": 4384 }, { "epoch": 0.51998102691806, "grad_norm": 1.3670244656283548, "learning_rate": 4.870037912423661e-05, "loss": 0.4267, "step": 4385 }, { "epoch": 0.520099608680185, "grad_norm": 1.7266240762366005, "learning_rate": 4.8699615159707925e-05, "loss": 0.5855, "step": 4386 }, { "epoch": 0.52021819044231, "grad_norm": 2.0213895654162513, "learning_rate": 4.8698850976697854e-05, "loss": 0.8816, "step": 4387 }, { "epoch": 0.520336772204435, "grad_norm": 1.608368658650489, "learning_rate": 4.8698086575213436e-05, "loss": 0.6886, "step": 4388 }, { "epoch": 0.52045535396656, "grad_norm": 1.4906512669942593, "learning_rate": 4.869732195526172e-05, "loss": 0.5743, "step": 4389 }, { "epoch": 0.5205739357286849, "grad_norm": 1.231440785445485, "learning_rate": 4.869655711684975e-05, "loss": 0.4757, "step": 4390 }, { "epoch": 0.5206925174908099, "grad_norm": 1.6043355989429942, "learning_rate": 4.869579205998459e-05, "loss": 0.5134, "step": 4391 }, { "epoch": 0.5208110992529349, "grad_norm": 1.476836755291894, "learning_rate": 4.869502678467329e-05, "loss": 0.5482, "step": 4392 }, { "epoch": 0.5209296810150599, "grad_norm": 1.5454196814220742, "learning_rate": 4.869426129092289e-05, "loss": 0.5682, "step": 4393 }, { "epoch": 0.5210482627771849, "grad_norm": 1.5800482959728221, "learning_rate": 4.8693495578740455e-05, "loss": 0.5445, "step": 4394 }, { "epoch": 0.5211668445393098, "grad_norm": 2.0692935688005814, "learning_rate": 4.869272964813305e-05, "loss": 0.7565, "step": 4395 }, { "epoch": 0.5212854263014348, "grad_norm": 1.4852198544233544, "learning_rate": 4.869196349910773e-05, "loss": 0.6052, "step": 4396 }, { "epoch": 0.5214040080635598, "grad_norm": 1.660823847328371, "learning_rate": 4.869119713167156e-05, "loss": 0.5963, "step": 4397 }, { "epoch": 0.5215225898256848, "grad_norm": 1.243286114023559, "learning_rate": 4.869043054583161e-05, "loss": 0.447, "step": 4398 }, { "epoch": 0.5216411715878098, "grad_norm": 1.678068850240401, "learning_rate": 4.868966374159494e-05, "loss": 0.6476, "step": 4399 }, { "epoch": 0.5217597533499347, "grad_norm": 1.445100279676624, "learning_rate": 4.8688896718968616e-05, "loss": 0.6506, "step": 4400 }, { "epoch": 0.5218783351120597, "grad_norm": 2.038456547215106, "learning_rate": 4.868812947795971e-05, "loss": 0.8992, "step": 4401 }, { "epoch": 0.5219969168741847, "grad_norm": 1.5332951920831164, "learning_rate": 4.868736201857531e-05, "loss": 0.4276, "step": 4402 }, { "epoch": 0.5221154986363097, "grad_norm": 1.5139872669907983, "learning_rate": 4.868659434082247e-05, "loss": 0.71, "step": 4403 }, { "epoch": 0.5222340803984348, "grad_norm": 1.6006944262613803, "learning_rate": 4.868582644470828e-05, "loss": 0.6845, "step": 4404 }, { "epoch": 0.5223526621605598, "grad_norm": 1.7769887828234796, "learning_rate": 4.868505833023982e-05, "loss": 0.7685, "step": 4405 }, { "epoch": 0.5224712439226847, "grad_norm": 1.3576119350879052, "learning_rate": 4.868428999742416e-05, "loss": 0.4362, "step": 4406 }, { "epoch": 0.5225898256848097, "grad_norm": 1.308231904349106, "learning_rate": 4.868352144626839e-05, "loss": 0.5516, "step": 4407 }, { "epoch": 0.5227084074469347, "grad_norm": 1.5768439955419067, "learning_rate": 4.86827526767796e-05, "loss": 0.618, "step": 4408 }, { "epoch": 0.5228269892090597, "grad_norm": 1.4994561336583552, "learning_rate": 4.868198368896487e-05, "loss": 0.7538, "step": 4409 }, { "epoch": 0.5229455709711847, "grad_norm": 1.4866490704201185, "learning_rate": 4.8681214482831286e-05, "loss": 0.5659, "step": 4410 }, { "epoch": 0.5230641527333096, "grad_norm": 1.5778425787056913, "learning_rate": 4.8680445058385946e-05, "loss": 0.6206, "step": 4411 }, { "epoch": 0.5231827344954346, "grad_norm": 1.6963734844616165, "learning_rate": 4.867967541563594e-05, "loss": 0.623, "step": 4412 }, { "epoch": 0.5233013162575596, "grad_norm": 1.420854388197455, "learning_rate": 4.867890555458837e-05, "loss": 0.5316, "step": 4413 }, { "epoch": 0.5234198980196846, "grad_norm": 1.4783436884954324, "learning_rate": 4.867813547525033e-05, "loss": 0.5288, "step": 4414 }, { "epoch": 0.5235384797818096, "grad_norm": 1.49724289146969, "learning_rate": 4.867736517762891e-05, "loss": 0.4132, "step": 4415 }, { "epoch": 0.5236570615439345, "grad_norm": 1.682038987345143, "learning_rate": 4.867659466173122e-05, "loss": 0.6077, "step": 4416 }, { "epoch": 0.5237756433060595, "grad_norm": 1.9428196826213506, "learning_rate": 4.867582392756437e-05, "loss": 0.8046, "step": 4417 }, { "epoch": 0.5238942250681845, "grad_norm": 1.5279885282713979, "learning_rate": 4.867505297513545e-05, "loss": 0.5574, "step": 4418 }, { "epoch": 0.5240128068303095, "grad_norm": 1.5314906046574306, "learning_rate": 4.867428180445157e-05, "loss": 0.5091, "step": 4419 }, { "epoch": 0.5241313885924345, "grad_norm": 1.7760739042591318, "learning_rate": 4.867351041551984e-05, "loss": 0.6293, "step": 4420 }, { "epoch": 0.5242499703545594, "grad_norm": 1.4129206158173455, "learning_rate": 4.8672738808347384e-05, "loss": 0.4576, "step": 4421 }, { "epoch": 0.5243685521166844, "grad_norm": 1.4479174937295485, "learning_rate": 4.8671966982941306e-05, "loss": 0.5571, "step": 4422 }, { "epoch": 0.5244871338788094, "grad_norm": 1.8224902975448736, "learning_rate": 4.8671194939308716e-05, "loss": 0.5339, "step": 4423 }, { "epoch": 0.5246057156409344, "grad_norm": 1.7952334135203418, "learning_rate": 4.8670422677456735e-05, "loss": 0.6919, "step": 4424 }, { "epoch": 0.5247242974030594, "grad_norm": 1.33303578988859, "learning_rate": 4.8669650197392486e-05, "loss": 0.4483, "step": 4425 }, { "epoch": 0.5248428791651844, "grad_norm": 1.3571959830512428, "learning_rate": 4.866887749912309e-05, "loss": 0.464, "step": 4426 }, { "epoch": 0.5249614609273093, "grad_norm": 1.552878108234724, "learning_rate": 4.866810458265566e-05, "loss": 0.6021, "step": 4427 }, { "epoch": 0.5250800426894343, "grad_norm": 1.7853948568256417, "learning_rate": 4.866733144799734e-05, "loss": 0.7552, "step": 4428 }, { "epoch": 0.5251986244515594, "grad_norm": 1.4469313468748655, "learning_rate": 4.866655809515524e-05, "loss": 0.4424, "step": 4429 }, { "epoch": 0.5253172062136844, "grad_norm": 1.4555099461536596, "learning_rate": 4.86657845241365e-05, "loss": 0.5232, "step": 4430 }, { "epoch": 0.5254357879758094, "grad_norm": 2.001668353118628, "learning_rate": 4.8665010734948245e-05, "loss": 0.7381, "step": 4431 }, { "epoch": 0.5255543697379343, "grad_norm": 1.59099077803781, "learning_rate": 4.8664236727597615e-05, "loss": 0.5631, "step": 4432 }, { "epoch": 0.5256729515000593, "grad_norm": 2.0847687272798905, "learning_rate": 4.866346250209173e-05, "loss": 0.8796, "step": 4433 }, { "epoch": 0.5257915332621843, "grad_norm": 1.2139686609665075, "learning_rate": 4.866268805843775e-05, "loss": 0.4307, "step": 4434 }, { "epoch": 0.5259101150243093, "grad_norm": 1.75236246527139, "learning_rate": 4.86619133966428e-05, "loss": 0.7721, "step": 4435 }, { "epoch": 0.5260286967864343, "grad_norm": 1.7146225047154127, "learning_rate": 4.866113851671402e-05, "loss": 0.8131, "step": 4436 }, { "epoch": 0.5261472785485592, "grad_norm": 1.438602205053691, "learning_rate": 4.8660363418658575e-05, "loss": 0.6005, "step": 4437 }, { "epoch": 0.5262658603106842, "grad_norm": 1.6661042257577836, "learning_rate": 4.865958810248358e-05, "loss": 0.7491, "step": 4438 }, { "epoch": 0.5263844420728092, "grad_norm": 1.2587764999281459, "learning_rate": 4.8658812568196195e-05, "loss": 0.5335, "step": 4439 }, { "epoch": 0.5265030238349342, "grad_norm": 1.3591468933557243, "learning_rate": 4.8658036815803573e-05, "loss": 0.5218, "step": 4440 }, { "epoch": 0.5266216055970592, "grad_norm": 1.1845006969337681, "learning_rate": 4.865726084531286e-05, "loss": 0.4206, "step": 4441 }, { "epoch": 0.5267401873591842, "grad_norm": 1.3101098756465026, "learning_rate": 4.8656484656731214e-05, "loss": 0.4367, "step": 4442 }, { "epoch": 0.5268587691213091, "grad_norm": 1.286824883534715, "learning_rate": 4.865570825006579e-05, "loss": 0.532, "step": 4443 }, { "epoch": 0.5269773508834341, "grad_norm": 1.9545118725163724, "learning_rate": 4.8654931625323746e-05, "loss": 0.7781, "step": 4444 }, { "epoch": 0.5270959326455591, "grad_norm": 1.3976575411226106, "learning_rate": 4.865415478251224e-05, "loss": 0.6011, "step": 4445 }, { "epoch": 0.5272145144076841, "grad_norm": 1.4652432206964596, "learning_rate": 4.8653377721638435e-05, "loss": 0.6347, "step": 4446 }, { "epoch": 0.5273330961698091, "grad_norm": 1.1703396970276103, "learning_rate": 4.8652600442709484e-05, "loss": 0.3956, "step": 4447 }, { "epoch": 0.527451677931934, "grad_norm": 1.8154968278186074, "learning_rate": 4.8651822945732575e-05, "loss": 0.574, "step": 4448 }, { "epoch": 0.527570259694059, "grad_norm": 1.7647361481699273, "learning_rate": 4.8651045230714857e-05, "loss": 0.5506, "step": 4449 }, { "epoch": 0.527688841456184, "grad_norm": 1.6353746349683205, "learning_rate": 4.86502672976635e-05, "loss": 0.5793, "step": 4450 }, { "epoch": 0.527807423218309, "grad_norm": 1.4846798224644913, "learning_rate": 4.864948914658568e-05, "loss": 0.6354, "step": 4451 }, { "epoch": 0.527926004980434, "grad_norm": 1.5826525768354285, "learning_rate": 4.8648710777488576e-05, "loss": 0.5599, "step": 4452 }, { "epoch": 0.5280445867425589, "grad_norm": 1.4254534292732386, "learning_rate": 4.8647932190379355e-05, "loss": 0.5717, "step": 4453 }, { "epoch": 0.528163168504684, "grad_norm": 1.636891898032809, "learning_rate": 4.86471533852652e-05, "loss": 0.7132, "step": 4454 }, { "epoch": 0.528281750266809, "grad_norm": 1.4810434232465695, "learning_rate": 4.864637436215329e-05, "loss": 0.4975, "step": 4455 }, { "epoch": 0.528400332028934, "grad_norm": 1.8671196121855447, "learning_rate": 4.86455951210508e-05, "loss": 0.7043, "step": 4456 }, { "epoch": 0.528518913791059, "grad_norm": 1.8399867544031365, "learning_rate": 4.864481566196493e-05, "loss": 0.6749, "step": 4457 }, { "epoch": 0.528637495553184, "grad_norm": 1.6316633399417124, "learning_rate": 4.8644035984902846e-05, "loss": 0.7841, "step": 4458 }, { "epoch": 0.5287560773153089, "grad_norm": 1.546258909723542, "learning_rate": 4.864325608987175e-05, "loss": 0.6653, "step": 4459 }, { "epoch": 0.5288746590774339, "grad_norm": 1.601330454369243, "learning_rate": 4.864247597687882e-05, "loss": 0.6869, "step": 4460 }, { "epoch": 0.5289932408395589, "grad_norm": 1.2041070205524016, "learning_rate": 4.8641695645931264e-05, "loss": 0.4076, "step": 4461 }, { "epoch": 0.5291118226016839, "grad_norm": 1.3710377494112709, "learning_rate": 4.8640915097036255e-05, "loss": 0.5934, "step": 4462 }, { "epoch": 0.5292304043638089, "grad_norm": 1.4190468532166456, "learning_rate": 4.8640134330201004e-05, "loss": 0.621, "step": 4463 }, { "epoch": 0.5293489861259338, "grad_norm": 1.0811306885788852, "learning_rate": 4.86393533454327e-05, "loss": 0.3705, "step": 4464 }, { "epoch": 0.5294675678880588, "grad_norm": 1.4354605043642081, "learning_rate": 4.8638572142738545e-05, "loss": 0.6095, "step": 4465 }, { "epoch": 0.5295861496501838, "grad_norm": 1.792087480721045, "learning_rate": 4.863779072212575e-05, "loss": 0.8468, "step": 4466 }, { "epoch": 0.5297047314123088, "grad_norm": 1.6378610706041166, "learning_rate": 4.86370090836015e-05, "loss": 0.6032, "step": 4467 }, { "epoch": 0.5298233131744338, "grad_norm": 1.3982686477806978, "learning_rate": 4.8636227227173024e-05, "loss": 0.5256, "step": 4468 }, { "epoch": 0.5299418949365587, "grad_norm": 1.5318600235278557, "learning_rate": 4.863544515284752e-05, "loss": 0.6063, "step": 4469 }, { "epoch": 0.5300604766986837, "grad_norm": 1.4953853685168577, "learning_rate": 4.863466286063218e-05, "loss": 0.6626, "step": 4470 }, { "epoch": 0.5301790584608087, "grad_norm": 1.3024439760060738, "learning_rate": 4.8633880350534245e-05, "loss": 0.4093, "step": 4471 }, { "epoch": 0.5302976402229337, "grad_norm": 1.5799478365202173, "learning_rate": 4.863309762256091e-05, "loss": 0.5423, "step": 4472 }, { "epoch": 0.5304162219850587, "grad_norm": 1.313413430077652, "learning_rate": 4.863231467671939e-05, "loss": 0.4018, "step": 4473 }, { "epoch": 0.5305348037471836, "grad_norm": 1.7397068473489572, "learning_rate": 4.863153151301692e-05, "loss": 0.9764, "step": 4474 }, { "epoch": 0.5306533855093086, "grad_norm": 1.280254851410496, "learning_rate": 4.86307481314607e-05, "loss": 0.4616, "step": 4475 }, { "epoch": 0.5307719672714336, "grad_norm": 1.787276865710569, "learning_rate": 4.862996453205796e-05, "loss": 0.8909, "step": 4476 }, { "epoch": 0.5308905490335586, "grad_norm": 1.630146750970933, "learning_rate": 4.8629180714815926e-05, "loss": 0.5798, "step": 4477 }, { "epoch": 0.5310091307956836, "grad_norm": 1.8237607381914631, "learning_rate": 4.8628396679741825e-05, "loss": 0.738, "step": 4478 }, { "epoch": 0.5311277125578087, "grad_norm": 1.353789499643375, "learning_rate": 4.8627612426842875e-05, "loss": 0.551, "step": 4479 }, { "epoch": 0.5312462943199336, "grad_norm": 1.860142452235117, "learning_rate": 4.862682795612632e-05, "loss": 0.7932, "step": 4480 }, { "epoch": 0.5313648760820586, "grad_norm": 1.5224150679079351, "learning_rate": 4.862604326759938e-05, "loss": 0.5583, "step": 4481 }, { "epoch": 0.5314834578441836, "grad_norm": 1.1972188330289004, "learning_rate": 4.86252583612693e-05, "loss": 0.4846, "step": 4482 }, { "epoch": 0.5316020396063086, "grad_norm": 1.340221200696147, "learning_rate": 4.86244732371433e-05, "loss": 0.4552, "step": 4483 }, { "epoch": 0.5317206213684336, "grad_norm": 1.6678770263135654, "learning_rate": 4.862368789522863e-05, "loss": 0.8071, "step": 4484 }, { "epoch": 0.5318392031305585, "grad_norm": 1.4869153711237448, "learning_rate": 4.862290233553253e-05, "loss": 0.7134, "step": 4485 }, { "epoch": 0.5319577848926835, "grad_norm": 1.6989992856069864, "learning_rate": 4.862211655806223e-05, "loss": 0.8094, "step": 4486 }, { "epoch": 0.5320763666548085, "grad_norm": 1.5296606323292, "learning_rate": 4.862133056282499e-05, "loss": 0.6472, "step": 4487 }, { "epoch": 0.5321949484169335, "grad_norm": 1.517409423047397, "learning_rate": 4.862054434982804e-05, "loss": 0.6976, "step": 4488 }, { "epoch": 0.5323135301790585, "grad_norm": 1.8003615121850975, "learning_rate": 4.861975791907865e-05, "loss": 0.7889, "step": 4489 }, { "epoch": 0.5324321119411835, "grad_norm": 1.7327397917690093, "learning_rate": 4.861897127058405e-05, "loss": 0.6698, "step": 4490 }, { "epoch": 0.5325506937033084, "grad_norm": 1.136977040545438, "learning_rate": 4.86181844043515e-05, "loss": 0.5029, "step": 4491 }, { "epoch": 0.5326692754654334, "grad_norm": 1.3722089214981026, "learning_rate": 4.8617397320388245e-05, "loss": 0.5926, "step": 4492 }, { "epoch": 0.5327878572275584, "grad_norm": 1.5475499665914598, "learning_rate": 4.861661001870156e-05, "loss": 0.7534, "step": 4493 }, { "epoch": 0.5329064389896834, "grad_norm": 1.4771305097509935, "learning_rate": 4.8615822499298685e-05, "loss": 0.5686, "step": 4494 }, { "epoch": 0.5330250207518084, "grad_norm": 1.3673161311932296, "learning_rate": 4.8615034762186884e-05, "loss": 0.564, "step": 4495 }, { "epoch": 0.5331436025139333, "grad_norm": 1.4271981794223385, "learning_rate": 4.861424680737343e-05, "loss": 0.6364, "step": 4496 }, { "epoch": 0.5332621842760583, "grad_norm": 1.366274636109926, "learning_rate": 4.861345863486557e-05, "loss": 0.6445, "step": 4497 }, { "epoch": 0.5333807660381833, "grad_norm": 1.3985172631840035, "learning_rate": 4.861267024467058e-05, "loss": 0.5857, "step": 4498 }, { "epoch": 0.5334993478003083, "grad_norm": 1.2002096316971793, "learning_rate": 4.861188163679572e-05, "loss": 0.4708, "step": 4499 }, { "epoch": 0.5336179295624333, "grad_norm": 1.3621755441598689, "learning_rate": 4.8611092811248276e-05, "loss": 0.4241, "step": 4500 }, { "epoch": 0.5337365113245582, "grad_norm": 1.4595258304678125, "learning_rate": 4.86103037680355e-05, "loss": 0.4027, "step": 4501 }, { "epoch": 0.5338550930866832, "grad_norm": 1.671460675986169, "learning_rate": 4.860951450716469e-05, "loss": 0.6597, "step": 4502 }, { "epoch": 0.5339736748488082, "grad_norm": 1.8442624663088294, "learning_rate": 4.86087250286431e-05, "loss": 0.5986, "step": 4503 }, { "epoch": 0.5340922566109333, "grad_norm": 1.323794113837236, "learning_rate": 4.860793533247802e-05, "loss": 0.4808, "step": 4504 }, { "epoch": 0.5342108383730583, "grad_norm": 1.5107610694247755, "learning_rate": 4.860714541867672e-05, "loss": 0.5817, "step": 4505 }, { "epoch": 0.5343294201351833, "grad_norm": 1.7863992132424475, "learning_rate": 4.8606355287246484e-05, "loss": 0.7367, "step": 4506 }, { "epoch": 0.5344480018973082, "grad_norm": 1.5563343380681909, "learning_rate": 4.860556493819461e-05, "loss": 0.649, "step": 4507 }, { "epoch": 0.5345665836594332, "grad_norm": 1.742828077740644, "learning_rate": 4.860477437152837e-05, "loss": 0.8467, "step": 4508 }, { "epoch": 0.5346851654215582, "grad_norm": 1.7124348705341195, "learning_rate": 4.860398358725506e-05, "loss": 0.8007, "step": 4509 }, { "epoch": 0.5348037471836832, "grad_norm": 1.657080396614561, "learning_rate": 4.860319258538197e-05, "loss": 0.5213, "step": 4510 }, { "epoch": 0.5349223289458082, "grad_norm": 1.4225477720166364, "learning_rate": 4.860240136591639e-05, "loss": 0.6349, "step": 4511 }, { "epoch": 0.5350409107079331, "grad_norm": 1.4595090070937666, "learning_rate": 4.86016099288656e-05, "loss": 0.5302, "step": 4512 }, { "epoch": 0.5351594924700581, "grad_norm": 1.3798087519975897, "learning_rate": 4.860081827423691e-05, "loss": 0.5068, "step": 4513 }, { "epoch": 0.5352780742321831, "grad_norm": 1.4031454881732235, "learning_rate": 4.860002640203762e-05, "loss": 0.7924, "step": 4514 }, { "epoch": 0.5353966559943081, "grad_norm": 1.648309404417123, "learning_rate": 4.8599234312275034e-05, "loss": 0.6164, "step": 4515 }, { "epoch": 0.5355152377564331, "grad_norm": 1.7082430628021479, "learning_rate": 4.859844200495644e-05, "loss": 0.6831, "step": 4516 }, { "epoch": 0.535633819518558, "grad_norm": 1.6864370943632745, "learning_rate": 4.8597649480089145e-05, "loss": 0.679, "step": 4517 }, { "epoch": 0.535752401280683, "grad_norm": 1.5030761845828482, "learning_rate": 4.859685673768046e-05, "loss": 0.6223, "step": 4518 }, { "epoch": 0.535870983042808, "grad_norm": 1.4004484345929218, "learning_rate": 4.8596063777737696e-05, "loss": 0.5802, "step": 4519 }, { "epoch": 0.535989564804933, "grad_norm": 1.5409929354888585, "learning_rate": 4.8595270600268163e-05, "loss": 0.6202, "step": 4520 }, { "epoch": 0.536108146567058, "grad_norm": 1.4774107959411877, "learning_rate": 4.859447720527917e-05, "loss": 0.5905, "step": 4521 }, { "epoch": 0.536226728329183, "grad_norm": 1.7787169900058584, "learning_rate": 4.8593683592778026e-05, "loss": 0.5795, "step": 4522 }, { "epoch": 0.5363453100913079, "grad_norm": 1.7857562651066252, "learning_rate": 4.859288976277205e-05, "loss": 0.6754, "step": 4523 }, { "epoch": 0.5364638918534329, "grad_norm": 1.844227814672596, "learning_rate": 4.859209571526857e-05, "loss": 0.6247, "step": 4524 }, { "epoch": 0.5365824736155579, "grad_norm": 1.901479811499885, "learning_rate": 4.859130145027488e-05, "loss": 0.8008, "step": 4525 }, { "epoch": 0.5367010553776829, "grad_norm": 1.3934750170791916, "learning_rate": 4.859050696779834e-05, "loss": 0.5063, "step": 4526 }, { "epoch": 0.5368196371398078, "grad_norm": 1.5384934157456482, "learning_rate": 4.8589712267846244e-05, "loss": 0.7642, "step": 4527 }, { "epoch": 0.5369382189019328, "grad_norm": 1.4707245854077236, "learning_rate": 4.858891735042593e-05, "loss": 0.7965, "step": 4528 }, { "epoch": 0.5370568006640579, "grad_norm": 1.4481291311832314, "learning_rate": 4.858812221554473e-05, "loss": 0.6285, "step": 4529 }, { "epoch": 0.5371753824261829, "grad_norm": 1.4822541988510196, "learning_rate": 4.858732686320997e-05, "loss": 0.5763, "step": 4530 }, { "epoch": 0.5372939641883079, "grad_norm": 1.6973982384199575, "learning_rate": 4.858653129342897e-05, "loss": 0.7594, "step": 4531 }, { "epoch": 0.5374125459504329, "grad_norm": 1.5699154064362828, "learning_rate": 4.858573550620908e-05, "loss": 0.5418, "step": 4532 }, { "epoch": 0.5375311277125578, "grad_norm": 1.5213510043525413, "learning_rate": 4.8584939501557634e-05, "loss": 0.5525, "step": 4533 }, { "epoch": 0.5376497094746828, "grad_norm": 1.5765305943780945, "learning_rate": 4.858414327948196e-05, "loss": 0.8182, "step": 4534 }, { "epoch": 0.5377682912368078, "grad_norm": 1.510408828778692, "learning_rate": 4.858334683998942e-05, "loss": 0.5502, "step": 4535 }, { "epoch": 0.5378868729989328, "grad_norm": 1.524344193847123, "learning_rate": 4.858255018308733e-05, "loss": 0.5779, "step": 4536 }, { "epoch": 0.5380054547610578, "grad_norm": 1.5596379090061872, "learning_rate": 4.858175330878305e-05, "loss": 0.5943, "step": 4537 }, { "epoch": 0.5381240365231827, "grad_norm": 1.6721294318190811, "learning_rate": 4.858095621708393e-05, "loss": 0.6473, "step": 4538 }, { "epoch": 0.5382426182853077, "grad_norm": 1.700351629336764, "learning_rate": 4.8580158907997295e-05, "loss": 0.9848, "step": 4539 }, { "epoch": 0.5383612000474327, "grad_norm": 1.4145495283065948, "learning_rate": 4.857936138153052e-05, "loss": 0.5433, "step": 4540 }, { "epoch": 0.5384797818095577, "grad_norm": 2.001175553349072, "learning_rate": 4.857856363769095e-05, "loss": 0.8281, "step": 4541 }, { "epoch": 0.5385983635716827, "grad_norm": 1.3672911746088445, "learning_rate": 4.857776567648593e-05, "loss": 0.4978, "step": 4542 }, { "epoch": 0.5387169453338077, "grad_norm": 1.465039645425579, "learning_rate": 4.857696749792283e-05, "loss": 0.7244, "step": 4543 }, { "epoch": 0.5388355270959326, "grad_norm": 1.279671997198366, "learning_rate": 4.8576169102009e-05, "loss": 0.6594, "step": 4544 }, { "epoch": 0.5389541088580576, "grad_norm": 1.2647805066759357, "learning_rate": 4.85753704887518e-05, "loss": 0.6247, "step": 4545 }, { "epoch": 0.5390726906201826, "grad_norm": 1.6110394791778222, "learning_rate": 4.8574571658158594e-05, "loss": 0.9907, "step": 4546 }, { "epoch": 0.5391912723823076, "grad_norm": 1.4993085343428467, "learning_rate": 4.8573772610236744e-05, "loss": 0.6429, "step": 4547 }, { "epoch": 0.5393098541444326, "grad_norm": 1.5389457475036012, "learning_rate": 4.857297334499362e-05, "loss": 0.7495, "step": 4548 }, { "epoch": 0.5394284359065575, "grad_norm": 1.4203145801526837, "learning_rate": 4.8572173862436596e-05, "loss": 0.626, "step": 4549 }, { "epoch": 0.5395470176686825, "grad_norm": 1.4256003600834117, "learning_rate": 4.857137416257303e-05, "loss": 0.6833, "step": 4550 }, { "epoch": 0.5396655994308075, "grad_norm": 1.5497445940406736, "learning_rate": 4.85705742454103e-05, "loss": 0.6396, "step": 4551 }, { "epoch": 0.5397841811929325, "grad_norm": 1.4338265249968913, "learning_rate": 4.856977411095578e-05, "loss": 0.5476, "step": 4552 }, { "epoch": 0.5399027629550575, "grad_norm": 1.304666068720877, "learning_rate": 4.856897375921684e-05, "loss": 0.4012, "step": 4553 }, { "epoch": 0.5400213447171825, "grad_norm": 1.345022828888145, "learning_rate": 4.856817319020087e-05, "loss": 0.615, "step": 4554 }, { "epoch": 0.5401399264793075, "grad_norm": 1.9320226815715766, "learning_rate": 4.8567372403915246e-05, "loss": 0.8215, "step": 4555 }, { "epoch": 0.5402585082414325, "grad_norm": 1.6554837950547712, "learning_rate": 4.8566571400367345e-05, "loss": 0.7498, "step": 4556 }, { "epoch": 0.5403770900035575, "grad_norm": 1.513809841285043, "learning_rate": 4.8565770179564554e-05, "loss": 0.5893, "step": 4557 }, { "epoch": 0.5404956717656825, "grad_norm": 1.4524262532412162, "learning_rate": 4.856496874151426e-05, "loss": 0.4446, "step": 4558 }, { "epoch": 0.5406142535278075, "grad_norm": 2.0805819836113932, "learning_rate": 4.8564167086223856e-05, "loss": 0.7636, "step": 4559 }, { "epoch": 0.5407328352899324, "grad_norm": 1.7068089375369258, "learning_rate": 4.856336521370073e-05, "loss": 0.6354, "step": 4560 }, { "epoch": 0.5408514170520574, "grad_norm": 1.6527353729288665, "learning_rate": 4.856256312395227e-05, "loss": 0.4953, "step": 4561 }, { "epoch": 0.5409699988141824, "grad_norm": 1.5325358208548603, "learning_rate": 4.856176081698586e-05, "loss": 0.5759, "step": 4562 }, { "epoch": 0.5410885805763074, "grad_norm": 1.3712259098753012, "learning_rate": 4.8560958292808925e-05, "loss": 0.6332, "step": 4563 }, { "epoch": 0.5412071623384324, "grad_norm": 1.4830517171258473, "learning_rate": 4.8560155551428837e-05, "loss": 0.5893, "step": 4564 }, { "epoch": 0.5413257441005573, "grad_norm": 1.5539116933746375, "learning_rate": 4.855935259285301e-05, "loss": 0.5408, "step": 4565 }, { "epoch": 0.5414443258626823, "grad_norm": 1.8225975591005366, "learning_rate": 4.855854941708884e-05, "loss": 0.6158, "step": 4566 }, { "epoch": 0.5415629076248073, "grad_norm": 1.4953083124956517, "learning_rate": 4.8557746024143736e-05, "loss": 0.6045, "step": 4567 }, { "epoch": 0.5416814893869323, "grad_norm": 1.5670407121280645, "learning_rate": 4.85569424140251e-05, "loss": 0.5554, "step": 4568 }, { "epoch": 0.5418000711490573, "grad_norm": 1.9093041853009627, "learning_rate": 4.8556138586740344e-05, "loss": 0.829, "step": 4569 }, { "epoch": 0.5419186529111822, "grad_norm": 1.199880427591941, "learning_rate": 4.8555334542296885e-05, "loss": 0.3598, "step": 4570 }, { "epoch": 0.5420372346733072, "grad_norm": 1.5597833551139135, "learning_rate": 4.855453028070212e-05, "loss": 0.5221, "step": 4571 }, { "epoch": 0.5421558164354322, "grad_norm": 1.6736452601432108, "learning_rate": 4.855372580196346e-05, "loss": 0.4581, "step": 4572 }, { "epoch": 0.5422743981975572, "grad_norm": 1.5696202275843527, "learning_rate": 4.855292110608835e-05, "loss": 0.6765, "step": 4573 }, { "epoch": 0.5423929799596822, "grad_norm": 1.266090208846806, "learning_rate": 4.855211619308417e-05, "loss": 0.548, "step": 4574 }, { "epoch": 0.5425115617218071, "grad_norm": 3.748677711905531, "learning_rate": 4.8551311062958374e-05, "loss": 0.503, "step": 4575 }, { "epoch": 0.5426301434839321, "grad_norm": 1.6031353901749559, "learning_rate": 4.855050571571837e-05, "loss": 0.6942, "step": 4576 }, { "epoch": 0.5427487252460571, "grad_norm": 1.9424007887873835, "learning_rate": 4.854970015137158e-05, "loss": 0.7993, "step": 4577 }, { "epoch": 0.5428673070081821, "grad_norm": 2.057405537222361, "learning_rate": 4.8548894369925426e-05, "loss": 0.7838, "step": 4578 }, { "epoch": 0.5429858887703072, "grad_norm": 2.1087385339597446, "learning_rate": 4.854808837138736e-05, "loss": 0.7553, "step": 4579 }, { "epoch": 0.5431044705324322, "grad_norm": 1.5995196439629515, "learning_rate": 4.8547282155764784e-05, "loss": 0.5758, "step": 4580 }, { "epoch": 0.5432230522945571, "grad_norm": 1.5627636921531796, "learning_rate": 4.854647572306514e-05, "loss": 0.6802, "step": 4581 }, { "epoch": 0.5433416340566821, "grad_norm": 1.435792822202445, "learning_rate": 4.854566907329587e-05, "loss": 0.5722, "step": 4582 }, { "epoch": 0.5434602158188071, "grad_norm": 1.6145852056353225, "learning_rate": 4.85448622064644e-05, "loss": 0.5525, "step": 4583 }, { "epoch": 0.5435787975809321, "grad_norm": 1.2926664456706392, "learning_rate": 4.854405512257818e-05, "loss": 0.5524, "step": 4584 }, { "epoch": 0.5436973793430571, "grad_norm": 1.379386956991928, "learning_rate": 4.854324782164464e-05, "loss": 0.543, "step": 4585 }, { "epoch": 0.543815961105182, "grad_norm": 1.4340811902686066, "learning_rate": 4.8542440303671226e-05, "loss": 0.8497, "step": 4586 }, { "epoch": 0.543934542867307, "grad_norm": 1.42394526021585, "learning_rate": 4.8541632568665385e-05, "loss": 0.5296, "step": 4587 }, { "epoch": 0.544053124629432, "grad_norm": 1.6041244510431334, "learning_rate": 4.854082461663455e-05, "loss": 0.6074, "step": 4588 }, { "epoch": 0.544171706391557, "grad_norm": 2.043163493707532, "learning_rate": 4.854001644758619e-05, "loss": 0.703, "step": 4589 }, { "epoch": 0.544290288153682, "grad_norm": 1.3550393164625205, "learning_rate": 4.853920806152774e-05, "loss": 0.524, "step": 4590 }, { "epoch": 0.544408869915807, "grad_norm": 2.0747483982910673, "learning_rate": 4.8538399458466666e-05, "loss": 0.9109, "step": 4591 }, { "epoch": 0.5445274516779319, "grad_norm": 1.5238513630205874, "learning_rate": 4.85375906384104e-05, "loss": 0.6593, "step": 4592 }, { "epoch": 0.5446460334400569, "grad_norm": 1.5221841855082954, "learning_rate": 4.8536781601366424e-05, "loss": 0.631, "step": 4593 }, { "epoch": 0.5447646152021819, "grad_norm": 1.4639806396358668, "learning_rate": 4.853597234734218e-05, "loss": 0.571, "step": 4594 }, { "epoch": 0.5448831969643069, "grad_norm": 1.5013713815499385, "learning_rate": 4.853516287634513e-05, "loss": 0.5912, "step": 4595 }, { "epoch": 0.5450017787264319, "grad_norm": 1.8578979102017428, "learning_rate": 4.853435318838274e-05, "loss": 0.8344, "step": 4596 }, { "epoch": 0.5451203604885568, "grad_norm": 1.3112386535825042, "learning_rate": 4.853354328346248e-05, "loss": 0.6306, "step": 4597 }, { "epoch": 0.5452389422506818, "grad_norm": 1.266581678151801, "learning_rate": 4.85327331615918e-05, "loss": 0.5989, "step": 4598 }, { "epoch": 0.5453575240128068, "grad_norm": 1.43080876654775, "learning_rate": 4.853192282277818e-05, "loss": 0.4892, "step": 4599 }, { "epoch": 0.5454761057749318, "grad_norm": 1.3247113555229515, "learning_rate": 4.853111226702909e-05, "loss": 0.613, "step": 4600 }, { "epoch": 0.5455946875370568, "grad_norm": 1.5555763399140856, "learning_rate": 4.8530301494352004e-05, "loss": 0.5953, "step": 4601 }, { "epoch": 0.5457132692991817, "grad_norm": 1.3705638385522079, "learning_rate": 4.852949050475439e-05, "loss": 0.6699, "step": 4602 }, { "epoch": 0.5458318510613067, "grad_norm": 1.3759747949781405, "learning_rate": 4.852867929824373e-05, "loss": 0.6054, "step": 4603 }, { "epoch": 0.5459504328234318, "grad_norm": 1.481210708410744, "learning_rate": 4.852786787482749e-05, "loss": 0.8254, "step": 4604 }, { "epoch": 0.5460690145855568, "grad_norm": 1.1940831467489172, "learning_rate": 4.852705623451317e-05, "loss": 0.3735, "step": 4605 }, { "epoch": 0.5461875963476818, "grad_norm": 1.4326360498307749, "learning_rate": 4.852624437730824e-05, "loss": 0.7431, "step": 4606 }, { "epoch": 0.5463061781098068, "grad_norm": 1.8471006512679942, "learning_rate": 4.8525432303220186e-05, "loss": 0.7113, "step": 4607 }, { "epoch": 0.5464247598719317, "grad_norm": 1.626825725420159, "learning_rate": 4.852462001225649e-05, "loss": 0.7306, "step": 4608 }, { "epoch": 0.5465433416340567, "grad_norm": 1.959271360131848, "learning_rate": 4.852380750442466e-05, "loss": 0.768, "step": 4609 }, { "epoch": 0.5466619233961817, "grad_norm": 1.35295276556761, "learning_rate": 4.8522994779732154e-05, "loss": 0.6207, "step": 4610 }, { "epoch": 0.5467805051583067, "grad_norm": 1.3987928426264178, "learning_rate": 4.852218183818649e-05, "loss": 0.5558, "step": 4611 }, { "epoch": 0.5468990869204317, "grad_norm": 1.5404127949910325, "learning_rate": 4.8521368679795154e-05, "loss": 0.6791, "step": 4612 }, { "epoch": 0.5470176686825566, "grad_norm": 1.636783925818582, "learning_rate": 4.8520555304565646e-05, "loss": 0.5459, "step": 4613 }, { "epoch": 0.5471362504446816, "grad_norm": 1.3270388970451197, "learning_rate": 4.8519741712505455e-05, "loss": 0.4578, "step": 4614 }, { "epoch": 0.5472548322068066, "grad_norm": 1.8371891466297028, "learning_rate": 4.851892790362209e-05, "loss": 0.6889, "step": 4615 }, { "epoch": 0.5473734139689316, "grad_norm": 1.8381220910372034, "learning_rate": 4.8518113877923054e-05, "loss": 0.7627, "step": 4616 }, { "epoch": 0.5474919957310566, "grad_norm": 1.3218563459848849, "learning_rate": 4.851729963541584e-05, "loss": 0.5165, "step": 4617 }, { "epoch": 0.5476105774931815, "grad_norm": 1.629902695086319, "learning_rate": 4.851648517610797e-05, "loss": 0.8131, "step": 4618 }, { "epoch": 0.5477291592553065, "grad_norm": 1.3314720565563267, "learning_rate": 4.8515670500006935e-05, "loss": 0.4345, "step": 4619 }, { "epoch": 0.5478477410174315, "grad_norm": 1.3665419691198926, "learning_rate": 4.8514855607120266e-05, "loss": 0.6458, "step": 4620 }, { "epoch": 0.5479663227795565, "grad_norm": 1.7710392519737554, "learning_rate": 4.851404049745546e-05, "loss": 0.8572, "step": 4621 }, { "epoch": 0.5480849045416815, "grad_norm": 1.4450706464752117, "learning_rate": 4.851322517102003e-05, "loss": 0.6613, "step": 4622 }, { "epoch": 0.5482034863038064, "grad_norm": 1.2646704610345536, "learning_rate": 4.85124096278215e-05, "loss": 0.4361, "step": 4623 }, { "epoch": 0.5483220680659314, "grad_norm": 1.6154187263908246, "learning_rate": 4.851159386786739e-05, "loss": 0.7703, "step": 4624 }, { "epoch": 0.5484406498280564, "grad_norm": 1.5436568167590163, "learning_rate": 4.8510777891165214e-05, "loss": 0.7988, "step": 4625 }, { "epoch": 0.5485592315901814, "grad_norm": 1.6124992786089767, "learning_rate": 4.8509961697722494e-05, "loss": 0.6282, "step": 4626 }, { "epoch": 0.5486778133523064, "grad_norm": 1.5571841946311238, "learning_rate": 4.850914528754676e-05, "loss": 0.6455, "step": 4627 }, { "epoch": 0.5487963951144313, "grad_norm": 1.4406391110076393, "learning_rate": 4.8508328660645544e-05, "loss": 0.598, "step": 4628 }, { "epoch": 0.5489149768765564, "grad_norm": 1.534214814261759, "learning_rate": 4.850751181702635e-05, "loss": 0.7429, "step": 4629 }, { "epoch": 0.5490335586386814, "grad_norm": 1.485976411771782, "learning_rate": 4.850669475669674e-05, "loss": 0.7817, "step": 4630 }, { "epoch": 0.5491521404008064, "grad_norm": 1.4144126757985458, "learning_rate": 4.850587747966421e-05, "loss": 0.6631, "step": 4631 }, { "epoch": 0.5492707221629314, "grad_norm": 1.654672630981689, "learning_rate": 4.850505998593633e-05, "loss": 0.7824, "step": 4632 }, { "epoch": 0.5493893039250564, "grad_norm": 1.5067149626030398, "learning_rate": 4.850424227552062e-05, "loss": 0.5677, "step": 4633 }, { "epoch": 0.5495078856871813, "grad_norm": 1.3087298132083809, "learning_rate": 4.850342434842461e-05, "loss": 0.4576, "step": 4634 }, { "epoch": 0.5496264674493063, "grad_norm": 1.421116745263161, "learning_rate": 4.8502606204655854e-05, "loss": 0.5428, "step": 4635 }, { "epoch": 0.5497450492114313, "grad_norm": 1.4667154064970254, "learning_rate": 4.850178784422189e-05, "loss": 0.613, "step": 4636 }, { "epoch": 0.5498636309735563, "grad_norm": 1.5191311181763694, "learning_rate": 4.850096926713026e-05, "loss": 0.5944, "step": 4637 }, { "epoch": 0.5499822127356813, "grad_norm": 1.4913363938637343, "learning_rate": 4.850015047338852e-05, "loss": 0.5489, "step": 4638 }, { "epoch": 0.5501007944978062, "grad_norm": 1.5453091345302459, "learning_rate": 4.84993314630042e-05, "loss": 0.6337, "step": 4639 }, { "epoch": 0.5502193762599312, "grad_norm": 1.4735449918341383, "learning_rate": 4.849851223598486e-05, "loss": 0.5379, "step": 4640 }, { "epoch": 0.5503379580220562, "grad_norm": 1.7777655359549152, "learning_rate": 4.8497692792338064e-05, "loss": 0.5701, "step": 4641 }, { "epoch": 0.5504565397841812, "grad_norm": 1.4978570225269476, "learning_rate": 4.849687313207135e-05, "loss": 0.4802, "step": 4642 }, { "epoch": 0.5505751215463062, "grad_norm": 1.4481843596496, "learning_rate": 4.849605325519227e-05, "loss": 0.6916, "step": 4643 }, { "epoch": 0.5506937033084311, "grad_norm": 1.7247600070035276, "learning_rate": 4.849523316170841e-05, "loss": 0.6928, "step": 4644 }, { "epoch": 0.5508122850705561, "grad_norm": 1.8117250593739083, "learning_rate": 4.84944128516273e-05, "loss": 0.5815, "step": 4645 }, { "epoch": 0.5509308668326811, "grad_norm": 1.437510525277727, "learning_rate": 4.849359232495652e-05, "loss": 0.6572, "step": 4646 }, { "epoch": 0.5510494485948061, "grad_norm": 1.7884800094653588, "learning_rate": 4.849277158170362e-05, "loss": 0.7655, "step": 4647 }, { "epoch": 0.5511680303569311, "grad_norm": 1.7814238512343543, "learning_rate": 4.849195062187618e-05, "loss": 0.486, "step": 4648 }, { "epoch": 0.551286612119056, "grad_norm": 1.3723979022327824, "learning_rate": 4.8491129445481766e-05, "loss": 0.5038, "step": 4649 }, { "epoch": 0.551405193881181, "grad_norm": 1.3725248342736727, "learning_rate": 4.8490308052527936e-05, "loss": 0.4731, "step": 4650 }, { "epoch": 0.551523775643306, "grad_norm": 1.2718068279408583, "learning_rate": 4.848948644302228e-05, "loss": 0.5683, "step": 4651 }, { "epoch": 0.551642357405431, "grad_norm": 1.378205646778303, "learning_rate": 4.8488664616972365e-05, "loss": 0.4383, "step": 4652 }, { "epoch": 0.551760939167556, "grad_norm": 1.5557979870047707, "learning_rate": 4.848784257438576e-05, "loss": 0.7173, "step": 4653 }, { "epoch": 0.5518795209296811, "grad_norm": 1.4708995358421324, "learning_rate": 4.848702031527005e-05, "loss": 0.5448, "step": 4654 }, { "epoch": 0.551998102691806, "grad_norm": 1.5862606452688908, "learning_rate": 4.848619783963282e-05, "loss": 0.5746, "step": 4655 }, { "epoch": 0.552116684453931, "grad_norm": 1.3570112103228538, "learning_rate": 4.8485375147481636e-05, "loss": 0.5887, "step": 4656 }, { "epoch": 0.552235266216056, "grad_norm": 1.3192960878927074, "learning_rate": 4.84845522388241e-05, "loss": 0.4946, "step": 4657 }, { "epoch": 0.552353847978181, "grad_norm": 1.1100948018697705, "learning_rate": 4.84837291136678e-05, "loss": 0.4363, "step": 4658 }, { "epoch": 0.552472429740306, "grad_norm": 1.1618763729669603, "learning_rate": 4.8482905772020296e-05, "loss": 0.4306, "step": 4659 }, { "epoch": 0.552591011502431, "grad_norm": 1.4838317130767715, "learning_rate": 4.8482082213889214e-05, "loss": 0.608, "step": 4660 }, { "epoch": 0.5527095932645559, "grad_norm": 1.465500839212871, "learning_rate": 4.848125843928212e-05, "loss": 0.7675, "step": 4661 }, { "epoch": 0.5528281750266809, "grad_norm": 1.3787459229974652, "learning_rate": 4.848043444820662e-05, "loss": 0.6611, "step": 4662 }, { "epoch": 0.5529467567888059, "grad_norm": 1.592739776022908, "learning_rate": 4.84796102406703e-05, "loss": 0.6465, "step": 4663 }, { "epoch": 0.5530653385509309, "grad_norm": 1.8181612569413934, "learning_rate": 4.847878581668078e-05, "loss": 0.6718, "step": 4664 }, { "epoch": 0.5531839203130559, "grad_norm": 1.572554949515031, "learning_rate": 4.847796117624565e-05, "loss": 0.5006, "step": 4665 }, { "epoch": 0.5533025020751808, "grad_norm": 1.4664958797035863, "learning_rate": 4.8477136319372494e-05, "loss": 0.5049, "step": 4666 }, { "epoch": 0.5534210838373058, "grad_norm": 1.417737222512323, "learning_rate": 4.847631124606893e-05, "loss": 0.5159, "step": 4667 }, { "epoch": 0.5535396655994308, "grad_norm": 1.5379533131922494, "learning_rate": 4.8475485956342575e-05, "loss": 0.6252, "step": 4668 }, { "epoch": 0.5536582473615558, "grad_norm": 1.3735876928837583, "learning_rate": 4.847466045020102e-05, "loss": 0.6743, "step": 4669 }, { "epoch": 0.5537768291236808, "grad_norm": 1.5440398032493083, "learning_rate": 4.847383472765189e-05, "loss": 0.6194, "step": 4670 }, { "epoch": 0.5538954108858057, "grad_norm": 1.5087751063996322, "learning_rate": 4.8473008788702786e-05, "loss": 0.4916, "step": 4671 }, { "epoch": 0.5540139926479307, "grad_norm": 1.3835571049237725, "learning_rate": 4.847218263336132e-05, "loss": 0.5027, "step": 4672 }, { "epoch": 0.5541325744100557, "grad_norm": 1.3514424438852948, "learning_rate": 4.847135626163511e-05, "loss": 0.6236, "step": 4673 }, { "epoch": 0.5542511561721807, "grad_norm": 1.8779877575413155, "learning_rate": 4.8470529673531796e-05, "loss": 0.6995, "step": 4674 }, { "epoch": 0.5543697379343057, "grad_norm": 1.3948193924559493, "learning_rate": 4.846970286905896e-05, "loss": 0.5496, "step": 4675 }, { "epoch": 0.5544883196964306, "grad_norm": 1.4197887700049203, "learning_rate": 4.846887584822426e-05, "loss": 0.4846, "step": 4676 }, { "epoch": 0.5546069014585556, "grad_norm": 1.7061204244017096, "learning_rate": 4.84680486110353e-05, "loss": 0.6688, "step": 4677 }, { "epoch": 0.5547254832206806, "grad_norm": 1.7532678636480827, "learning_rate": 4.846722115749971e-05, "loss": 0.8254, "step": 4678 }, { "epoch": 0.5548440649828057, "grad_norm": 1.230650002435006, "learning_rate": 4.846639348762511e-05, "loss": 0.5609, "step": 4679 }, { "epoch": 0.5549626467449307, "grad_norm": 1.4142387976366944, "learning_rate": 4.846556560141915e-05, "loss": 0.5596, "step": 4680 }, { "epoch": 0.5550812285070557, "grad_norm": 1.3097042300343653, "learning_rate": 4.846473749888944e-05, "loss": 0.4881, "step": 4681 }, { "epoch": 0.5551998102691806, "grad_norm": 1.25289964416459, "learning_rate": 4.846390918004363e-05, "loss": 0.4854, "step": 4682 }, { "epoch": 0.5553183920313056, "grad_norm": 1.3622091259624007, "learning_rate": 4.8463080644889346e-05, "loss": 0.5836, "step": 4683 }, { "epoch": 0.5554369737934306, "grad_norm": 2.033151753569081, "learning_rate": 4.846225189343424e-05, "loss": 0.601, "step": 4684 }, { "epoch": 0.5555555555555556, "grad_norm": 1.6818109487391475, "learning_rate": 4.8461422925685937e-05, "loss": 0.5596, "step": 4685 }, { "epoch": 0.5556741373176806, "grad_norm": 1.3900598457850954, "learning_rate": 4.846059374165208e-05, "loss": 0.4947, "step": 4686 }, { "epoch": 0.5557927190798055, "grad_norm": 1.652638580117675, "learning_rate": 4.8459764341340334e-05, "loss": 0.5035, "step": 4687 }, { "epoch": 0.5559113008419305, "grad_norm": 1.568445144867368, "learning_rate": 4.845893472475832e-05, "loss": 0.5453, "step": 4688 }, { "epoch": 0.5560298826040555, "grad_norm": 1.6056317835735008, "learning_rate": 4.845810489191369e-05, "loss": 0.7067, "step": 4689 }, { "epoch": 0.5561484643661805, "grad_norm": 2.252388874015356, "learning_rate": 4.84572748428141e-05, "loss": 0.8045, "step": 4690 }, { "epoch": 0.5562670461283055, "grad_norm": 1.2569445382281208, "learning_rate": 4.8456444577467205e-05, "loss": 0.4281, "step": 4691 }, { "epoch": 0.5563856278904304, "grad_norm": 1.436047509478209, "learning_rate": 4.8455614095880656e-05, "loss": 0.5263, "step": 4692 }, { "epoch": 0.5565042096525554, "grad_norm": 1.685276282305297, "learning_rate": 4.8454783398062106e-05, "loss": 0.6148, "step": 4693 }, { "epoch": 0.5566227914146804, "grad_norm": 2.0011247234410243, "learning_rate": 4.845395248401921e-05, "loss": 0.8917, "step": 4694 }, { "epoch": 0.5567413731768054, "grad_norm": 1.4461293790502088, "learning_rate": 4.845312135375965e-05, "loss": 0.5344, "step": 4695 }, { "epoch": 0.5568599549389304, "grad_norm": 1.711145297475681, "learning_rate": 4.845229000729106e-05, "loss": 0.697, "step": 4696 }, { "epoch": 0.5569785367010553, "grad_norm": 1.7393647887081458, "learning_rate": 4.845145844462111e-05, "loss": 0.7389, "step": 4697 }, { "epoch": 0.5570971184631803, "grad_norm": 1.333362153747004, "learning_rate": 4.845062666575748e-05, "loss": 0.4318, "step": 4698 }, { "epoch": 0.5572157002253053, "grad_norm": 1.6545421383184948, "learning_rate": 4.844979467070783e-05, "loss": 0.6979, "step": 4699 }, { "epoch": 0.5573342819874303, "grad_norm": 1.1268896779327935, "learning_rate": 4.844896245947982e-05, "loss": 0.4277, "step": 4700 }, { "epoch": 0.5574528637495553, "grad_norm": 1.3272964697442295, "learning_rate": 4.844813003208113e-05, "loss": 0.4994, "step": 4701 }, { "epoch": 0.5575714455116803, "grad_norm": 1.5825628757853425, "learning_rate": 4.844729738851945e-05, "loss": 0.9193, "step": 4702 }, { "epoch": 0.5576900272738052, "grad_norm": 1.3459667922437653, "learning_rate": 4.8446464528802424e-05, "loss": 0.4887, "step": 4703 }, { "epoch": 0.5578086090359303, "grad_norm": 1.2766478998110884, "learning_rate": 4.844563145293776e-05, "loss": 0.5471, "step": 4704 }, { "epoch": 0.5579271907980553, "grad_norm": 1.406882937258525, "learning_rate": 4.844479816093312e-05, "loss": 0.6037, "step": 4705 }, { "epoch": 0.5580457725601803, "grad_norm": 1.6933303970551348, "learning_rate": 4.844396465279619e-05, "loss": 0.6191, "step": 4706 }, { "epoch": 0.5581643543223053, "grad_norm": 1.5386064235388461, "learning_rate": 4.844313092853466e-05, "loss": 0.6303, "step": 4707 }, { "epoch": 0.5582829360844302, "grad_norm": 1.2455152767985185, "learning_rate": 4.84422969881562e-05, "loss": 0.4464, "step": 4708 }, { "epoch": 0.5584015178465552, "grad_norm": 1.4263966404465287, "learning_rate": 4.844146283166852e-05, "loss": 0.5418, "step": 4709 }, { "epoch": 0.5585200996086802, "grad_norm": 1.59294352986505, "learning_rate": 4.8440628459079296e-05, "loss": 0.6001, "step": 4710 }, { "epoch": 0.5586386813708052, "grad_norm": 1.6031613022146738, "learning_rate": 4.8439793870396214e-05, "loss": 0.5417, "step": 4711 }, { "epoch": 0.5587572631329302, "grad_norm": 1.6887126982323588, "learning_rate": 4.843895906562699e-05, "loss": 0.518, "step": 4712 }, { "epoch": 0.5588758448950552, "grad_norm": 1.5029864895759364, "learning_rate": 4.8438124044779297e-05, "loss": 0.6802, "step": 4713 }, { "epoch": 0.5589944266571801, "grad_norm": 1.5880976756775973, "learning_rate": 4.8437288807860836e-05, "loss": 0.7062, "step": 4714 }, { "epoch": 0.5591130084193051, "grad_norm": 1.9453882148092791, "learning_rate": 4.843645335487932e-05, "loss": 0.8142, "step": 4715 }, { "epoch": 0.5592315901814301, "grad_norm": 1.5855143360761945, "learning_rate": 4.843561768584245e-05, "loss": 0.737, "step": 4716 }, { "epoch": 0.5593501719435551, "grad_norm": 1.8650019382239083, "learning_rate": 4.843478180075792e-05, "loss": 0.7356, "step": 4717 }, { "epoch": 0.55946875370568, "grad_norm": 1.3330071835986503, "learning_rate": 4.843394569963343e-05, "loss": 0.5695, "step": 4718 }, { "epoch": 0.559587335467805, "grad_norm": 1.268471120248681, "learning_rate": 4.843310938247671e-05, "loss": 0.5086, "step": 4719 }, { "epoch": 0.55970591722993, "grad_norm": 1.743544632564371, "learning_rate": 4.843227284929545e-05, "loss": 0.7171, "step": 4720 }, { "epoch": 0.559824498992055, "grad_norm": 1.3831640952292945, "learning_rate": 4.843143610009737e-05, "loss": 0.5205, "step": 4721 }, { "epoch": 0.55994308075418, "grad_norm": 1.968358051109738, "learning_rate": 4.843059913489019e-05, "loss": 0.7948, "step": 4722 }, { "epoch": 0.560061662516305, "grad_norm": 1.3426533177863889, "learning_rate": 4.842976195368161e-05, "loss": 0.5066, "step": 4723 }, { "epoch": 0.5601802442784299, "grad_norm": 1.5651902997881924, "learning_rate": 4.8428924556479354e-05, "loss": 0.6285, "step": 4724 }, { "epoch": 0.5602988260405549, "grad_norm": 1.4239696680602174, "learning_rate": 4.8428086943291153e-05, "loss": 0.6066, "step": 4725 }, { "epoch": 0.5604174078026799, "grad_norm": 1.0554433953802749, "learning_rate": 4.842724911412471e-05, "loss": 0.4432, "step": 4726 }, { "epoch": 0.5605359895648049, "grad_norm": 1.6259876325984461, "learning_rate": 4.842641106898776e-05, "loss": 0.5761, "step": 4727 }, { "epoch": 0.56065457132693, "grad_norm": 1.5507685335836658, "learning_rate": 4.8425572807888034e-05, "loss": 0.607, "step": 4728 }, { "epoch": 0.560773153089055, "grad_norm": 1.1633739207168885, "learning_rate": 4.842473433083325e-05, "loss": 0.4129, "step": 4729 }, { "epoch": 0.5608917348511799, "grad_norm": 2.503725586544328, "learning_rate": 4.842389563783114e-05, "loss": 0.8213, "step": 4730 }, { "epoch": 0.5610103166133049, "grad_norm": 1.1477729893822515, "learning_rate": 4.8423056728889436e-05, "loss": 0.394, "step": 4731 }, { "epoch": 0.5611288983754299, "grad_norm": 1.3363624447267668, "learning_rate": 4.842221760401587e-05, "loss": 0.4036, "step": 4732 }, { "epoch": 0.5612474801375549, "grad_norm": 1.8180172140413364, "learning_rate": 4.842137826321819e-05, "loss": 0.7912, "step": 4733 }, { "epoch": 0.5613660618996799, "grad_norm": 1.6318346633882967, "learning_rate": 4.8420538706504115e-05, "loss": 0.6826, "step": 4734 }, { "epoch": 0.5614846436618048, "grad_norm": 1.5000196535253845, "learning_rate": 4.84196989338814e-05, "loss": 0.4979, "step": 4735 }, { "epoch": 0.5616032254239298, "grad_norm": 1.9539195300356078, "learning_rate": 4.841885894535777e-05, "loss": 0.5689, "step": 4736 }, { "epoch": 0.5617218071860548, "grad_norm": 1.7550890054268293, "learning_rate": 4.8418018740940985e-05, "loss": 0.6404, "step": 4737 }, { "epoch": 0.5618403889481798, "grad_norm": 0.9798494563074818, "learning_rate": 4.841717832063878e-05, "loss": 0.3949, "step": 4738 }, { "epoch": 0.5619589707103048, "grad_norm": 1.8223535306150611, "learning_rate": 4.841633768445891e-05, "loss": 0.9005, "step": 4739 }, { "epoch": 0.5620775524724297, "grad_norm": 1.6247666035661312, "learning_rate": 4.841549683240913e-05, "loss": 0.5935, "step": 4740 }, { "epoch": 0.5621961342345547, "grad_norm": 1.3391693512383898, "learning_rate": 4.841465576449717e-05, "loss": 0.4234, "step": 4741 }, { "epoch": 0.5623147159966797, "grad_norm": 1.3294420812500403, "learning_rate": 4.8413814480730804e-05, "loss": 0.4427, "step": 4742 }, { "epoch": 0.5624332977588047, "grad_norm": 1.387040981635194, "learning_rate": 4.841297298111778e-05, "loss": 0.5338, "step": 4743 }, { "epoch": 0.5625518795209297, "grad_norm": 1.7385073060511986, "learning_rate": 4.841213126566585e-05, "loss": 0.6415, "step": 4744 }, { "epoch": 0.5626704612830546, "grad_norm": 1.3972870876051866, "learning_rate": 4.841128933438278e-05, "loss": 0.5391, "step": 4745 }, { "epoch": 0.5627890430451796, "grad_norm": 1.4282435188573608, "learning_rate": 4.841044718727633e-05, "loss": 0.4641, "step": 4746 }, { "epoch": 0.5629076248073046, "grad_norm": 1.6772967185691308, "learning_rate": 4.840960482435427e-05, "loss": 0.8723, "step": 4747 }, { "epoch": 0.5630262065694296, "grad_norm": 1.3003424369656504, "learning_rate": 4.840876224562436e-05, "loss": 0.5178, "step": 4748 }, { "epoch": 0.5631447883315546, "grad_norm": 1.3592832465302598, "learning_rate": 4.840791945109437e-05, "loss": 0.4712, "step": 4749 }, { "epoch": 0.5632633700936795, "grad_norm": 1.7409042990156294, "learning_rate": 4.840707644077207e-05, "loss": 0.5227, "step": 4750 }, { "epoch": 0.5633819518558045, "grad_norm": 1.3834563986530066, "learning_rate": 4.840623321466522e-05, "loss": 0.4417, "step": 4751 }, { "epoch": 0.5635005336179295, "grad_norm": 1.5743968641708326, "learning_rate": 4.8405389772781604e-05, "loss": 0.658, "step": 4752 }, { "epoch": 0.5636191153800546, "grad_norm": 1.4946443049600469, "learning_rate": 4.8404546115129e-05, "loss": 0.6127, "step": 4753 }, { "epoch": 0.5637376971421796, "grad_norm": 1.5343673568153964, "learning_rate": 4.8403702241715174e-05, "loss": 0.6689, "step": 4754 }, { "epoch": 0.5638562789043046, "grad_norm": 2.018980568926531, "learning_rate": 4.8402858152547925e-05, "loss": 0.8016, "step": 4755 }, { "epoch": 0.5639748606664295, "grad_norm": 1.7339979206308598, "learning_rate": 4.840201384763501e-05, "loss": 0.8267, "step": 4756 }, { "epoch": 0.5640934424285545, "grad_norm": 1.4941538325678674, "learning_rate": 4.840116932698423e-05, "loss": 0.6551, "step": 4757 }, { "epoch": 0.5642120241906795, "grad_norm": 1.2041908726621509, "learning_rate": 4.840032459060336e-05, "loss": 0.4446, "step": 4758 }, { "epoch": 0.5643306059528045, "grad_norm": 1.770585855121199, "learning_rate": 4.8399479638500204e-05, "loss": 0.7724, "step": 4759 }, { "epoch": 0.5644491877149295, "grad_norm": 1.0882064892022583, "learning_rate": 4.8398634470682535e-05, "loss": 0.3883, "step": 4760 }, { "epoch": 0.5645677694770544, "grad_norm": 1.635651968994243, "learning_rate": 4.8397789087158155e-05, "loss": 0.6636, "step": 4761 }, { "epoch": 0.5646863512391794, "grad_norm": 1.4499353026304567, "learning_rate": 4.839694348793484e-05, "loss": 0.6702, "step": 4762 }, { "epoch": 0.5648049330013044, "grad_norm": 1.4592886735342656, "learning_rate": 4.8396097673020404e-05, "loss": 0.6178, "step": 4763 }, { "epoch": 0.5649235147634294, "grad_norm": 1.3886414655596917, "learning_rate": 4.839525164242263e-05, "loss": 0.4692, "step": 4764 }, { "epoch": 0.5650420965255544, "grad_norm": 1.5319432267792714, "learning_rate": 4.839440539614933e-05, "loss": 0.7857, "step": 4765 }, { "epoch": 0.5651606782876794, "grad_norm": 1.4527857393827315, "learning_rate": 4.8393558934208306e-05, "loss": 0.6159, "step": 4766 }, { "epoch": 0.5652792600498043, "grad_norm": 1.3981707300645687, "learning_rate": 4.839271225660735e-05, "loss": 0.5045, "step": 4767 }, { "epoch": 0.5653978418119293, "grad_norm": 1.6417564114427956, "learning_rate": 4.8391865363354276e-05, "loss": 0.8158, "step": 4768 }, { "epoch": 0.5655164235740543, "grad_norm": 1.447321410698706, "learning_rate": 4.839101825445689e-05, "loss": 0.4194, "step": 4769 }, { "epoch": 0.5656350053361793, "grad_norm": 1.6137992842584659, "learning_rate": 4.8390170929922996e-05, "loss": 0.6676, "step": 4770 }, { "epoch": 0.5657535870983043, "grad_norm": 1.4412999180797266, "learning_rate": 4.838932338976041e-05, "loss": 0.6983, "step": 4771 }, { "epoch": 0.5658721688604292, "grad_norm": 1.3089931996286113, "learning_rate": 4.838847563397694e-05, "loss": 0.4258, "step": 4772 }, { "epoch": 0.5659907506225542, "grad_norm": 1.354614232310771, "learning_rate": 4.838762766258041e-05, "loss": 0.4004, "step": 4773 }, { "epoch": 0.5661093323846792, "grad_norm": 1.6229047366852642, "learning_rate": 4.8386779475578634e-05, "loss": 0.6694, "step": 4774 }, { "epoch": 0.5662279141468042, "grad_norm": 1.237000712125034, "learning_rate": 4.838593107297943e-05, "loss": 0.4017, "step": 4775 }, { "epoch": 0.5663464959089292, "grad_norm": 1.731393118404643, "learning_rate": 4.8385082454790607e-05, "loss": 0.6467, "step": 4776 }, { "epoch": 0.5664650776710541, "grad_norm": 2.0433069948636176, "learning_rate": 4.8384233621020014e-05, "loss": 0.7634, "step": 4777 }, { "epoch": 0.5665836594331792, "grad_norm": 1.4422596845204285, "learning_rate": 4.838338457167546e-05, "loss": 0.4358, "step": 4778 }, { "epoch": 0.5667022411953042, "grad_norm": 1.7644370197805088, "learning_rate": 4.8382535306764775e-05, "loss": 0.6541, "step": 4779 }, { "epoch": 0.5668208229574292, "grad_norm": 2.6601285494283116, "learning_rate": 4.838168582629579e-05, "loss": 0.7526, "step": 4780 }, { "epoch": 0.5669394047195542, "grad_norm": 1.3288981878863133, "learning_rate": 4.838083613027633e-05, "loss": 0.5614, "step": 4781 }, { "epoch": 0.5670579864816792, "grad_norm": 1.5775407125323604, "learning_rate": 4.837998621871424e-05, "loss": 0.5134, "step": 4782 }, { "epoch": 0.5671765682438041, "grad_norm": 1.4348938273809067, "learning_rate": 4.837913609161733e-05, "loss": 0.4587, "step": 4783 }, { "epoch": 0.5672951500059291, "grad_norm": 1.3490908365432095, "learning_rate": 4.837828574899347e-05, "loss": 0.5812, "step": 4784 }, { "epoch": 0.5674137317680541, "grad_norm": 1.356587603427273, "learning_rate": 4.837743519085048e-05, "loss": 0.5454, "step": 4785 }, { "epoch": 0.5675323135301791, "grad_norm": 1.2704706964790042, "learning_rate": 4.83765844171962e-05, "loss": 0.503, "step": 4786 }, { "epoch": 0.5676508952923041, "grad_norm": 1.339491676843473, "learning_rate": 4.8375733428038486e-05, "loss": 0.4458, "step": 4787 }, { "epoch": 0.567769477054429, "grad_norm": 1.4618347600618564, "learning_rate": 4.837488222338517e-05, "loss": 0.5593, "step": 4788 }, { "epoch": 0.567888058816554, "grad_norm": 1.403965826210278, "learning_rate": 4.8374030803244105e-05, "loss": 0.4938, "step": 4789 }, { "epoch": 0.568006640578679, "grad_norm": 1.2494360523289174, "learning_rate": 4.837317916762314e-05, "loss": 0.501, "step": 4790 }, { "epoch": 0.568125222340804, "grad_norm": 1.237857500938287, "learning_rate": 4.8372327316530125e-05, "loss": 0.386, "step": 4791 }, { "epoch": 0.568243804102929, "grad_norm": 1.7492511135738555, "learning_rate": 4.837147524997291e-05, "loss": 0.6377, "step": 4792 }, { "epoch": 0.5683623858650539, "grad_norm": 1.751645263389289, "learning_rate": 4.8370622967959356e-05, "loss": 0.6275, "step": 4793 }, { "epoch": 0.5684809676271789, "grad_norm": 1.624975742164375, "learning_rate": 4.836977047049732e-05, "loss": 0.6145, "step": 4794 }, { "epoch": 0.5685995493893039, "grad_norm": 1.4316480031103396, "learning_rate": 4.8368917757594654e-05, "loss": 0.4117, "step": 4795 }, { "epoch": 0.5687181311514289, "grad_norm": 1.5419529746291927, "learning_rate": 4.836806482925923e-05, "loss": 0.5081, "step": 4796 }, { "epoch": 0.5688367129135539, "grad_norm": 1.4269507270524802, "learning_rate": 4.836721168549889e-05, "loss": 0.4771, "step": 4797 }, { "epoch": 0.5689552946756788, "grad_norm": 1.6029893055680104, "learning_rate": 4.836635832632153e-05, "loss": 0.4848, "step": 4798 }, { "epoch": 0.5690738764378038, "grad_norm": 1.5445006162504786, "learning_rate": 4.836550475173499e-05, "loss": 0.5116, "step": 4799 }, { "epoch": 0.5691924581999288, "grad_norm": 1.502185585772934, "learning_rate": 4.8364650961747146e-05, "loss": 0.5192, "step": 4800 }, { "epoch": 0.5693110399620538, "grad_norm": 1.7086379684746391, "learning_rate": 4.836379695636588e-05, "loss": 0.5508, "step": 4801 }, { "epoch": 0.5694296217241788, "grad_norm": 1.4074781314720364, "learning_rate": 4.836294273559905e-05, "loss": 0.5024, "step": 4802 }, { "epoch": 0.5695482034863039, "grad_norm": 1.855059781593241, "learning_rate": 4.836208829945454e-05, "loss": 0.7017, "step": 4803 }, { "epoch": 0.5696667852484288, "grad_norm": 1.2440513454133826, "learning_rate": 4.836123364794023e-05, "loss": 0.3513, "step": 4804 }, { "epoch": 0.5697853670105538, "grad_norm": 1.4409402767150834, "learning_rate": 4.836037878106399e-05, "loss": 0.5128, "step": 4805 }, { "epoch": 0.5699039487726788, "grad_norm": 1.3500115409600408, "learning_rate": 4.83595236988337e-05, "loss": 0.5079, "step": 4806 }, { "epoch": 0.5700225305348038, "grad_norm": 2.09348074297985, "learning_rate": 4.835866840125725e-05, "loss": 1.0849, "step": 4807 }, { "epoch": 0.5701411122969288, "grad_norm": 1.4319208906596437, "learning_rate": 4.835781288834251e-05, "loss": 0.4667, "step": 4808 }, { "epoch": 0.5702596940590537, "grad_norm": 1.1776032007354145, "learning_rate": 4.835695716009739e-05, "loss": 0.4756, "step": 4809 }, { "epoch": 0.5703782758211787, "grad_norm": 1.5882373105671523, "learning_rate": 4.835610121652977e-05, "loss": 0.6918, "step": 4810 }, { "epoch": 0.5704968575833037, "grad_norm": 1.483576580982176, "learning_rate": 4.8355245057647535e-05, "loss": 0.5241, "step": 4811 }, { "epoch": 0.5706154393454287, "grad_norm": 1.3828462724350303, "learning_rate": 4.835438868345858e-05, "loss": 0.5974, "step": 4812 }, { "epoch": 0.5707340211075537, "grad_norm": 1.742183237128271, "learning_rate": 4.83535320939708e-05, "loss": 0.6456, "step": 4813 }, { "epoch": 0.5708526028696786, "grad_norm": 1.3167680761724174, "learning_rate": 4.835267528919209e-05, "loss": 0.6312, "step": 4814 }, { "epoch": 0.5709711846318036, "grad_norm": 1.4243607218591852, "learning_rate": 4.8351818269130356e-05, "loss": 0.6578, "step": 4815 }, { "epoch": 0.5710897663939286, "grad_norm": 1.5903253628833416, "learning_rate": 4.835096103379348e-05, "loss": 0.5378, "step": 4816 }, { "epoch": 0.5712083481560536, "grad_norm": 1.5103955404738643, "learning_rate": 4.83501035831894e-05, "loss": 0.5794, "step": 4817 }, { "epoch": 0.5713269299181786, "grad_norm": 1.4801726843092147, "learning_rate": 4.834924591732598e-05, "loss": 0.4829, "step": 4818 }, { "epoch": 0.5714455116803036, "grad_norm": 1.3714042197381975, "learning_rate": 4.834838803621115e-05, "loss": 0.6635, "step": 4819 }, { "epoch": 0.5715640934424285, "grad_norm": 1.3490181247544406, "learning_rate": 4.834752993985282e-05, "loss": 0.6085, "step": 4820 }, { "epoch": 0.5716826752045535, "grad_norm": 1.4270539644102322, "learning_rate": 4.8346671628258896e-05, "loss": 0.7319, "step": 4821 }, { "epoch": 0.5718012569666785, "grad_norm": 1.4508676104723428, "learning_rate": 4.834581310143728e-05, "loss": 0.5925, "step": 4822 }, { "epoch": 0.5719198387288035, "grad_norm": 1.4625955935839727, "learning_rate": 4.83449543593959e-05, "loss": 0.5281, "step": 4823 }, { "epoch": 0.5720384204909285, "grad_norm": 1.3399936912690542, "learning_rate": 4.834409540214267e-05, "loss": 0.6165, "step": 4824 }, { "epoch": 0.5721570022530534, "grad_norm": 1.5446371523837137, "learning_rate": 4.8343236229685506e-05, "loss": 0.6953, "step": 4825 }, { "epoch": 0.5722755840151784, "grad_norm": 1.4771672213875835, "learning_rate": 4.834237684203233e-05, "loss": 0.6779, "step": 4826 }, { "epoch": 0.5723941657773034, "grad_norm": 1.7909951036044593, "learning_rate": 4.834151723919106e-05, "loss": 0.837, "step": 4827 }, { "epoch": 0.5725127475394285, "grad_norm": 1.3642387076290443, "learning_rate": 4.834065742116962e-05, "loss": 0.529, "step": 4828 }, { "epoch": 0.5726313293015535, "grad_norm": 1.5104856199540138, "learning_rate": 4.8339797387975946e-05, "loss": 0.5356, "step": 4829 }, { "epoch": 0.5727499110636785, "grad_norm": 1.3724178144977826, "learning_rate": 4.8338937139617966e-05, "loss": 0.4764, "step": 4830 }, { "epoch": 0.5728684928258034, "grad_norm": 1.429346299228928, "learning_rate": 4.833807667610361e-05, "loss": 0.6614, "step": 4831 }, { "epoch": 0.5729870745879284, "grad_norm": 1.1783626037822665, "learning_rate": 4.8337215997440794e-05, "loss": 0.4015, "step": 4832 }, { "epoch": 0.5731056563500534, "grad_norm": 1.532441301078974, "learning_rate": 4.8336355103637467e-05, "loss": 0.6641, "step": 4833 }, { "epoch": 0.5732242381121784, "grad_norm": 1.7186878417362013, "learning_rate": 4.833549399470157e-05, "loss": 0.7059, "step": 4834 }, { "epoch": 0.5733428198743034, "grad_norm": 1.5725811830755916, "learning_rate": 4.833463267064102e-05, "loss": 0.6757, "step": 4835 }, { "epoch": 0.5734614016364283, "grad_norm": 1.6018501065712376, "learning_rate": 4.833377113146379e-05, "loss": 0.5661, "step": 4836 }, { "epoch": 0.5735799833985533, "grad_norm": 1.4558711381852625, "learning_rate": 4.833290937717779e-05, "loss": 0.5581, "step": 4837 }, { "epoch": 0.5736985651606783, "grad_norm": 1.7372117374703748, "learning_rate": 4.833204740779098e-05, "loss": 0.6422, "step": 4838 }, { "epoch": 0.5738171469228033, "grad_norm": 1.5390687407327341, "learning_rate": 4.8331185223311314e-05, "loss": 0.5437, "step": 4839 }, { "epoch": 0.5739357286849283, "grad_norm": 1.5383781286764457, "learning_rate": 4.833032282374672e-05, "loss": 0.5266, "step": 4840 }, { "epoch": 0.5740543104470532, "grad_norm": 1.4064972067128003, "learning_rate": 4.832946020910516e-05, "loss": 0.4996, "step": 4841 }, { "epoch": 0.5741728922091782, "grad_norm": 1.6782330732049084, "learning_rate": 4.832859737939459e-05, "loss": 0.676, "step": 4842 }, { "epoch": 0.5742914739713032, "grad_norm": 1.5288524495336786, "learning_rate": 4.832773433462296e-05, "loss": 0.5151, "step": 4843 }, { "epoch": 0.5744100557334282, "grad_norm": 1.4646171075380927, "learning_rate": 4.832687107479823e-05, "loss": 0.4943, "step": 4844 }, { "epoch": 0.5745286374955532, "grad_norm": 1.5785370476225866, "learning_rate": 4.832600759992835e-05, "loss": 0.4718, "step": 4845 }, { "epoch": 0.5746472192576781, "grad_norm": 2.081196254034374, "learning_rate": 4.832514391002128e-05, "loss": 0.5636, "step": 4846 }, { "epoch": 0.5747658010198031, "grad_norm": 1.4689289371789283, "learning_rate": 4.832428000508498e-05, "loss": 0.5522, "step": 4847 }, { "epoch": 0.5748843827819281, "grad_norm": 1.4572796675293904, "learning_rate": 4.8323415885127434e-05, "loss": 0.6254, "step": 4848 }, { "epoch": 0.5750029645440531, "grad_norm": 1.2066909395011887, "learning_rate": 4.832255155015659e-05, "loss": 0.4543, "step": 4849 }, { "epoch": 0.5751215463061781, "grad_norm": 1.294971829524541, "learning_rate": 4.832168700018042e-05, "loss": 0.6465, "step": 4850 }, { "epoch": 0.575240128068303, "grad_norm": 1.3934296154693546, "learning_rate": 4.8320822235206894e-05, "loss": 0.6258, "step": 4851 }, { "epoch": 0.575358709830428, "grad_norm": 1.4605865133660587, "learning_rate": 4.831995725524399e-05, "loss": 0.4917, "step": 4852 }, { "epoch": 0.5754772915925531, "grad_norm": 1.3047208604074407, "learning_rate": 4.831909206029967e-05, "loss": 0.3776, "step": 4853 }, { "epoch": 0.5755958733546781, "grad_norm": 1.4203365445979046, "learning_rate": 4.8318226650381914e-05, "loss": 0.6028, "step": 4854 }, { "epoch": 0.5757144551168031, "grad_norm": 1.3692200425246621, "learning_rate": 4.8317361025498706e-05, "loss": 0.5687, "step": 4855 }, { "epoch": 0.5758330368789281, "grad_norm": 1.5018778378169475, "learning_rate": 4.831649518565803e-05, "loss": 0.4905, "step": 4856 }, { "epoch": 0.575951618641053, "grad_norm": 1.791021420538173, "learning_rate": 4.831562913086786e-05, "loss": 0.7953, "step": 4857 }, { "epoch": 0.576070200403178, "grad_norm": 1.8302943990569807, "learning_rate": 4.831476286113618e-05, "loss": 0.6317, "step": 4858 }, { "epoch": 0.576188782165303, "grad_norm": 1.3034777713381278, "learning_rate": 4.831389637647097e-05, "loss": 0.4473, "step": 4859 }, { "epoch": 0.576307363927428, "grad_norm": 1.2148598598879456, "learning_rate": 4.8313029676880226e-05, "loss": 0.4011, "step": 4860 }, { "epoch": 0.576425945689553, "grad_norm": 2.001698884347611, "learning_rate": 4.831216276237194e-05, "loss": 0.8231, "step": 4861 }, { "epoch": 0.576544527451678, "grad_norm": 1.7160727286238915, "learning_rate": 4.8311295632954103e-05, "loss": 0.6059, "step": 4862 }, { "epoch": 0.5766631092138029, "grad_norm": 1.465416724556384, "learning_rate": 4.8310428288634704e-05, "loss": 0.6369, "step": 4863 }, { "epoch": 0.5767816909759279, "grad_norm": 1.5785509865479557, "learning_rate": 4.8309560729421745e-05, "loss": 0.7225, "step": 4864 }, { "epoch": 0.5769002727380529, "grad_norm": 1.7649696416874066, "learning_rate": 4.8308692955323215e-05, "loss": 0.6663, "step": 4865 }, { "epoch": 0.5770188545001779, "grad_norm": 1.4939002469595135, "learning_rate": 4.830782496634712e-05, "loss": 0.5848, "step": 4866 }, { "epoch": 0.5771374362623028, "grad_norm": 1.4417408068000643, "learning_rate": 4.830695676250146e-05, "loss": 0.6056, "step": 4867 }, { "epoch": 0.5772560180244278, "grad_norm": 1.3491585682881249, "learning_rate": 4.830608834379424e-05, "loss": 0.5059, "step": 4868 }, { "epoch": 0.5773745997865528, "grad_norm": 1.6677528219985802, "learning_rate": 4.8305219710233465e-05, "loss": 0.7372, "step": 4869 }, { "epoch": 0.5774931815486778, "grad_norm": 1.4175237891988401, "learning_rate": 4.8304350861827145e-05, "loss": 0.6367, "step": 4870 }, { "epoch": 0.5776117633108028, "grad_norm": 1.1290033384211742, "learning_rate": 4.830348179858329e-05, "loss": 0.5079, "step": 4871 }, { "epoch": 0.5777303450729278, "grad_norm": 1.3794354136245188, "learning_rate": 4.8302612520509905e-05, "loss": 0.5143, "step": 4872 }, { "epoch": 0.5778489268350527, "grad_norm": 1.348214567955498, "learning_rate": 4.830174302761501e-05, "loss": 0.5502, "step": 4873 }, { "epoch": 0.5779675085971777, "grad_norm": 1.3002658754122565, "learning_rate": 4.8300873319906614e-05, "loss": 0.5729, "step": 4874 }, { "epoch": 0.5780860903593027, "grad_norm": 1.43187834388376, "learning_rate": 4.830000339739275e-05, "loss": 0.5805, "step": 4875 }, { "epoch": 0.5782046721214277, "grad_norm": 1.681641316296796, "learning_rate": 4.829913326008142e-05, "loss": 0.9402, "step": 4876 }, { "epoch": 0.5783232538835527, "grad_norm": 1.5014871931669835, "learning_rate": 4.829826290798065e-05, "loss": 0.4971, "step": 4877 }, { "epoch": 0.5784418356456777, "grad_norm": 1.7353059919276639, "learning_rate": 4.829739234109847e-05, "loss": 0.7456, "step": 4878 }, { "epoch": 0.5785604174078027, "grad_norm": 1.8220243907551628, "learning_rate": 4.82965215594429e-05, "loss": 0.7658, "step": 4879 }, { "epoch": 0.5786789991699277, "grad_norm": 2.5123982332767842, "learning_rate": 4.829565056302197e-05, "loss": 0.8431, "step": 4880 }, { "epoch": 0.5787975809320527, "grad_norm": 1.5265359858221195, "learning_rate": 4.829477935184371e-05, "loss": 0.4993, "step": 4881 }, { "epoch": 0.5789161626941777, "grad_norm": 1.4847234182651676, "learning_rate": 4.829390792591615e-05, "loss": 0.6811, "step": 4882 }, { "epoch": 0.5790347444563027, "grad_norm": 1.2364768714494359, "learning_rate": 4.829303628524732e-05, "loss": 0.5412, "step": 4883 }, { "epoch": 0.5791533262184276, "grad_norm": 1.2376614496573963, "learning_rate": 4.8292164429845266e-05, "loss": 0.5348, "step": 4884 }, { "epoch": 0.5792719079805526, "grad_norm": 1.9906586451505632, "learning_rate": 4.8291292359718014e-05, "loss": 0.6483, "step": 4885 }, { "epoch": 0.5793904897426776, "grad_norm": 1.7061451496650775, "learning_rate": 4.829042007487361e-05, "loss": 0.7485, "step": 4886 }, { "epoch": 0.5795090715048026, "grad_norm": 1.2467610140821408, "learning_rate": 4.828954757532009e-05, "loss": 0.6076, "step": 4887 }, { "epoch": 0.5796276532669276, "grad_norm": 1.4763793485777257, "learning_rate": 4.828867486106551e-05, "loss": 0.7074, "step": 4888 }, { "epoch": 0.5797462350290525, "grad_norm": 1.2236173759463442, "learning_rate": 4.828780193211789e-05, "loss": 0.4563, "step": 4889 }, { "epoch": 0.5798648167911775, "grad_norm": 1.2750074759086636, "learning_rate": 4.828692878848531e-05, "loss": 0.4611, "step": 4890 }, { "epoch": 0.5799833985533025, "grad_norm": 1.7650976204135476, "learning_rate": 4.8286055430175794e-05, "loss": 0.7461, "step": 4891 }, { "epoch": 0.5801019803154275, "grad_norm": 1.709284016971087, "learning_rate": 4.828518185719741e-05, "loss": 0.767, "step": 4892 }, { "epoch": 0.5802205620775525, "grad_norm": 1.3724087995714866, "learning_rate": 4.828430806955819e-05, "loss": 0.5897, "step": 4893 }, { "epoch": 0.5803391438396774, "grad_norm": 1.2858287207298587, "learning_rate": 4.828343406726622e-05, "loss": 0.5592, "step": 4894 }, { "epoch": 0.5804577256018024, "grad_norm": 1.6962204296770302, "learning_rate": 4.828255985032953e-05, "loss": 0.7195, "step": 4895 }, { "epoch": 0.5805763073639274, "grad_norm": 1.3551283545324055, "learning_rate": 4.8281685418756184e-05, "loss": 0.4472, "step": 4896 }, { "epoch": 0.5806948891260524, "grad_norm": 1.4174282369468612, "learning_rate": 4.8280810772554256e-05, "loss": 0.4871, "step": 4897 }, { "epoch": 0.5808134708881774, "grad_norm": 1.5523607403460744, "learning_rate": 4.82799359117318e-05, "loss": 0.6806, "step": 4898 }, { "epoch": 0.5809320526503023, "grad_norm": 1.6277294374772822, "learning_rate": 4.8279060836296887e-05, "loss": 0.696, "step": 4899 }, { "epoch": 0.5810506344124273, "grad_norm": 1.2411936235414647, "learning_rate": 4.827818554625757e-05, "loss": 0.3197, "step": 4900 }, { "epoch": 0.5811692161745523, "grad_norm": 1.4946612232004128, "learning_rate": 4.827731004162194e-05, "loss": 0.5203, "step": 4901 }, { "epoch": 0.5812877979366773, "grad_norm": 1.3031251426609378, "learning_rate": 4.827643432239804e-05, "loss": 0.5806, "step": 4902 }, { "epoch": 0.5814063796988024, "grad_norm": 1.672332650845054, "learning_rate": 4.827555838859397e-05, "loss": 0.6177, "step": 4903 }, { "epoch": 0.5815249614609274, "grad_norm": 1.491810752581244, "learning_rate": 4.8274682240217796e-05, "loss": 0.5896, "step": 4904 }, { "epoch": 0.5816435432230523, "grad_norm": 1.7958790336490063, "learning_rate": 4.827380587727759e-05, "loss": 0.7725, "step": 4905 }, { "epoch": 0.5817621249851773, "grad_norm": 1.237186784610053, "learning_rate": 4.8272929299781436e-05, "loss": 0.47, "step": 4906 }, { "epoch": 0.5818807067473023, "grad_norm": 1.4772997688547798, "learning_rate": 4.8272052507737404e-05, "loss": 0.6071, "step": 4907 }, { "epoch": 0.5819992885094273, "grad_norm": 1.4568872642056592, "learning_rate": 4.8271175501153594e-05, "loss": 0.6089, "step": 4908 }, { "epoch": 0.5821178702715523, "grad_norm": 1.5142635627058525, "learning_rate": 4.8270298280038076e-05, "loss": 0.5942, "step": 4909 }, { "epoch": 0.5822364520336772, "grad_norm": 1.278251935284489, "learning_rate": 4.826942084439895e-05, "loss": 0.3848, "step": 4910 }, { "epoch": 0.5823550337958022, "grad_norm": 1.2796707713353495, "learning_rate": 4.82685431942443e-05, "loss": 0.3911, "step": 4911 }, { "epoch": 0.5824736155579272, "grad_norm": 1.2421889716053645, "learning_rate": 4.826766532958221e-05, "loss": 0.4456, "step": 4912 }, { "epoch": 0.5825921973200522, "grad_norm": 1.2465262725294888, "learning_rate": 4.8266787250420786e-05, "loss": 0.5373, "step": 4913 }, { "epoch": 0.5827107790821772, "grad_norm": 1.7212458919102307, "learning_rate": 4.826590895676811e-05, "loss": 0.7864, "step": 4914 }, { "epoch": 0.5828293608443021, "grad_norm": 1.5982046664302494, "learning_rate": 4.826503044863228e-05, "loss": 0.4658, "step": 4915 }, { "epoch": 0.5829479426064271, "grad_norm": 1.5409932073947756, "learning_rate": 4.8264151726021406e-05, "loss": 0.7047, "step": 4916 }, { "epoch": 0.5830665243685521, "grad_norm": 1.528264294478824, "learning_rate": 4.826327278894358e-05, "loss": 0.5087, "step": 4917 }, { "epoch": 0.5831851061306771, "grad_norm": 1.5136365235774667, "learning_rate": 4.82623936374069e-05, "loss": 0.5912, "step": 4918 }, { "epoch": 0.5833036878928021, "grad_norm": 1.5453141096085028, "learning_rate": 4.826151427141948e-05, "loss": 0.4833, "step": 4919 }, { "epoch": 0.583422269654927, "grad_norm": 1.9146128166401783, "learning_rate": 4.8260634690989424e-05, "loss": 0.6219, "step": 4920 }, { "epoch": 0.583540851417052, "grad_norm": 1.457959066505368, "learning_rate": 4.8259754896124845e-05, "loss": 0.5451, "step": 4921 }, { "epoch": 0.583659433179177, "grad_norm": 1.209992708492431, "learning_rate": 4.825887488683385e-05, "loss": 0.4083, "step": 4922 }, { "epoch": 0.583778014941302, "grad_norm": 1.320595721456107, "learning_rate": 4.825799466312454e-05, "loss": 0.3837, "step": 4923 }, { "epoch": 0.583896596703427, "grad_norm": 1.6044715009048895, "learning_rate": 4.8257114225005054e-05, "loss": 0.5695, "step": 4924 }, { "epoch": 0.584015178465552, "grad_norm": 1.6327763504325006, "learning_rate": 4.825623357248348e-05, "loss": 0.5824, "step": 4925 }, { "epoch": 0.5841337602276769, "grad_norm": 1.907440586977788, "learning_rate": 4.8255352705567956e-05, "loss": 0.7961, "step": 4926 }, { "epoch": 0.5842523419898019, "grad_norm": 1.7577270539680532, "learning_rate": 4.825447162426661e-05, "loss": 0.6645, "step": 4927 }, { "epoch": 0.584370923751927, "grad_norm": 1.3533206572565841, "learning_rate": 4.8253590328587534e-05, "loss": 0.4939, "step": 4928 }, { "epoch": 0.584489505514052, "grad_norm": 1.477625250675079, "learning_rate": 4.8252708818538876e-05, "loss": 0.3235, "step": 4929 }, { "epoch": 0.584608087276177, "grad_norm": 1.2720623407303047, "learning_rate": 4.825182709412876e-05, "loss": 0.4572, "step": 4930 }, { "epoch": 0.584726669038302, "grad_norm": 1.4111563928870146, "learning_rate": 4.825094515536531e-05, "loss": 0.4253, "step": 4931 }, { "epoch": 0.5848452508004269, "grad_norm": 1.2850744846249977, "learning_rate": 4.8250063002256665e-05, "loss": 0.4387, "step": 4932 }, { "epoch": 0.5849638325625519, "grad_norm": 1.4407256947563034, "learning_rate": 4.824918063481094e-05, "loss": 0.4781, "step": 4933 }, { "epoch": 0.5850824143246769, "grad_norm": 1.4627077910974255, "learning_rate": 4.824829805303628e-05, "loss": 0.5704, "step": 4934 }, { "epoch": 0.5852009960868019, "grad_norm": 1.616998837186328, "learning_rate": 4.824741525694083e-05, "loss": 0.6087, "step": 4935 }, { "epoch": 0.5853195778489269, "grad_norm": 1.7573537873370957, "learning_rate": 4.824653224653271e-05, "loss": 0.7319, "step": 4936 }, { "epoch": 0.5854381596110518, "grad_norm": 1.7875488432993492, "learning_rate": 4.824564902182007e-05, "loss": 0.5707, "step": 4937 }, { "epoch": 0.5855567413731768, "grad_norm": 1.8596330290759115, "learning_rate": 4.8244765582811055e-05, "loss": 0.8066, "step": 4938 }, { "epoch": 0.5856753231353018, "grad_norm": 1.5386017555599303, "learning_rate": 4.8243881929513806e-05, "loss": 0.5972, "step": 4939 }, { "epoch": 0.5857939048974268, "grad_norm": 1.618275291694729, "learning_rate": 4.8242998061936465e-05, "loss": 0.6311, "step": 4940 }, { "epoch": 0.5859124866595518, "grad_norm": 1.5911571473570507, "learning_rate": 4.824211398008719e-05, "loss": 0.5098, "step": 4941 }, { "epoch": 0.5860310684216767, "grad_norm": 1.3264386395090593, "learning_rate": 4.824122968397412e-05, "loss": 0.4223, "step": 4942 }, { "epoch": 0.5861496501838017, "grad_norm": 1.497922970845622, "learning_rate": 4.8240345173605416e-05, "loss": 0.5207, "step": 4943 }, { "epoch": 0.5862682319459267, "grad_norm": 1.4870085530922106, "learning_rate": 4.823946044898923e-05, "loss": 0.5525, "step": 4944 }, { "epoch": 0.5863868137080517, "grad_norm": 1.341605095592941, "learning_rate": 4.823857551013372e-05, "loss": 0.5122, "step": 4945 }, { "epoch": 0.5865053954701767, "grad_norm": 1.4482539332360331, "learning_rate": 4.823769035704704e-05, "loss": 0.5254, "step": 4946 }, { "epoch": 0.5866239772323016, "grad_norm": 1.3644827985664498, "learning_rate": 4.823680498973734e-05, "loss": 0.5206, "step": 4947 }, { "epoch": 0.5867425589944266, "grad_norm": 1.6178064567117574, "learning_rate": 4.8235919408212806e-05, "loss": 0.6114, "step": 4948 }, { "epoch": 0.5868611407565516, "grad_norm": 1.819789341404011, "learning_rate": 4.823503361248158e-05, "loss": 0.5999, "step": 4949 }, { "epoch": 0.5869797225186766, "grad_norm": 1.4902867539268205, "learning_rate": 4.823414760255184e-05, "loss": 0.6032, "step": 4950 }, { "epoch": 0.5870983042808016, "grad_norm": 1.6149339418010462, "learning_rate": 4.823326137843175e-05, "loss": 0.6059, "step": 4951 }, { "epoch": 0.5872168860429265, "grad_norm": 1.3815233879204223, "learning_rate": 4.8232374940129476e-05, "loss": 0.4664, "step": 4952 }, { "epoch": 0.5873354678050516, "grad_norm": 1.6781942553517675, "learning_rate": 4.82314882876532e-05, "loss": 0.7298, "step": 4953 }, { "epoch": 0.5874540495671766, "grad_norm": 1.4619533068127624, "learning_rate": 4.823060142101109e-05, "loss": 0.3982, "step": 4954 }, { "epoch": 0.5875726313293016, "grad_norm": 1.7030783033466592, "learning_rate": 4.822971434021133e-05, "loss": 0.5932, "step": 4955 }, { "epoch": 0.5876912130914266, "grad_norm": 1.6074995225559945, "learning_rate": 4.822882704526207e-05, "loss": 0.4573, "step": 4956 }, { "epoch": 0.5878097948535516, "grad_norm": 1.4244886061663535, "learning_rate": 4.822793953617153e-05, "loss": 0.543, "step": 4957 }, { "epoch": 0.5879283766156765, "grad_norm": 1.7175738909380698, "learning_rate": 4.822705181294786e-05, "loss": 0.6094, "step": 4958 }, { "epoch": 0.5880469583778015, "grad_norm": 1.4467631430827608, "learning_rate": 4.822616387559926e-05, "loss": 0.5011, "step": 4959 }, { "epoch": 0.5881655401399265, "grad_norm": 1.4514160374905725, "learning_rate": 4.822527572413391e-05, "loss": 0.4559, "step": 4960 }, { "epoch": 0.5882841219020515, "grad_norm": 1.4473463579580572, "learning_rate": 4.822438735856e-05, "loss": 0.4995, "step": 4961 }, { "epoch": 0.5884027036641765, "grad_norm": 1.754711607251629, "learning_rate": 4.822349877888571e-05, "loss": 0.554, "step": 4962 }, { "epoch": 0.5885212854263014, "grad_norm": 1.3504422328642773, "learning_rate": 4.8222609985119246e-05, "loss": 0.6406, "step": 4963 }, { "epoch": 0.5886398671884264, "grad_norm": 1.5499165521212537, "learning_rate": 4.822172097726879e-05, "loss": 0.5446, "step": 4964 }, { "epoch": 0.5887584489505514, "grad_norm": 1.592515051568355, "learning_rate": 4.822083175534255e-05, "loss": 0.4602, "step": 4965 }, { "epoch": 0.5888770307126764, "grad_norm": 1.5556564806774136, "learning_rate": 4.821994231934872e-05, "loss": 0.5139, "step": 4966 }, { "epoch": 0.5889956124748014, "grad_norm": 1.4603666284415884, "learning_rate": 4.8219052669295486e-05, "loss": 0.6581, "step": 4967 }, { "epoch": 0.5891141942369263, "grad_norm": 1.7060853053175797, "learning_rate": 4.821816280519106e-05, "loss": 0.6731, "step": 4968 }, { "epoch": 0.5892327759990513, "grad_norm": 1.4016686350949437, "learning_rate": 4.821727272704365e-05, "loss": 0.505, "step": 4969 }, { "epoch": 0.5893513577611763, "grad_norm": 1.7723583783415964, "learning_rate": 4.821638243486145e-05, "loss": 0.7017, "step": 4970 }, { "epoch": 0.5894699395233013, "grad_norm": 1.2811194732013829, "learning_rate": 4.8215491928652684e-05, "loss": 0.5106, "step": 4971 }, { "epoch": 0.5895885212854263, "grad_norm": 1.322377238091314, "learning_rate": 4.821460120842555e-05, "loss": 0.5125, "step": 4972 }, { "epoch": 0.5897071030475513, "grad_norm": 1.139025068738447, "learning_rate": 4.821371027418825e-05, "loss": 0.4502, "step": 4973 }, { "epoch": 0.5898256848096762, "grad_norm": 1.314113897093728, "learning_rate": 4.8212819125949014e-05, "loss": 0.5854, "step": 4974 }, { "epoch": 0.5899442665718012, "grad_norm": 1.33884531763015, "learning_rate": 4.8211927763716055e-05, "loss": 0.5843, "step": 4975 }, { "epoch": 0.5900628483339262, "grad_norm": 1.4795076650833305, "learning_rate": 4.821103618749758e-05, "loss": 0.677, "step": 4976 }, { "epoch": 0.5901814300960512, "grad_norm": 1.288852601890635, "learning_rate": 4.8210144397301815e-05, "loss": 0.5311, "step": 4977 }, { "epoch": 0.5903000118581763, "grad_norm": 1.0097546511317559, "learning_rate": 4.820925239313698e-05, "loss": 0.3021, "step": 4978 }, { "epoch": 0.5904185936203012, "grad_norm": 1.1988651343845003, "learning_rate": 4.8208360175011306e-05, "loss": 0.5135, "step": 4979 }, { "epoch": 0.5905371753824262, "grad_norm": 1.356345268493025, "learning_rate": 4.820746774293301e-05, "loss": 0.5099, "step": 4980 }, { "epoch": 0.5906557571445512, "grad_norm": 1.407120066057279, "learning_rate": 4.820657509691032e-05, "loss": 0.5495, "step": 4981 }, { "epoch": 0.5907743389066762, "grad_norm": 1.365321287874761, "learning_rate": 4.820568223695146e-05, "loss": 0.5503, "step": 4982 }, { "epoch": 0.5908929206688012, "grad_norm": 1.3491492936160716, "learning_rate": 4.820478916306467e-05, "loss": 0.4661, "step": 4983 }, { "epoch": 0.5910115024309261, "grad_norm": 1.5255851747424745, "learning_rate": 4.820389587525817e-05, "loss": 0.6205, "step": 4984 }, { "epoch": 0.5911300841930511, "grad_norm": 1.456832334799871, "learning_rate": 4.820300237354022e-05, "loss": 0.5399, "step": 4985 }, { "epoch": 0.5912486659551761, "grad_norm": 1.535963031926081, "learning_rate": 4.820210865791904e-05, "loss": 0.5216, "step": 4986 }, { "epoch": 0.5913672477173011, "grad_norm": 2.0268605010777496, "learning_rate": 4.820121472840286e-05, "loss": 0.8168, "step": 4987 }, { "epoch": 0.5914858294794261, "grad_norm": 1.589261815186201, "learning_rate": 4.820032058499994e-05, "loss": 0.7101, "step": 4988 }, { "epoch": 0.591604411241551, "grad_norm": 1.8683914871233251, "learning_rate": 4.819942622771851e-05, "loss": 0.7882, "step": 4989 }, { "epoch": 0.591722993003676, "grad_norm": 1.8113528801407002, "learning_rate": 4.819853165656682e-05, "loss": 0.4888, "step": 4990 }, { "epoch": 0.591841574765801, "grad_norm": 1.6207678613892398, "learning_rate": 4.8197636871553114e-05, "loss": 0.5498, "step": 4991 }, { "epoch": 0.591960156527926, "grad_norm": 1.590397159740747, "learning_rate": 4.819674187268565e-05, "loss": 0.6903, "step": 4992 }, { "epoch": 0.592078738290051, "grad_norm": 1.2108344148713195, "learning_rate": 4.819584665997266e-05, "loss": 0.3907, "step": 4993 }, { "epoch": 0.592197320052176, "grad_norm": 1.6242475188544707, "learning_rate": 4.819495123342242e-05, "loss": 0.6247, "step": 4994 }, { "epoch": 0.5923159018143009, "grad_norm": 1.4751523860161722, "learning_rate": 4.819405559304316e-05, "loss": 0.5935, "step": 4995 }, { "epoch": 0.5924344835764259, "grad_norm": 1.5586573031421582, "learning_rate": 4.819315973884316e-05, "loss": 0.6489, "step": 4996 }, { "epoch": 0.5925530653385509, "grad_norm": 1.3447268438941193, "learning_rate": 4.8192263670830675e-05, "loss": 0.4068, "step": 4997 }, { "epoch": 0.5926716471006759, "grad_norm": 1.5205767686251805, "learning_rate": 4.8191367389013946e-05, "loss": 0.4971, "step": 4998 }, { "epoch": 0.5927902288628009, "grad_norm": 1.036501958788195, "learning_rate": 4.8190470893401255e-05, "loss": 0.293, "step": 4999 }, { "epoch": 0.5929088106249258, "grad_norm": 1.304283859209261, "learning_rate": 4.8189574184000865e-05, "loss": 0.4701, "step": 5000 }, { "epoch": 0.5930273923870508, "grad_norm": 1.5925884145244469, "learning_rate": 4.8188677260821027e-05, "loss": 0.5358, "step": 5001 }, { "epoch": 0.5931459741491758, "grad_norm": 1.3731194112450575, "learning_rate": 4.818778012387002e-05, "loss": 0.4784, "step": 5002 }, { "epoch": 0.5932645559113009, "grad_norm": 1.655299439064095, "learning_rate": 4.8186882773156125e-05, "loss": 0.5839, "step": 5003 }, { "epoch": 0.5933831376734259, "grad_norm": 1.381934959238323, "learning_rate": 4.81859852086876e-05, "loss": 0.4568, "step": 5004 }, { "epoch": 0.5935017194355509, "grad_norm": 1.6590105470167593, "learning_rate": 4.8185087430472727e-05, "loss": 0.5548, "step": 5005 }, { "epoch": 0.5936203011976758, "grad_norm": 2.009023871064922, "learning_rate": 4.818418943851978e-05, "loss": 0.6168, "step": 5006 }, { "epoch": 0.5937388829598008, "grad_norm": 2.0528108940107805, "learning_rate": 4.818329123283702e-05, "loss": 0.6673, "step": 5007 }, { "epoch": 0.5938574647219258, "grad_norm": 1.4913889646436398, "learning_rate": 4.818239281343276e-05, "loss": 0.48, "step": 5008 }, { "epoch": 0.5939760464840508, "grad_norm": 2.0361704625406727, "learning_rate": 4.8181494180315266e-05, "loss": 0.7281, "step": 5009 }, { "epoch": 0.5940946282461758, "grad_norm": 1.0951507274817396, "learning_rate": 4.818059533349282e-05, "loss": 0.3338, "step": 5010 }, { "epoch": 0.5942132100083007, "grad_norm": 1.4371995806300184, "learning_rate": 4.817969627297372e-05, "loss": 0.7081, "step": 5011 }, { "epoch": 0.5943317917704257, "grad_norm": 1.3951470256183967, "learning_rate": 4.817879699876623e-05, "loss": 0.5001, "step": 5012 }, { "epoch": 0.5944503735325507, "grad_norm": 1.2482106993029958, "learning_rate": 4.817789751087866e-05, "loss": 0.3636, "step": 5013 }, { "epoch": 0.5945689552946757, "grad_norm": 1.2272823607269867, "learning_rate": 4.81769978093193e-05, "loss": 0.4199, "step": 5014 }, { "epoch": 0.5946875370568007, "grad_norm": 1.4579172828149722, "learning_rate": 4.817609789409644e-05, "loss": 0.3917, "step": 5015 }, { "epoch": 0.5948061188189256, "grad_norm": 1.350376592082812, "learning_rate": 4.8175197765218385e-05, "loss": 0.5415, "step": 5016 }, { "epoch": 0.5949247005810506, "grad_norm": 1.147667523341512, "learning_rate": 4.817429742269342e-05, "loss": 0.3403, "step": 5017 }, { "epoch": 0.5950432823431756, "grad_norm": 1.5236424173203509, "learning_rate": 4.817339686652985e-05, "loss": 0.5902, "step": 5018 }, { "epoch": 0.5951618641053006, "grad_norm": 1.6132229551059678, "learning_rate": 4.8172496096735976e-05, "loss": 0.5698, "step": 5019 }, { "epoch": 0.5952804458674256, "grad_norm": 1.5071274358419369, "learning_rate": 4.8171595113320104e-05, "loss": 0.4969, "step": 5020 }, { "epoch": 0.5953990276295505, "grad_norm": 1.7766606087607815, "learning_rate": 4.8170693916290546e-05, "loss": 0.5888, "step": 5021 }, { "epoch": 0.5955176093916755, "grad_norm": 1.6802536101754872, "learning_rate": 4.8169792505655595e-05, "loss": 0.5, "step": 5022 }, { "epoch": 0.5956361911538005, "grad_norm": 1.7475861796650667, "learning_rate": 4.816889088142358e-05, "loss": 0.7127, "step": 5023 }, { "epoch": 0.5957547729159255, "grad_norm": 1.2816485424257849, "learning_rate": 4.8167989043602797e-05, "loss": 0.4385, "step": 5024 }, { "epoch": 0.5958733546780505, "grad_norm": 1.6013429040675669, "learning_rate": 4.8167086992201564e-05, "loss": 0.4201, "step": 5025 }, { "epoch": 0.5959919364401755, "grad_norm": 2.0827835955798863, "learning_rate": 4.81661847272282e-05, "loss": 0.7108, "step": 5026 }, { "epoch": 0.5961105182023004, "grad_norm": 1.7877122123699725, "learning_rate": 4.816528224869102e-05, "loss": 0.7152, "step": 5027 }, { "epoch": 0.5962290999644255, "grad_norm": 1.1697880232255828, "learning_rate": 4.816437955659835e-05, "loss": 0.376, "step": 5028 }, { "epoch": 0.5963476817265505, "grad_norm": 1.5406976529876533, "learning_rate": 4.81634766509585e-05, "loss": 0.621, "step": 5029 }, { "epoch": 0.5964662634886755, "grad_norm": 1.3486942314516916, "learning_rate": 4.816257353177981e-05, "loss": 0.3697, "step": 5030 }, { "epoch": 0.5965848452508005, "grad_norm": 1.395307489759187, "learning_rate": 4.8161670199070586e-05, "loss": 0.4806, "step": 5031 }, { "epoch": 0.5967034270129254, "grad_norm": 1.599650537995054, "learning_rate": 4.816076665283917e-05, "loss": 0.5306, "step": 5032 }, { "epoch": 0.5968220087750504, "grad_norm": 1.1676089629735835, "learning_rate": 4.815986289309389e-05, "loss": 0.3826, "step": 5033 }, { "epoch": 0.5969405905371754, "grad_norm": 1.1269684152675676, "learning_rate": 4.815895891984307e-05, "loss": 0.4442, "step": 5034 }, { "epoch": 0.5970591722993004, "grad_norm": 1.4461293942964697, "learning_rate": 4.8158054733095045e-05, "loss": 0.535, "step": 5035 }, { "epoch": 0.5971777540614254, "grad_norm": 2.112066157513325, "learning_rate": 4.815715033285816e-05, "loss": 0.7253, "step": 5036 }, { "epoch": 0.5972963358235504, "grad_norm": 1.8450571824618847, "learning_rate": 4.815624571914074e-05, "loss": 0.7515, "step": 5037 }, { "epoch": 0.5974149175856753, "grad_norm": 1.6674886579166792, "learning_rate": 4.815534089195113e-05, "loss": 0.562, "step": 5038 }, { "epoch": 0.5975334993478003, "grad_norm": 1.1630090547709313, "learning_rate": 4.8154435851297673e-05, "loss": 0.4729, "step": 5039 }, { "epoch": 0.5976520811099253, "grad_norm": 1.7221406184015344, "learning_rate": 4.815353059718872e-05, "loss": 0.5069, "step": 5040 }, { "epoch": 0.5977706628720503, "grad_norm": 1.202078816308715, "learning_rate": 4.81526251296326e-05, "loss": 0.3556, "step": 5041 }, { "epoch": 0.5978892446341753, "grad_norm": 1.3701079388096837, "learning_rate": 4.815171944863767e-05, "loss": 0.4533, "step": 5042 }, { "epoch": 0.5980078263963002, "grad_norm": 1.3295549401634659, "learning_rate": 4.815081355421228e-05, "loss": 0.4942, "step": 5043 }, { "epoch": 0.5981264081584252, "grad_norm": 1.5257462490018319, "learning_rate": 4.814990744636477e-05, "loss": 0.6476, "step": 5044 }, { "epoch": 0.5982449899205502, "grad_norm": 1.8060255446536868, "learning_rate": 4.814900112510351e-05, "loss": 0.6456, "step": 5045 }, { "epoch": 0.5983635716826752, "grad_norm": 1.5143404463942465, "learning_rate": 4.814809459043684e-05, "loss": 0.4752, "step": 5046 }, { "epoch": 0.5984821534448002, "grad_norm": 1.6755462619347488, "learning_rate": 4.814718784237313e-05, "loss": 0.7621, "step": 5047 }, { "epoch": 0.5986007352069251, "grad_norm": 1.2205560495921137, "learning_rate": 4.814628088092073e-05, "loss": 0.3825, "step": 5048 }, { "epoch": 0.5987193169690501, "grad_norm": 1.5061517408543588, "learning_rate": 4.8145373706088e-05, "loss": 0.4932, "step": 5049 }, { "epoch": 0.5988378987311751, "grad_norm": 1.3708880758975066, "learning_rate": 4.814446631788332e-05, "loss": 0.5724, "step": 5050 }, { "epoch": 0.5989564804933001, "grad_norm": 1.6305563377604562, "learning_rate": 4.814355871631503e-05, "loss": 0.5918, "step": 5051 }, { "epoch": 0.5990750622554251, "grad_norm": 1.523597534166945, "learning_rate": 4.8142650901391516e-05, "loss": 0.5747, "step": 5052 }, { "epoch": 0.5991936440175502, "grad_norm": 1.6243422643459395, "learning_rate": 4.814174287312113e-05, "loss": 0.7525, "step": 5053 }, { "epoch": 0.5993122257796751, "grad_norm": 1.368847099186889, "learning_rate": 4.814083463151227e-05, "loss": 0.5065, "step": 5054 }, { "epoch": 0.5994308075418001, "grad_norm": 1.3765580247103222, "learning_rate": 4.813992617657328e-05, "loss": 0.3931, "step": 5055 }, { "epoch": 0.5995493893039251, "grad_norm": 1.4335668156992916, "learning_rate": 4.813901750831254e-05, "loss": 0.5606, "step": 5056 }, { "epoch": 0.5996679710660501, "grad_norm": 1.207092782522536, "learning_rate": 4.813810862673845e-05, "loss": 0.4633, "step": 5057 }, { "epoch": 0.5997865528281751, "grad_norm": 1.586137675844984, "learning_rate": 4.813719953185937e-05, "loss": 0.6844, "step": 5058 }, { "epoch": 0.5999051345903, "grad_norm": 2.043314230762433, "learning_rate": 4.813629022368367e-05, "loss": 0.5798, "step": 5059 }, { "epoch": 0.600023716352425, "grad_norm": 1.4685239147452585, "learning_rate": 4.8135380702219755e-05, "loss": 0.5759, "step": 5060 }, { "epoch": 0.60014229811455, "grad_norm": 1.534315762735501, "learning_rate": 4.8134470967476e-05, "loss": 0.482, "step": 5061 }, { "epoch": 0.600260879876675, "grad_norm": 2.014959559902432, "learning_rate": 4.8133561019460795e-05, "loss": 0.6582, "step": 5062 }, { "epoch": 0.6003794616388, "grad_norm": 1.1957021034173538, "learning_rate": 4.8132650858182514e-05, "loss": 0.3154, "step": 5063 }, { "epoch": 0.6004980434009249, "grad_norm": 1.280188073057867, "learning_rate": 4.8131740483649567e-05, "loss": 0.4791, "step": 5064 }, { "epoch": 0.6006166251630499, "grad_norm": 2.084445902131785, "learning_rate": 4.8130829895870335e-05, "loss": 0.646, "step": 5065 }, { "epoch": 0.6007352069251749, "grad_norm": 1.4665718207375038, "learning_rate": 4.812991909485323e-05, "loss": 0.5222, "step": 5066 }, { "epoch": 0.6008537886872999, "grad_norm": 1.5475381866587354, "learning_rate": 4.812900808060662e-05, "loss": 0.5606, "step": 5067 }, { "epoch": 0.6009723704494249, "grad_norm": 1.7782790579332706, "learning_rate": 4.812809685313892e-05, "loss": 0.6577, "step": 5068 }, { "epoch": 0.6010909522115498, "grad_norm": 1.5189955314164496, "learning_rate": 4.812718541245853e-05, "loss": 0.6553, "step": 5069 }, { "epoch": 0.6012095339736748, "grad_norm": 1.446299832130682, "learning_rate": 4.8126273758573854e-05, "loss": 0.5396, "step": 5070 }, { "epoch": 0.6013281157357998, "grad_norm": 1.803972743755853, "learning_rate": 4.812536189149329e-05, "loss": 0.6776, "step": 5071 }, { "epoch": 0.6014466974979248, "grad_norm": 2.034633867002782, "learning_rate": 4.812444981122525e-05, "loss": 0.6492, "step": 5072 }, { "epoch": 0.6015652792600498, "grad_norm": 1.3264444989925404, "learning_rate": 4.812353751777814e-05, "loss": 0.4247, "step": 5073 }, { "epoch": 0.6016838610221747, "grad_norm": 1.170876866577602, "learning_rate": 4.812262501116037e-05, "loss": 0.4952, "step": 5074 }, { "epoch": 0.6018024427842997, "grad_norm": 1.3611782355399467, "learning_rate": 4.812171229138035e-05, "loss": 0.5397, "step": 5075 }, { "epoch": 0.6019210245464247, "grad_norm": 1.454323748334674, "learning_rate": 4.81207993584465e-05, "loss": 0.5909, "step": 5076 }, { "epoch": 0.6020396063085497, "grad_norm": 1.4979392136927356, "learning_rate": 4.811988621236723e-05, "loss": 0.5057, "step": 5077 }, { "epoch": 0.6021581880706748, "grad_norm": 1.2290751377485638, "learning_rate": 4.811897285315096e-05, "loss": 0.4264, "step": 5078 }, { "epoch": 0.6022767698327998, "grad_norm": 1.3270491265591648, "learning_rate": 4.81180592808061e-05, "loss": 0.5107, "step": 5079 }, { "epoch": 0.6023953515949247, "grad_norm": 1.2720075368407704, "learning_rate": 4.81171454953411e-05, "loss": 0.4438, "step": 5080 }, { "epoch": 0.6025139333570497, "grad_norm": 1.4428491984928162, "learning_rate": 4.811623149676436e-05, "loss": 0.6836, "step": 5081 }, { "epoch": 0.6026325151191747, "grad_norm": 1.3771981484159757, "learning_rate": 4.811531728508432e-05, "loss": 0.4773, "step": 5082 }, { "epoch": 0.6027510968812997, "grad_norm": 1.5259425503507265, "learning_rate": 4.8114402860309396e-05, "loss": 0.41, "step": 5083 }, { "epoch": 0.6028696786434247, "grad_norm": 1.255963532930276, "learning_rate": 4.811348822244801e-05, "loss": 0.3973, "step": 5084 }, { "epoch": 0.6029882604055496, "grad_norm": 1.463816366164392, "learning_rate": 4.811257337150862e-05, "loss": 0.4047, "step": 5085 }, { "epoch": 0.6031068421676746, "grad_norm": 1.322751692876159, "learning_rate": 4.811165830749964e-05, "loss": 0.4266, "step": 5086 }, { "epoch": 0.6032254239297996, "grad_norm": 2.283883748747858, "learning_rate": 4.8110743030429514e-05, "loss": 0.7769, "step": 5087 }, { "epoch": 0.6033440056919246, "grad_norm": 2.090669614735181, "learning_rate": 4.8109827540306676e-05, "loss": 0.5406, "step": 5088 }, { "epoch": 0.6034625874540496, "grad_norm": 1.845831134186188, "learning_rate": 4.810891183713957e-05, "loss": 0.5868, "step": 5089 }, { "epoch": 0.6035811692161746, "grad_norm": 1.7525573940139887, "learning_rate": 4.8107995920936633e-05, "loss": 0.5753, "step": 5090 }, { "epoch": 0.6036997509782995, "grad_norm": 1.6485296637588127, "learning_rate": 4.810707979170631e-05, "loss": 0.5443, "step": 5091 }, { "epoch": 0.6038183327404245, "grad_norm": 1.3287627646672227, "learning_rate": 4.8106163449457057e-05, "loss": 0.4508, "step": 5092 }, { "epoch": 0.6039369145025495, "grad_norm": 1.7404530258525939, "learning_rate": 4.81052468941973e-05, "loss": 0.5225, "step": 5093 }, { "epoch": 0.6040554962646745, "grad_norm": 1.6955720314758524, "learning_rate": 4.81043301259355e-05, "loss": 0.8031, "step": 5094 }, { "epoch": 0.6041740780267995, "grad_norm": 1.248630116551128, "learning_rate": 4.810341314468011e-05, "loss": 0.3369, "step": 5095 }, { "epoch": 0.6042926597889244, "grad_norm": 1.3354499394473458, "learning_rate": 4.8102495950439585e-05, "loss": 0.4917, "step": 5096 }, { "epoch": 0.6044112415510494, "grad_norm": 1.705364132502535, "learning_rate": 4.8101578543222375e-05, "loss": 0.7415, "step": 5097 }, { "epoch": 0.6045298233131744, "grad_norm": 1.2119152177979147, "learning_rate": 4.8100660923036945e-05, "loss": 0.5734, "step": 5098 }, { "epoch": 0.6046484050752994, "grad_norm": 1.3310192005446184, "learning_rate": 4.809974308989174e-05, "loss": 0.5632, "step": 5099 }, { "epoch": 0.6047669868374244, "grad_norm": 1.2851467890658483, "learning_rate": 4.809882504379524e-05, "loss": 0.4588, "step": 5100 }, { "epoch": 0.6048855685995493, "grad_norm": 1.1475512823446283, "learning_rate": 4.809790678475589e-05, "loss": 0.4402, "step": 5101 }, { "epoch": 0.6050041503616743, "grad_norm": 1.6553213736038617, "learning_rate": 4.8096988312782174e-05, "loss": 0.6156, "step": 5102 }, { "epoch": 0.6051227321237994, "grad_norm": 1.5164183733783878, "learning_rate": 4.809606962788254e-05, "loss": 0.5389, "step": 5103 }, { "epoch": 0.6052413138859244, "grad_norm": 1.4729074338556065, "learning_rate": 4.809515073006547e-05, "loss": 0.5824, "step": 5104 }, { "epoch": 0.6053598956480494, "grad_norm": 1.2646764134269506, "learning_rate": 4.8094231619339435e-05, "loss": 0.4538, "step": 5105 }, { "epoch": 0.6054784774101744, "grad_norm": 1.5081382738102669, "learning_rate": 4.80933122957129e-05, "loss": 0.4854, "step": 5106 }, { "epoch": 0.6055970591722993, "grad_norm": 1.2250886490878297, "learning_rate": 4.809239275919434e-05, "loss": 0.4201, "step": 5107 }, { "epoch": 0.6057156409344243, "grad_norm": 1.3157920222591222, "learning_rate": 4.809147300979224e-05, "loss": 0.4371, "step": 5108 }, { "epoch": 0.6058342226965493, "grad_norm": 1.3092736601165913, "learning_rate": 4.809055304751509e-05, "loss": 0.4836, "step": 5109 }, { "epoch": 0.6059528044586743, "grad_norm": 1.3869044162792492, "learning_rate": 4.808963287237134e-05, "loss": 0.6041, "step": 5110 }, { "epoch": 0.6060713862207993, "grad_norm": 1.6228777284260985, "learning_rate": 4.8088712484369494e-05, "loss": 0.4997, "step": 5111 }, { "epoch": 0.6061899679829242, "grad_norm": 1.4427988729910017, "learning_rate": 4.808779188351803e-05, "loss": 0.5446, "step": 5112 }, { "epoch": 0.6063085497450492, "grad_norm": 2.227934020482448, "learning_rate": 4.8086871069825435e-05, "loss": 0.8166, "step": 5113 }, { "epoch": 0.6064271315071742, "grad_norm": 1.6808964006028664, "learning_rate": 4.80859500433002e-05, "loss": 0.6543, "step": 5114 }, { "epoch": 0.6065457132692992, "grad_norm": 1.4034665807392264, "learning_rate": 4.808502880395082e-05, "loss": 0.5773, "step": 5115 }, { "epoch": 0.6066642950314242, "grad_norm": 1.5314651436677196, "learning_rate": 4.808410735178578e-05, "loss": 0.621, "step": 5116 }, { "epoch": 0.6067828767935491, "grad_norm": 1.3412577030774167, "learning_rate": 4.808318568681358e-05, "loss": 0.4374, "step": 5117 }, { "epoch": 0.6069014585556741, "grad_norm": 1.200046615929627, "learning_rate": 4.808226380904272e-05, "loss": 0.4401, "step": 5118 }, { "epoch": 0.6070200403177991, "grad_norm": 1.3545963892312394, "learning_rate": 4.808134171848168e-05, "loss": 0.4434, "step": 5119 }, { "epoch": 0.6071386220799241, "grad_norm": 1.306121381545117, "learning_rate": 4.8080419415138986e-05, "loss": 0.3969, "step": 5120 }, { "epoch": 0.6072572038420491, "grad_norm": 1.3590499409311205, "learning_rate": 4.807949689902312e-05, "loss": 0.4967, "step": 5121 }, { "epoch": 0.607375785604174, "grad_norm": 1.679793443183019, "learning_rate": 4.80785741701426e-05, "loss": 0.406, "step": 5122 }, { "epoch": 0.607494367366299, "grad_norm": 1.6556580616080594, "learning_rate": 4.807765122850592e-05, "loss": 0.4159, "step": 5123 }, { "epoch": 0.607612949128424, "grad_norm": 1.2735924703557584, "learning_rate": 4.80767280741216e-05, "loss": 0.4884, "step": 5124 }, { "epoch": 0.607731530890549, "grad_norm": 1.7318688957363033, "learning_rate": 4.807580470699814e-05, "loss": 0.6042, "step": 5125 }, { "epoch": 0.607850112652674, "grad_norm": 1.4601316998654799, "learning_rate": 4.807488112714407e-05, "loss": 0.5996, "step": 5126 }, { "epoch": 0.607968694414799, "grad_norm": 1.5438255111418846, "learning_rate": 4.807395733456788e-05, "loss": 0.5591, "step": 5127 }, { "epoch": 0.608087276176924, "grad_norm": 1.2824193444668521, "learning_rate": 4.8073033329278104e-05, "loss": 0.4232, "step": 5128 }, { "epoch": 0.608205857939049, "grad_norm": 1.4757143740395897, "learning_rate": 4.807210911128326e-05, "loss": 0.5815, "step": 5129 }, { "epoch": 0.608324439701174, "grad_norm": 1.5958787077249146, "learning_rate": 4.807118468059185e-05, "loss": 0.4734, "step": 5130 }, { "epoch": 0.608443021463299, "grad_norm": 2.0296200293057987, "learning_rate": 4.8070260037212424e-05, "loss": 0.6734, "step": 5131 }, { "epoch": 0.608561603225424, "grad_norm": 1.6131794578410847, "learning_rate": 4.806933518115348e-05, "loss": 0.6257, "step": 5132 }, { "epoch": 0.608680184987549, "grad_norm": 1.2646315449483865, "learning_rate": 4.8068410112423555e-05, "loss": 0.4488, "step": 5133 }, { "epoch": 0.6087987667496739, "grad_norm": 1.6432191514603876, "learning_rate": 4.806748483103118e-05, "loss": 0.51, "step": 5134 }, { "epoch": 0.6089173485117989, "grad_norm": 1.6247178936227242, "learning_rate": 4.806655933698488e-05, "loss": 0.5586, "step": 5135 }, { "epoch": 0.6090359302739239, "grad_norm": 1.495842626160169, "learning_rate": 4.80656336302932e-05, "loss": 0.5205, "step": 5136 }, { "epoch": 0.6091545120360489, "grad_norm": 1.6563708630979666, "learning_rate": 4.806470771096465e-05, "loss": 0.6364, "step": 5137 }, { "epoch": 0.6092730937981738, "grad_norm": 1.1918572658453652, "learning_rate": 4.806378157900779e-05, "loss": 0.4134, "step": 5138 }, { "epoch": 0.6093916755602988, "grad_norm": 1.823178871433116, "learning_rate": 4.8062855234431134e-05, "loss": 0.5548, "step": 5139 }, { "epoch": 0.6095102573224238, "grad_norm": 1.6771415979459658, "learning_rate": 4.806192867724324e-05, "loss": 0.6072, "step": 5140 }, { "epoch": 0.6096288390845488, "grad_norm": 1.4395101602316298, "learning_rate": 4.806100190745265e-05, "loss": 0.5975, "step": 5141 }, { "epoch": 0.6097474208466738, "grad_norm": 1.2113407184956972, "learning_rate": 4.80600749250679e-05, "loss": 0.4883, "step": 5142 }, { "epoch": 0.6098660026087988, "grad_norm": 1.2680769663525484, "learning_rate": 4.805914773009753e-05, "loss": 0.5614, "step": 5143 }, { "epoch": 0.6099845843709237, "grad_norm": 1.3358051694707977, "learning_rate": 4.8058220322550094e-05, "loss": 0.4739, "step": 5144 }, { "epoch": 0.6101031661330487, "grad_norm": 1.5752538821877609, "learning_rate": 4.8057292702434147e-05, "loss": 0.5615, "step": 5145 }, { "epoch": 0.6102217478951737, "grad_norm": 1.2554048610845336, "learning_rate": 4.8056364869758236e-05, "loss": 0.5755, "step": 5146 }, { "epoch": 0.6103403296572987, "grad_norm": 1.5992112813743595, "learning_rate": 4.805543682453091e-05, "loss": 0.5736, "step": 5147 }, { "epoch": 0.6104589114194237, "grad_norm": 1.3776964998674834, "learning_rate": 4.805450856676074e-05, "loss": 0.5165, "step": 5148 }, { "epoch": 0.6105774931815486, "grad_norm": 1.8055149877469596, "learning_rate": 4.8053580096456265e-05, "loss": 0.6152, "step": 5149 }, { "epoch": 0.6106960749436736, "grad_norm": 1.3763332249235518, "learning_rate": 4.805265141362605e-05, "loss": 0.5127, "step": 5150 }, { "epoch": 0.6108146567057986, "grad_norm": 1.4485612680719788, "learning_rate": 4.8051722518278664e-05, "loss": 0.5133, "step": 5151 }, { "epoch": 0.6109332384679236, "grad_norm": 1.6429563401720764, "learning_rate": 4.805079341042266e-05, "loss": 0.5892, "step": 5152 }, { "epoch": 0.6110518202300487, "grad_norm": 1.3003741727557958, "learning_rate": 4.80498640900666e-05, "loss": 0.3985, "step": 5153 }, { "epoch": 0.6111704019921737, "grad_norm": 1.4314448943887212, "learning_rate": 4.804893455721907e-05, "loss": 0.6161, "step": 5154 }, { "epoch": 0.6112889837542986, "grad_norm": 1.4687152518408269, "learning_rate": 4.8048004811888624e-05, "loss": 0.5328, "step": 5155 }, { "epoch": 0.6114075655164236, "grad_norm": 1.5951057997564002, "learning_rate": 4.804707485408384e-05, "loss": 0.6635, "step": 5156 }, { "epoch": 0.6115261472785486, "grad_norm": 1.5184467909990438, "learning_rate": 4.804614468381329e-05, "loss": 0.7319, "step": 5157 }, { "epoch": 0.6116447290406736, "grad_norm": 1.2006157893956546, "learning_rate": 4.804521430108554e-05, "loss": 0.4571, "step": 5158 }, { "epoch": 0.6117633108027986, "grad_norm": 1.4682327984056156, "learning_rate": 4.804428370590918e-05, "loss": 0.4983, "step": 5159 }, { "epoch": 0.6118818925649235, "grad_norm": 1.4388321601640484, "learning_rate": 4.804335289829277e-05, "loss": 0.4815, "step": 5160 }, { "epoch": 0.6120004743270485, "grad_norm": 1.68211222151509, "learning_rate": 4.8042421878244915e-05, "loss": 0.5805, "step": 5161 }, { "epoch": 0.6121190560891735, "grad_norm": 1.8390806269762374, "learning_rate": 4.804149064577419e-05, "loss": 0.7274, "step": 5162 }, { "epoch": 0.6122376378512985, "grad_norm": 1.2740463950572787, "learning_rate": 4.804055920088917e-05, "loss": 0.5688, "step": 5163 }, { "epoch": 0.6123562196134235, "grad_norm": 1.564295333640591, "learning_rate": 4.8039627543598444e-05, "loss": 0.5073, "step": 5164 }, { "epoch": 0.6124748013755484, "grad_norm": 1.4010758169918685, "learning_rate": 4.803869567391062e-05, "loss": 0.5881, "step": 5165 }, { "epoch": 0.6125933831376734, "grad_norm": 1.7071053775573315, "learning_rate": 4.803776359183426e-05, "loss": 0.6914, "step": 5166 }, { "epoch": 0.6127119648997984, "grad_norm": 2.054779043526917, "learning_rate": 4.803683129737798e-05, "loss": 0.6838, "step": 5167 }, { "epoch": 0.6128305466619234, "grad_norm": 1.4190222045602578, "learning_rate": 4.803589879055036e-05, "loss": 0.6484, "step": 5168 }, { "epoch": 0.6129491284240484, "grad_norm": 1.3618126331403309, "learning_rate": 4.803496607136e-05, "loss": 0.495, "step": 5169 }, { "epoch": 0.6130677101861733, "grad_norm": 1.1509822957298423, "learning_rate": 4.8034033139815505e-05, "loss": 0.5288, "step": 5170 }, { "epoch": 0.6131862919482983, "grad_norm": 1.1875282847795874, "learning_rate": 4.8033099995925466e-05, "loss": 0.4257, "step": 5171 }, { "epoch": 0.6133048737104233, "grad_norm": 1.3974707128650843, "learning_rate": 4.803216663969849e-05, "loss": 0.6168, "step": 5172 }, { "epoch": 0.6134234554725483, "grad_norm": 1.3375323310327434, "learning_rate": 4.8031233071143184e-05, "loss": 0.4502, "step": 5173 }, { "epoch": 0.6135420372346733, "grad_norm": 1.4539077153064321, "learning_rate": 4.803029929026815e-05, "loss": 0.613, "step": 5174 }, { "epoch": 0.6136606189967982, "grad_norm": 1.3237519671060893, "learning_rate": 4.8029365297082e-05, "loss": 0.4789, "step": 5175 }, { "epoch": 0.6137792007589232, "grad_norm": 1.2961344081980335, "learning_rate": 4.802843109159334e-05, "loss": 0.5592, "step": 5176 }, { "epoch": 0.6138977825210482, "grad_norm": 1.2586755603118107, "learning_rate": 4.802749667381079e-05, "loss": 0.3885, "step": 5177 }, { "epoch": 0.6140163642831733, "grad_norm": 1.700934675478272, "learning_rate": 4.802656204374296e-05, "loss": 0.5543, "step": 5178 }, { "epoch": 0.6141349460452983, "grad_norm": 1.2312058048800438, "learning_rate": 4.802562720139846e-05, "loss": 0.4855, "step": 5179 }, { "epoch": 0.6142535278074233, "grad_norm": 1.2940083801497975, "learning_rate": 4.802469214678591e-05, "loss": 0.4376, "step": 5180 }, { "epoch": 0.6143721095695482, "grad_norm": 1.2518996598134267, "learning_rate": 4.802375687991394e-05, "loss": 0.3781, "step": 5181 }, { "epoch": 0.6144906913316732, "grad_norm": 1.3161944717652352, "learning_rate": 4.802282140079116e-05, "loss": 0.516, "step": 5182 }, { "epoch": 0.6146092730937982, "grad_norm": 1.2198231595549482, "learning_rate": 4.80218857094262e-05, "loss": 0.5449, "step": 5183 }, { "epoch": 0.6147278548559232, "grad_norm": 2.015855901107701, "learning_rate": 4.802094980582769e-05, "loss": 0.7629, "step": 5184 }, { "epoch": 0.6148464366180482, "grad_norm": 1.5393474726112903, "learning_rate": 4.802001369000425e-05, "loss": 0.4966, "step": 5185 }, { "epoch": 0.6149650183801731, "grad_norm": 2.144833383921898, "learning_rate": 4.8019077361964507e-05, "loss": 0.7549, "step": 5186 }, { "epoch": 0.6150836001422981, "grad_norm": 1.810907509388293, "learning_rate": 4.801814082171711e-05, "loss": 0.5941, "step": 5187 }, { "epoch": 0.6152021819044231, "grad_norm": 1.4412754687430986, "learning_rate": 4.801720406927067e-05, "loss": 0.5888, "step": 5188 }, { "epoch": 0.6153207636665481, "grad_norm": 1.518589572930433, "learning_rate": 4.801626710463384e-05, "loss": 0.5193, "step": 5189 }, { "epoch": 0.6154393454286731, "grad_norm": 1.63115004289834, "learning_rate": 4.8015329927815245e-05, "loss": 0.625, "step": 5190 }, { "epoch": 0.615557927190798, "grad_norm": 1.38023197790358, "learning_rate": 4.801439253882354e-05, "loss": 0.5813, "step": 5191 }, { "epoch": 0.615676508952923, "grad_norm": 1.5094069168227893, "learning_rate": 4.8013454937667354e-05, "loss": 0.6756, "step": 5192 }, { "epoch": 0.615795090715048, "grad_norm": 1.2429148414529627, "learning_rate": 4.801251712435534e-05, "loss": 0.418, "step": 5193 }, { "epoch": 0.615913672477173, "grad_norm": 1.4793392823217313, "learning_rate": 4.8011579098896133e-05, "loss": 0.6597, "step": 5194 }, { "epoch": 0.616032254239298, "grad_norm": 1.602949514458845, "learning_rate": 4.8010640861298385e-05, "loss": 0.6783, "step": 5195 }, { "epoch": 0.616150836001423, "grad_norm": 1.192940029114069, "learning_rate": 4.800970241157074e-05, "loss": 0.4723, "step": 5196 }, { "epoch": 0.6162694177635479, "grad_norm": 1.9375481194456907, "learning_rate": 4.8008763749721864e-05, "loss": 0.6617, "step": 5197 }, { "epoch": 0.6163879995256729, "grad_norm": 1.3285995089012554, "learning_rate": 4.80078248757604e-05, "loss": 0.4455, "step": 5198 }, { "epoch": 0.6165065812877979, "grad_norm": 1.8530405728766532, "learning_rate": 4.8006885789695e-05, "loss": 0.7117, "step": 5199 }, { "epoch": 0.6166251630499229, "grad_norm": 1.2311028382268137, "learning_rate": 4.800594649153433e-05, "loss": 0.4284, "step": 5200 }, { "epoch": 0.6167437448120479, "grad_norm": 1.453170854914148, "learning_rate": 4.800500698128705e-05, "loss": 0.5454, "step": 5201 }, { "epoch": 0.6168623265741728, "grad_norm": 1.2923622946712152, "learning_rate": 4.8004067258961804e-05, "loss": 0.586, "step": 5202 }, { "epoch": 0.6169809083362979, "grad_norm": 1.4827790428823606, "learning_rate": 4.800312732456728e-05, "loss": 0.4296, "step": 5203 }, { "epoch": 0.6170994900984229, "grad_norm": 1.1771660476046084, "learning_rate": 4.800218717811213e-05, "loss": 0.438, "step": 5204 }, { "epoch": 0.6172180718605479, "grad_norm": 1.3120265328464134, "learning_rate": 4.800124681960501e-05, "loss": 0.4572, "step": 5205 }, { "epoch": 0.6173366536226729, "grad_norm": 1.4274768825943103, "learning_rate": 4.800030624905461e-05, "loss": 0.4599, "step": 5206 }, { "epoch": 0.6174552353847979, "grad_norm": 1.5852538817939432, "learning_rate": 4.799936546646958e-05, "loss": 0.7439, "step": 5207 }, { "epoch": 0.6175738171469228, "grad_norm": 1.6253354775753432, "learning_rate": 4.799842447185862e-05, "loss": 0.5808, "step": 5208 }, { "epoch": 0.6176923989090478, "grad_norm": 1.491964456725777, "learning_rate": 4.799748326523037e-05, "loss": 0.462, "step": 5209 }, { "epoch": 0.6178109806711728, "grad_norm": 1.7656878350355303, "learning_rate": 4.799654184659353e-05, "loss": 0.6902, "step": 5210 }, { "epoch": 0.6179295624332978, "grad_norm": 1.6540136315847704, "learning_rate": 4.799560021595679e-05, "loss": 0.5757, "step": 5211 }, { "epoch": 0.6180481441954228, "grad_norm": 1.4330364142853929, "learning_rate": 4.7994658373328804e-05, "loss": 0.6828, "step": 5212 }, { "epoch": 0.6181667259575477, "grad_norm": 1.188715352179939, "learning_rate": 4.799371631871827e-05, "loss": 0.3853, "step": 5213 }, { "epoch": 0.6182853077196727, "grad_norm": 1.5531137280959875, "learning_rate": 4.799277405213386e-05, "loss": 0.598, "step": 5214 }, { "epoch": 0.6184038894817977, "grad_norm": 1.3596991854182263, "learning_rate": 4.799183157358428e-05, "loss": 0.4502, "step": 5215 }, { "epoch": 0.6185224712439227, "grad_norm": 1.2682220929396502, "learning_rate": 4.79908888830782e-05, "loss": 0.3503, "step": 5216 }, { "epoch": 0.6186410530060477, "grad_norm": 1.6152658151749104, "learning_rate": 4.7989945980624316e-05, "loss": 0.6165, "step": 5217 }, { "epoch": 0.6187596347681726, "grad_norm": 1.4928378141660166, "learning_rate": 4.7989002866231325e-05, "loss": 0.6361, "step": 5218 }, { "epoch": 0.6188782165302976, "grad_norm": 2.116836739659109, "learning_rate": 4.7988059539907925e-05, "loss": 0.6442, "step": 5219 }, { "epoch": 0.6189967982924226, "grad_norm": 1.5715249091356747, "learning_rate": 4.7987116001662804e-05, "loss": 0.6779, "step": 5220 }, { "epoch": 0.6191153800545476, "grad_norm": 1.3270125809588837, "learning_rate": 4.798617225150465e-05, "loss": 0.4091, "step": 5221 }, { "epoch": 0.6192339618166726, "grad_norm": 1.1953556167546557, "learning_rate": 4.798522828944218e-05, "loss": 0.3607, "step": 5222 }, { "epoch": 0.6193525435787975, "grad_norm": 1.3274089814673415, "learning_rate": 4.79842841154841e-05, "loss": 0.4251, "step": 5223 }, { "epoch": 0.6194711253409225, "grad_norm": 1.1965923880788638, "learning_rate": 4.79833397296391e-05, "loss": 0.3778, "step": 5224 }, { "epoch": 0.6195897071030475, "grad_norm": 1.655606942835597, "learning_rate": 4.798239513191589e-05, "loss": 0.5837, "step": 5225 }, { "epoch": 0.6197082888651725, "grad_norm": 1.367628512959878, "learning_rate": 4.798145032232319e-05, "loss": 0.4299, "step": 5226 }, { "epoch": 0.6198268706272976, "grad_norm": 1.7302250399300894, "learning_rate": 4.798050530086969e-05, "loss": 0.5681, "step": 5227 }, { "epoch": 0.6199454523894226, "grad_norm": 1.3939233160302065, "learning_rate": 4.797956006756411e-05, "loss": 0.4819, "step": 5228 }, { "epoch": 0.6200640341515475, "grad_norm": 1.3601445546394562, "learning_rate": 4.797861462241517e-05, "loss": 0.4351, "step": 5229 }, { "epoch": 0.6201826159136725, "grad_norm": 1.2433793157266273, "learning_rate": 4.7977668965431584e-05, "loss": 0.4173, "step": 5230 }, { "epoch": 0.6203011976757975, "grad_norm": 1.6020965579517303, "learning_rate": 4.797672309662206e-05, "loss": 0.5502, "step": 5231 }, { "epoch": 0.6204197794379225, "grad_norm": 1.6778860197342216, "learning_rate": 4.797577701599533e-05, "loss": 0.6392, "step": 5232 }, { "epoch": 0.6205383612000475, "grad_norm": 1.077994781475414, "learning_rate": 4.797483072356011e-05, "loss": 0.3416, "step": 5233 }, { "epoch": 0.6206569429621724, "grad_norm": 1.9310318167058182, "learning_rate": 4.797388421932513e-05, "loss": 0.6625, "step": 5234 }, { "epoch": 0.6207755247242974, "grad_norm": 1.7579325934771286, "learning_rate": 4.79729375032991e-05, "loss": 0.5411, "step": 5235 }, { "epoch": 0.6208941064864224, "grad_norm": 1.60394214135641, "learning_rate": 4.797199057549076e-05, "loss": 0.6265, "step": 5236 }, { "epoch": 0.6210126882485474, "grad_norm": 1.5044085247749917, "learning_rate": 4.797104343590883e-05, "loss": 0.4975, "step": 5237 }, { "epoch": 0.6211312700106724, "grad_norm": 2.4618772764711974, "learning_rate": 4.797009608456206e-05, "loss": 0.5313, "step": 5238 }, { "epoch": 0.6212498517727973, "grad_norm": 1.9685998396889728, "learning_rate": 4.796914852145917e-05, "loss": 0.6543, "step": 5239 }, { "epoch": 0.6213684335349223, "grad_norm": 2.123503393469146, "learning_rate": 4.7968200746608896e-05, "loss": 0.8104, "step": 5240 }, { "epoch": 0.6214870152970473, "grad_norm": 1.5245313250283807, "learning_rate": 4.7967252760019976e-05, "loss": 0.4941, "step": 5241 }, { "epoch": 0.6216055970591723, "grad_norm": 1.419204909119834, "learning_rate": 4.796630456170115e-05, "loss": 0.4651, "step": 5242 }, { "epoch": 0.6217241788212973, "grad_norm": 1.5637572136428668, "learning_rate": 4.796535615166116e-05, "loss": 0.7094, "step": 5243 }, { "epoch": 0.6218427605834222, "grad_norm": 1.3782029370496744, "learning_rate": 4.796440752990875e-05, "loss": 0.5342, "step": 5244 }, { "epoch": 0.6219613423455472, "grad_norm": 1.3345791144494041, "learning_rate": 4.796345869645266e-05, "loss": 0.4697, "step": 5245 }, { "epoch": 0.6220799241076722, "grad_norm": 1.4478013872788873, "learning_rate": 4.796250965130164e-05, "loss": 0.5671, "step": 5246 }, { "epoch": 0.6221985058697972, "grad_norm": 1.4113780002777003, "learning_rate": 4.7961560394464445e-05, "loss": 0.7073, "step": 5247 }, { "epoch": 0.6223170876319222, "grad_norm": 1.2016300524607295, "learning_rate": 4.796061092594982e-05, "loss": 0.3753, "step": 5248 }, { "epoch": 0.6224356693940472, "grad_norm": 1.1606463363521238, "learning_rate": 4.795966124576651e-05, "loss": 0.4465, "step": 5249 }, { "epoch": 0.6225542511561721, "grad_norm": 1.471461781009048, "learning_rate": 4.795871135392329e-05, "loss": 0.5272, "step": 5250 }, { "epoch": 0.6226728329182971, "grad_norm": 1.2371438287816041, "learning_rate": 4.795776125042889e-05, "loss": 0.5247, "step": 5251 }, { "epoch": 0.6227914146804222, "grad_norm": 0.9522004131849651, "learning_rate": 4.7956810935292095e-05, "loss": 0.3285, "step": 5252 }, { "epoch": 0.6229099964425472, "grad_norm": 1.6152034614240498, "learning_rate": 4.795586040852165e-05, "loss": 0.7852, "step": 5253 }, { "epoch": 0.6230285782046722, "grad_norm": 1.3824032134032107, "learning_rate": 4.7954909670126323e-05, "loss": 0.5268, "step": 5254 }, { "epoch": 0.6231471599667971, "grad_norm": 1.746365282396822, "learning_rate": 4.7953958720114886e-05, "loss": 0.4973, "step": 5255 }, { "epoch": 0.6232657417289221, "grad_norm": 1.3657154891230598, "learning_rate": 4.7953007558496086e-05, "loss": 0.5596, "step": 5256 }, { "epoch": 0.6233843234910471, "grad_norm": 1.5449486685269789, "learning_rate": 4.795205618527871e-05, "loss": 0.3757, "step": 5257 }, { "epoch": 0.6235029052531721, "grad_norm": 2.2666428743029092, "learning_rate": 4.795110460047152e-05, "loss": 0.7168, "step": 5258 }, { "epoch": 0.6236214870152971, "grad_norm": 1.3079556025725079, "learning_rate": 4.795015280408329e-05, "loss": 0.4163, "step": 5259 }, { "epoch": 0.623740068777422, "grad_norm": 1.3214027249906304, "learning_rate": 4.7949200796122796e-05, "loss": 0.4414, "step": 5260 }, { "epoch": 0.623858650539547, "grad_norm": 1.6522168254567975, "learning_rate": 4.794824857659881e-05, "loss": 0.4816, "step": 5261 }, { "epoch": 0.623977232301672, "grad_norm": 1.603538000697992, "learning_rate": 4.7947296145520115e-05, "loss": 0.7394, "step": 5262 }, { "epoch": 0.624095814063797, "grad_norm": 1.7949231221731508, "learning_rate": 4.7946343502895485e-05, "loss": 0.6377, "step": 5263 }, { "epoch": 0.624214395825922, "grad_norm": 1.7999610725121924, "learning_rate": 4.7945390648733714e-05, "loss": 0.5805, "step": 5264 }, { "epoch": 0.624332977588047, "grad_norm": 1.4886701509539344, "learning_rate": 4.794443758304358e-05, "loss": 0.5166, "step": 5265 }, { "epoch": 0.6244515593501719, "grad_norm": 1.6024398964964393, "learning_rate": 4.794348430583386e-05, "loss": 0.5625, "step": 5266 }, { "epoch": 0.6245701411122969, "grad_norm": 1.3267546946989428, "learning_rate": 4.794253081711335e-05, "loss": 0.41, "step": 5267 }, { "epoch": 0.6246887228744219, "grad_norm": 1.354950208517627, "learning_rate": 4.7941577116890844e-05, "loss": 0.4797, "step": 5268 }, { "epoch": 0.6248073046365469, "grad_norm": 1.388058405252482, "learning_rate": 4.7940623205175127e-05, "loss": 0.5673, "step": 5269 }, { "epoch": 0.6249258863986719, "grad_norm": 1.5142074365401943, "learning_rate": 4.7939669081974994e-05, "loss": 0.6708, "step": 5270 }, { "epoch": 0.6250444681607968, "grad_norm": 1.517379599536678, "learning_rate": 4.793871474729925e-05, "loss": 0.5848, "step": 5271 }, { "epoch": 0.6251630499229218, "grad_norm": 2.173920314289835, "learning_rate": 4.7937760201156675e-05, "loss": 0.8125, "step": 5272 }, { "epoch": 0.6252816316850468, "grad_norm": 1.6303194616283676, "learning_rate": 4.7936805443556086e-05, "loss": 0.6143, "step": 5273 }, { "epoch": 0.6254002134471718, "grad_norm": 1.5740363650831375, "learning_rate": 4.7935850474506284e-05, "loss": 0.52, "step": 5274 }, { "epoch": 0.6255187952092968, "grad_norm": 1.7926411918258756, "learning_rate": 4.793489529401606e-05, "loss": 0.6414, "step": 5275 }, { "epoch": 0.6256373769714217, "grad_norm": 1.4016383586236063, "learning_rate": 4.793393990209422e-05, "loss": 0.5189, "step": 5276 }, { "epoch": 0.6257559587335468, "grad_norm": 1.4494240614145348, "learning_rate": 4.7932984298749584e-05, "loss": 0.4827, "step": 5277 }, { "epoch": 0.6258745404956718, "grad_norm": 1.3389014150554297, "learning_rate": 4.793202848399095e-05, "loss": 0.5063, "step": 5278 }, { "epoch": 0.6259931222577968, "grad_norm": 1.325004279898177, "learning_rate": 4.7931072457827145e-05, "loss": 0.4938, "step": 5279 }, { "epoch": 0.6261117040199218, "grad_norm": 1.3346700428632767, "learning_rate": 4.793011622026696e-05, "loss": 0.4091, "step": 5280 }, { "epoch": 0.6262302857820468, "grad_norm": 1.5350788608674566, "learning_rate": 4.792915977131923e-05, "loss": 0.5043, "step": 5281 }, { "epoch": 0.6263488675441717, "grad_norm": 1.2796133345645224, "learning_rate": 4.7928203110992766e-05, "loss": 0.4634, "step": 5282 }, { "epoch": 0.6264674493062967, "grad_norm": 1.2956359825959152, "learning_rate": 4.792724623929637e-05, "loss": 0.4306, "step": 5283 }, { "epoch": 0.6265860310684217, "grad_norm": 1.5706668820443535, "learning_rate": 4.79262891562389e-05, "loss": 0.5849, "step": 5284 }, { "epoch": 0.6267046128305467, "grad_norm": 1.609065747407046, "learning_rate": 4.792533186182915e-05, "loss": 0.6741, "step": 5285 }, { "epoch": 0.6268231945926717, "grad_norm": 1.4213773612693494, "learning_rate": 4.7924374356075955e-05, "loss": 0.5323, "step": 5286 }, { "epoch": 0.6269417763547966, "grad_norm": 1.5844943953545954, "learning_rate": 4.792341663898813e-05, "loss": 0.6792, "step": 5287 }, { "epoch": 0.6270603581169216, "grad_norm": 1.3695743725875875, "learning_rate": 4.792245871057452e-05, "loss": 0.5285, "step": 5288 }, { "epoch": 0.6271789398790466, "grad_norm": 1.5135072876060478, "learning_rate": 4.7921500570843955e-05, "loss": 0.6335, "step": 5289 }, { "epoch": 0.6272975216411716, "grad_norm": 1.2789365778974466, "learning_rate": 4.792054221980525e-05, "loss": 0.5112, "step": 5290 }, { "epoch": 0.6274161034032966, "grad_norm": 1.2680910751092334, "learning_rate": 4.791958365746727e-05, "loss": 0.4354, "step": 5291 }, { "epoch": 0.6275346851654215, "grad_norm": 1.5424033643003792, "learning_rate": 4.7918624883838824e-05, "loss": 0.5543, "step": 5292 }, { "epoch": 0.6276532669275465, "grad_norm": 1.515140621517723, "learning_rate": 4.791766589892877e-05, "loss": 0.5969, "step": 5293 }, { "epoch": 0.6277718486896715, "grad_norm": 1.3953447862366992, "learning_rate": 4.791670670274593e-05, "loss": 0.5517, "step": 5294 }, { "epoch": 0.6278904304517965, "grad_norm": 1.7660143833776598, "learning_rate": 4.791574729529916e-05, "loss": 0.691, "step": 5295 }, { "epoch": 0.6280090122139215, "grad_norm": 1.5577937516447835, "learning_rate": 4.7914787676597296e-05, "loss": 0.5777, "step": 5296 }, { "epoch": 0.6281275939760464, "grad_norm": 1.3602367688853483, "learning_rate": 4.7913827846649196e-05, "loss": 0.489, "step": 5297 }, { "epoch": 0.6282461757381714, "grad_norm": 1.588140437823987, "learning_rate": 4.7912867805463704e-05, "loss": 0.5147, "step": 5298 }, { "epoch": 0.6283647575002964, "grad_norm": 1.2285836215654025, "learning_rate": 4.791190755304966e-05, "loss": 0.399, "step": 5299 }, { "epoch": 0.6284833392624214, "grad_norm": 1.3350271505634235, "learning_rate": 4.791094708941594e-05, "loss": 0.4623, "step": 5300 }, { "epoch": 0.6286019210245464, "grad_norm": 1.4538059357218496, "learning_rate": 4.790998641457137e-05, "loss": 0.5195, "step": 5301 }, { "epoch": 0.6287205027866715, "grad_norm": 1.291021045182473, "learning_rate": 4.790902552852482e-05, "loss": 0.402, "step": 5302 }, { "epoch": 0.6288390845487964, "grad_norm": 1.5663951260754305, "learning_rate": 4.7908064431285146e-05, "loss": 0.545, "step": 5303 }, { "epoch": 0.6289576663109214, "grad_norm": 1.2711913373147226, "learning_rate": 4.7907103122861217e-05, "loss": 0.4595, "step": 5304 }, { "epoch": 0.6290762480730464, "grad_norm": 1.733945287599582, "learning_rate": 4.7906141603261884e-05, "loss": 0.665, "step": 5305 }, { "epoch": 0.6291948298351714, "grad_norm": 1.2425178389956333, "learning_rate": 4.790517987249602e-05, "loss": 0.3812, "step": 5306 }, { "epoch": 0.6293134115972964, "grad_norm": 1.3721106080274412, "learning_rate": 4.7904217930572474e-05, "loss": 0.4459, "step": 5307 }, { "epoch": 0.6294319933594213, "grad_norm": 1.2730843573854453, "learning_rate": 4.790325577750013e-05, "loss": 0.436, "step": 5308 }, { "epoch": 0.6295505751215463, "grad_norm": 1.5932038503093149, "learning_rate": 4.790229341328786e-05, "loss": 0.5711, "step": 5309 }, { "epoch": 0.6296691568836713, "grad_norm": 1.6000006533912348, "learning_rate": 4.7901330837944525e-05, "loss": 0.4992, "step": 5310 }, { "epoch": 0.6297877386457963, "grad_norm": 1.5628922023545795, "learning_rate": 4.7900368051479e-05, "loss": 0.4981, "step": 5311 }, { "epoch": 0.6299063204079213, "grad_norm": 1.301749285990473, "learning_rate": 4.7899405053900174e-05, "loss": 0.4748, "step": 5312 }, { "epoch": 0.6300249021700463, "grad_norm": 1.5502551861093352, "learning_rate": 4.789844184521691e-05, "loss": 0.5546, "step": 5313 }, { "epoch": 0.6301434839321712, "grad_norm": 1.616725992364907, "learning_rate": 4.7897478425438086e-05, "loss": 0.6546, "step": 5314 }, { "epoch": 0.6302620656942962, "grad_norm": 1.3352346431213222, "learning_rate": 4.7896514794572595e-05, "loss": 0.5913, "step": 5315 }, { "epoch": 0.6303806474564212, "grad_norm": 1.5155149505113874, "learning_rate": 4.789555095262931e-05, "loss": 0.6493, "step": 5316 }, { "epoch": 0.6304992292185462, "grad_norm": 1.6332759299045392, "learning_rate": 4.7894586899617134e-05, "loss": 0.7251, "step": 5317 }, { "epoch": 0.6306178109806712, "grad_norm": 1.9917679330040885, "learning_rate": 4.789362263554493e-05, "loss": 0.6212, "step": 5318 }, { "epoch": 0.6307363927427961, "grad_norm": 1.8216377879125303, "learning_rate": 4.78926581604216e-05, "loss": 0.6579, "step": 5319 }, { "epoch": 0.6308549745049211, "grad_norm": 1.2042622774016865, "learning_rate": 4.789169347425604e-05, "loss": 0.3977, "step": 5320 }, { "epoch": 0.6309735562670461, "grad_norm": 1.415696365251393, "learning_rate": 4.789072857705713e-05, "loss": 0.567, "step": 5321 }, { "epoch": 0.6310921380291711, "grad_norm": 1.2970759302848736, "learning_rate": 4.788976346883378e-05, "loss": 0.4339, "step": 5322 }, { "epoch": 0.6312107197912961, "grad_norm": 1.4556488312873683, "learning_rate": 4.7888798149594884e-05, "loss": 0.5294, "step": 5323 }, { "epoch": 0.631329301553421, "grad_norm": 1.4087624411583701, "learning_rate": 4.7887832619349326e-05, "loss": 0.5091, "step": 5324 }, { "epoch": 0.631447883315546, "grad_norm": 1.635384123807785, "learning_rate": 4.788686687810603e-05, "loss": 0.4875, "step": 5325 }, { "epoch": 0.631566465077671, "grad_norm": 1.3787383970469083, "learning_rate": 4.7885900925873874e-05, "loss": 0.5108, "step": 5326 }, { "epoch": 0.6316850468397961, "grad_norm": 1.4586136524507132, "learning_rate": 4.788493476266179e-05, "loss": 0.507, "step": 5327 }, { "epoch": 0.6318036286019211, "grad_norm": 1.5983837928584794, "learning_rate": 4.788396838847866e-05, "loss": 0.7177, "step": 5328 }, { "epoch": 0.631922210364046, "grad_norm": 1.5333005814466545, "learning_rate": 4.7883001803333404e-05, "loss": 0.2756, "step": 5329 }, { "epoch": 0.632040792126171, "grad_norm": 1.29924547790288, "learning_rate": 4.788203500723494e-05, "loss": 0.4193, "step": 5330 }, { "epoch": 0.632159373888296, "grad_norm": 1.3015094821585442, "learning_rate": 4.7881068000192165e-05, "loss": 0.4286, "step": 5331 }, { "epoch": 0.632277955650421, "grad_norm": 1.4332629460486779, "learning_rate": 4.788010078221401e-05, "loss": 0.623, "step": 5332 }, { "epoch": 0.632396537412546, "grad_norm": 1.495691841870641, "learning_rate": 4.787913335330938e-05, "loss": 0.4498, "step": 5333 }, { "epoch": 0.632515119174671, "grad_norm": 2.1246004742534104, "learning_rate": 4.787816571348719e-05, "loss": 0.7647, "step": 5334 }, { "epoch": 0.6326337009367959, "grad_norm": 2.064230678139192, "learning_rate": 4.7877197862756374e-05, "loss": 0.713, "step": 5335 }, { "epoch": 0.6327522826989209, "grad_norm": 1.7724726253011236, "learning_rate": 4.787622980112585e-05, "loss": 0.6464, "step": 5336 }, { "epoch": 0.6328708644610459, "grad_norm": 1.9981101405751718, "learning_rate": 4.787526152860453e-05, "loss": 0.8288, "step": 5337 }, { "epoch": 0.6329894462231709, "grad_norm": 1.5545201705295337, "learning_rate": 4.7874293045201355e-05, "loss": 0.5627, "step": 5338 }, { "epoch": 0.6331080279852959, "grad_norm": 1.3343313387755458, "learning_rate": 4.7873324350925256e-05, "loss": 0.4463, "step": 5339 }, { "epoch": 0.6332266097474208, "grad_norm": 1.7357327950331438, "learning_rate": 4.787235544578514e-05, "loss": 0.8354, "step": 5340 }, { "epoch": 0.6333451915095458, "grad_norm": 1.3043689970064647, "learning_rate": 4.7871386329789966e-05, "loss": 0.4894, "step": 5341 }, { "epoch": 0.6334637732716708, "grad_norm": 1.3328476551613646, "learning_rate": 4.787041700294866e-05, "loss": 0.3624, "step": 5342 }, { "epoch": 0.6335823550337958, "grad_norm": 1.6437652720914036, "learning_rate": 4.7869447465270144e-05, "loss": 0.5244, "step": 5343 }, { "epoch": 0.6337009367959208, "grad_norm": 1.3153442402192532, "learning_rate": 4.7868477716763374e-05, "loss": 0.4051, "step": 5344 }, { "epoch": 0.6338195185580457, "grad_norm": 1.4523196332534853, "learning_rate": 4.7867507757437286e-05, "loss": 0.5913, "step": 5345 }, { "epoch": 0.6339381003201707, "grad_norm": 1.20628361711499, "learning_rate": 4.7866537587300805e-05, "loss": 0.4049, "step": 5346 }, { "epoch": 0.6340566820822957, "grad_norm": 1.726914157885531, "learning_rate": 4.78655672063629e-05, "loss": 0.5617, "step": 5347 }, { "epoch": 0.6341752638444207, "grad_norm": 1.1713739385238473, "learning_rate": 4.786459661463251e-05, "loss": 0.4262, "step": 5348 }, { "epoch": 0.6342938456065457, "grad_norm": 1.3258495618267032, "learning_rate": 4.786362581211856e-05, "loss": 0.5309, "step": 5349 }, { "epoch": 0.6344124273686707, "grad_norm": 1.8039607954533463, "learning_rate": 4.786265479883003e-05, "loss": 0.508, "step": 5350 }, { "epoch": 0.6345310091307956, "grad_norm": 1.1221650876828526, "learning_rate": 4.786168357477586e-05, "loss": 0.3208, "step": 5351 }, { "epoch": 0.6346495908929207, "grad_norm": 1.167831826222556, "learning_rate": 4.786071213996499e-05, "loss": 0.3924, "step": 5352 }, { "epoch": 0.6347681726550457, "grad_norm": 1.6008869277754967, "learning_rate": 4.7859740494406404e-05, "loss": 0.4953, "step": 5353 }, { "epoch": 0.6348867544171707, "grad_norm": 1.5108926279564776, "learning_rate": 4.785876863810903e-05, "loss": 0.4596, "step": 5354 }, { "epoch": 0.6350053361792957, "grad_norm": 1.6840400245069476, "learning_rate": 4.785779657108185e-05, "loss": 0.6373, "step": 5355 }, { "epoch": 0.6351239179414206, "grad_norm": 1.4753817736596173, "learning_rate": 4.78568242933338e-05, "loss": 0.6196, "step": 5356 }, { "epoch": 0.6352424997035456, "grad_norm": 1.6554269218594935, "learning_rate": 4.785585180487388e-05, "loss": 0.6495, "step": 5357 }, { "epoch": 0.6353610814656706, "grad_norm": 2.040615283682844, "learning_rate": 4.785487910571102e-05, "loss": 0.6354, "step": 5358 }, { "epoch": 0.6354796632277956, "grad_norm": 1.7051311838868, "learning_rate": 4.7853906195854206e-05, "loss": 0.551, "step": 5359 }, { "epoch": 0.6355982449899206, "grad_norm": 1.5037967534210024, "learning_rate": 4.7852933075312404e-05, "loss": 0.4127, "step": 5360 }, { "epoch": 0.6357168267520455, "grad_norm": 1.5210801659146729, "learning_rate": 4.7851959744094575e-05, "loss": 0.4595, "step": 5361 }, { "epoch": 0.6358354085141705, "grad_norm": 1.4883497493153646, "learning_rate": 4.7850986202209704e-05, "loss": 0.5642, "step": 5362 }, { "epoch": 0.6359539902762955, "grad_norm": 1.4567994230823906, "learning_rate": 4.7850012449666756e-05, "loss": 0.4535, "step": 5363 }, { "epoch": 0.6360725720384205, "grad_norm": 1.251316372445524, "learning_rate": 4.784903848647473e-05, "loss": 0.4453, "step": 5364 }, { "epoch": 0.6361911538005455, "grad_norm": 1.4596461663833633, "learning_rate": 4.784806431264258e-05, "loss": 0.6271, "step": 5365 }, { "epoch": 0.6363097355626705, "grad_norm": 1.2123895120579264, "learning_rate": 4.784708992817929e-05, "loss": 0.3851, "step": 5366 }, { "epoch": 0.6364283173247954, "grad_norm": 1.5623063767197949, "learning_rate": 4.7846115333093853e-05, "loss": 0.4981, "step": 5367 }, { "epoch": 0.6365468990869204, "grad_norm": 1.1546380231793918, "learning_rate": 4.784514052739525e-05, "loss": 0.431, "step": 5368 }, { "epoch": 0.6366654808490454, "grad_norm": 1.2563692084485647, "learning_rate": 4.7844165511092466e-05, "loss": 0.4069, "step": 5369 }, { "epoch": 0.6367840626111704, "grad_norm": 1.7000793848217148, "learning_rate": 4.7843190284194484e-05, "loss": 0.7841, "step": 5370 }, { "epoch": 0.6369026443732954, "grad_norm": 1.3885910695022639, "learning_rate": 4.78422148467103e-05, "loss": 0.4804, "step": 5371 }, { "epoch": 0.6370212261354203, "grad_norm": 1.2620339626398784, "learning_rate": 4.784123919864891e-05, "loss": 0.508, "step": 5372 }, { "epoch": 0.6371398078975453, "grad_norm": 1.3536776450119712, "learning_rate": 4.78402633400193e-05, "loss": 0.5372, "step": 5373 }, { "epoch": 0.6372583896596703, "grad_norm": 1.4834589181078492, "learning_rate": 4.783928727083048e-05, "loss": 0.585, "step": 5374 }, { "epoch": 0.6373769714217953, "grad_norm": 1.3341343414257616, "learning_rate": 4.783831099109143e-05, "loss": 0.3697, "step": 5375 }, { "epoch": 0.6374955531839203, "grad_norm": 1.1878416365902786, "learning_rate": 4.783733450081116e-05, "loss": 0.5103, "step": 5376 }, { "epoch": 0.6376141349460454, "grad_norm": 1.2858984708942673, "learning_rate": 4.783635779999867e-05, "loss": 0.4269, "step": 5377 }, { "epoch": 0.6377327167081703, "grad_norm": 1.3969941516227795, "learning_rate": 4.7835380888662975e-05, "loss": 0.4547, "step": 5378 }, { "epoch": 0.6378512984702953, "grad_norm": 1.506017036790242, "learning_rate": 4.783440376681306e-05, "loss": 0.5481, "step": 5379 }, { "epoch": 0.6379698802324203, "grad_norm": 1.0497963886332582, "learning_rate": 4.783342643445794e-05, "loss": 0.3821, "step": 5380 }, { "epoch": 0.6380884619945453, "grad_norm": 1.3461982766222045, "learning_rate": 4.783244889160664e-05, "loss": 0.4196, "step": 5381 }, { "epoch": 0.6382070437566703, "grad_norm": 1.4151672866323572, "learning_rate": 4.783147113826815e-05, "loss": 0.5746, "step": 5382 }, { "epoch": 0.6383256255187952, "grad_norm": 2.197494737440352, "learning_rate": 4.78304931744515e-05, "loss": 0.9454, "step": 5383 }, { "epoch": 0.6384442072809202, "grad_norm": 1.4815301123104077, "learning_rate": 4.78295150001657e-05, "loss": 0.4622, "step": 5384 }, { "epoch": 0.6385627890430452, "grad_norm": 1.1691218925618796, "learning_rate": 4.7828536615419767e-05, "loss": 0.313, "step": 5385 }, { "epoch": 0.6386813708051702, "grad_norm": 1.442858975514327, "learning_rate": 4.7827558020222716e-05, "loss": 0.4164, "step": 5386 }, { "epoch": 0.6387999525672952, "grad_norm": 1.1769087341457927, "learning_rate": 4.7826579214583576e-05, "loss": 0.4112, "step": 5387 }, { "epoch": 0.6389185343294201, "grad_norm": 1.687819661768108, "learning_rate": 4.782560019851137e-05, "loss": 0.5531, "step": 5388 }, { "epoch": 0.6390371160915451, "grad_norm": 1.6280674560497048, "learning_rate": 4.782462097201512e-05, "loss": 0.5681, "step": 5389 }, { "epoch": 0.6391556978536701, "grad_norm": 1.604990187828565, "learning_rate": 4.7823641535103855e-05, "loss": 0.4568, "step": 5390 }, { "epoch": 0.6392742796157951, "grad_norm": 2.2182105996576196, "learning_rate": 4.78226618877866e-05, "loss": 0.9161, "step": 5391 }, { "epoch": 0.6393928613779201, "grad_norm": 1.6357106831984065, "learning_rate": 4.782168203007239e-05, "loss": 0.6083, "step": 5392 }, { "epoch": 0.639511443140045, "grad_norm": 2.1762357100866385, "learning_rate": 4.782070196197026e-05, "loss": 0.4393, "step": 5393 }, { "epoch": 0.63963002490217, "grad_norm": 1.3596593328892868, "learning_rate": 4.781972168348924e-05, "loss": 0.4523, "step": 5394 }, { "epoch": 0.639748606664295, "grad_norm": 1.6920758061302337, "learning_rate": 4.7818741194638375e-05, "loss": 0.6521, "step": 5395 }, { "epoch": 0.63986718842642, "grad_norm": 1.4814534633552972, "learning_rate": 4.781776049542669e-05, "loss": 0.5402, "step": 5396 }, { "epoch": 0.639985770188545, "grad_norm": 1.8233535182112188, "learning_rate": 4.7816779585863235e-05, "loss": 0.7446, "step": 5397 }, { "epoch": 0.64010435195067, "grad_norm": 1.111412618242199, "learning_rate": 4.781579846595705e-05, "loss": 0.365, "step": 5398 }, { "epoch": 0.6402229337127949, "grad_norm": 1.4031088236797145, "learning_rate": 4.781481713571719e-05, "loss": 0.4705, "step": 5399 }, { "epoch": 0.6403415154749199, "grad_norm": 1.767670869916356, "learning_rate": 4.7813835595152686e-05, "loss": 0.6815, "step": 5400 }, { "epoch": 0.6404600972370449, "grad_norm": 1.085042201217061, "learning_rate": 4.78128538442726e-05, "loss": 0.3866, "step": 5401 }, { "epoch": 0.64057867899917, "grad_norm": 1.6226055255289282, "learning_rate": 4.781187188308597e-05, "loss": 0.8709, "step": 5402 }, { "epoch": 0.640697260761295, "grad_norm": 1.2127150714557444, "learning_rate": 4.781088971160186e-05, "loss": 0.3767, "step": 5403 }, { "epoch": 0.6408158425234199, "grad_norm": 1.391408584789372, "learning_rate": 4.780990732982932e-05, "loss": 0.6142, "step": 5404 }, { "epoch": 0.6409344242855449, "grad_norm": 1.305139199911052, "learning_rate": 4.780892473777741e-05, "loss": 0.3902, "step": 5405 }, { "epoch": 0.6410530060476699, "grad_norm": 1.4319342781171889, "learning_rate": 4.780794193545517e-05, "loss": 0.3422, "step": 5406 }, { "epoch": 0.6411715878097949, "grad_norm": 1.279484739567199, "learning_rate": 4.7806958922871686e-05, "loss": 0.4586, "step": 5407 }, { "epoch": 0.6412901695719199, "grad_norm": 1.1384447848896209, "learning_rate": 4.7805975700036005e-05, "loss": 0.4097, "step": 5408 }, { "epoch": 0.6414087513340448, "grad_norm": 1.4458636367381004, "learning_rate": 4.780499226695719e-05, "loss": 0.597, "step": 5409 }, { "epoch": 0.6415273330961698, "grad_norm": 1.7424479547365863, "learning_rate": 4.780400862364432e-05, "loss": 0.5852, "step": 5410 }, { "epoch": 0.6416459148582948, "grad_norm": 1.3822484462263114, "learning_rate": 4.7803024770106454e-05, "loss": 0.37, "step": 5411 }, { "epoch": 0.6417644966204198, "grad_norm": 1.4809781945287175, "learning_rate": 4.780204070635266e-05, "loss": 0.5341, "step": 5412 }, { "epoch": 0.6418830783825448, "grad_norm": 1.3051568073139832, "learning_rate": 4.780105643239201e-05, "loss": 0.4986, "step": 5413 }, { "epoch": 0.6420016601446698, "grad_norm": 1.3572770339767615, "learning_rate": 4.780007194823358e-05, "loss": 0.3548, "step": 5414 }, { "epoch": 0.6421202419067947, "grad_norm": 1.3235423696564603, "learning_rate": 4.779908725388645e-05, "loss": 0.3902, "step": 5415 }, { "epoch": 0.6422388236689197, "grad_norm": 1.7832450959702961, "learning_rate": 4.7798102349359686e-05, "loss": 0.4074, "step": 5416 }, { "epoch": 0.6423574054310447, "grad_norm": 1.5770425977734124, "learning_rate": 4.7797117234662384e-05, "loss": 0.5872, "step": 5417 }, { "epoch": 0.6424759871931697, "grad_norm": 1.4405424789587729, "learning_rate": 4.779613190980362e-05, "loss": 0.4384, "step": 5418 }, { "epoch": 0.6425945689552947, "grad_norm": 1.6766428881256972, "learning_rate": 4.779514637479247e-05, "loss": 0.5144, "step": 5419 }, { "epoch": 0.6427131507174196, "grad_norm": 1.2938108995565323, "learning_rate": 4.779416062963801e-05, "loss": 0.3308, "step": 5420 }, { "epoch": 0.6428317324795446, "grad_norm": 1.1932600299923726, "learning_rate": 4.7793174674349354e-05, "loss": 0.3199, "step": 5421 }, { "epoch": 0.6429503142416696, "grad_norm": 1.6319704656123462, "learning_rate": 4.779218850893558e-05, "loss": 0.5819, "step": 5422 }, { "epoch": 0.6430688960037946, "grad_norm": 1.2588346124421088, "learning_rate": 4.7791202133405774e-05, "loss": 0.4005, "step": 5423 }, { "epoch": 0.6431874777659196, "grad_norm": 1.436827623485643, "learning_rate": 4.7790215547769034e-05, "loss": 0.5772, "step": 5424 }, { "epoch": 0.6433060595280445, "grad_norm": 1.5224763509491426, "learning_rate": 4.778922875203445e-05, "loss": 0.5826, "step": 5425 }, { "epoch": 0.6434246412901695, "grad_norm": 1.3611826402810052, "learning_rate": 4.778824174621113e-05, "loss": 0.4757, "step": 5426 }, { "epoch": 0.6435432230522946, "grad_norm": 1.781825167157929, "learning_rate": 4.7787254530308155e-05, "loss": 0.7802, "step": 5427 }, { "epoch": 0.6436618048144196, "grad_norm": 1.4242538877061368, "learning_rate": 4.778626710433465e-05, "loss": 0.6275, "step": 5428 }, { "epoch": 0.6437803865765446, "grad_norm": 1.6152927803156125, "learning_rate": 4.7785279468299696e-05, "loss": 0.5277, "step": 5429 }, { "epoch": 0.6438989683386696, "grad_norm": 1.3666454356801594, "learning_rate": 4.778429162221241e-05, "loss": 0.4746, "step": 5430 }, { "epoch": 0.6440175501007945, "grad_norm": 1.3421190270357968, "learning_rate": 4.778330356608189e-05, "loss": 0.5557, "step": 5431 }, { "epoch": 0.6441361318629195, "grad_norm": 1.2908804192661198, "learning_rate": 4.7782315299917256e-05, "loss": 0.4707, "step": 5432 }, { "epoch": 0.6442547136250445, "grad_norm": 1.4459911459496857, "learning_rate": 4.7781326823727616e-05, "loss": 0.57, "step": 5433 }, { "epoch": 0.6443732953871695, "grad_norm": 1.2327369333227085, "learning_rate": 4.7780338137522065e-05, "loss": 0.3389, "step": 5434 }, { "epoch": 0.6444918771492945, "grad_norm": 1.4481332285178652, "learning_rate": 4.7779349241309745e-05, "loss": 0.4996, "step": 5435 }, { "epoch": 0.6446104589114194, "grad_norm": 1.2755738162590706, "learning_rate": 4.7778360135099756e-05, "loss": 0.4161, "step": 5436 }, { "epoch": 0.6447290406735444, "grad_norm": 1.3636204266738823, "learning_rate": 4.777737081890121e-05, "loss": 0.3916, "step": 5437 }, { "epoch": 0.6448476224356694, "grad_norm": 1.4398972014008835, "learning_rate": 4.777638129272325e-05, "loss": 0.4856, "step": 5438 }, { "epoch": 0.6449662041977944, "grad_norm": 1.7389978213229575, "learning_rate": 4.7775391556574974e-05, "loss": 0.6661, "step": 5439 }, { "epoch": 0.6450847859599194, "grad_norm": 1.3828946160487154, "learning_rate": 4.777440161046552e-05, "loss": 0.4211, "step": 5440 }, { "epoch": 0.6452033677220443, "grad_norm": 1.5544160825809339, "learning_rate": 4.7773411454404014e-05, "loss": 0.4651, "step": 5441 }, { "epoch": 0.6453219494841693, "grad_norm": 1.300913010799981, "learning_rate": 4.777242108839958e-05, "loss": 0.376, "step": 5442 }, { "epoch": 0.6454405312462943, "grad_norm": 1.4494053943460876, "learning_rate": 4.777143051246135e-05, "loss": 0.4348, "step": 5443 }, { "epoch": 0.6455591130084193, "grad_norm": 1.4202220089777073, "learning_rate": 4.777043972659845e-05, "loss": 0.5935, "step": 5444 }, { "epoch": 0.6456776947705443, "grad_norm": 1.4142857304078622, "learning_rate": 4.776944873082002e-05, "loss": 0.3556, "step": 5445 }, { "epoch": 0.6457962765326692, "grad_norm": 1.902449747429788, "learning_rate": 4.776845752513519e-05, "loss": 0.5438, "step": 5446 }, { "epoch": 0.6459148582947942, "grad_norm": 1.2283827210140938, "learning_rate": 4.7767466109553114e-05, "loss": 0.4787, "step": 5447 }, { "epoch": 0.6460334400569192, "grad_norm": 1.8411403399388864, "learning_rate": 4.7766474484082914e-05, "loss": 0.5716, "step": 5448 }, { "epoch": 0.6461520218190442, "grad_norm": 1.8226755890417932, "learning_rate": 4.776548264873373e-05, "loss": 0.6661, "step": 5449 }, { "epoch": 0.6462706035811692, "grad_norm": 1.857965825763071, "learning_rate": 4.776449060351472e-05, "loss": 0.642, "step": 5450 }, { "epoch": 0.6463891853432941, "grad_norm": 1.6287274664600788, "learning_rate": 4.7763498348435025e-05, "loss": 0.4888, "step": 5451 }, { "epoch": 0.6465077671054192, "grad_norm": 1.5013313828070372, "learning_rate": 4.776250588350379e-05, "loss": 0.4368, "step": 5452 }, { "epoch": 0.6466263488675442, "grad_norm": 1.2167042778289083, "learning_rate": 4.776151320873016e-05, "loss": 0.4817, "step": 5453 }, { "epoch": 0.6467449306296692, "grad_norm": 1.035093828969511, "learning_rate": 4.776052032412329e-05, "loss": 0.3316, "step": 5454 }, { "epoch": 0.6468635123917942, "grad_norm": 1.5167762525439679, "learning_rate": 4.775952722969233e-05, "loss": 0.5834, "step": 5455 }, { "epoch": 0.6469820941539192, "grad_norm": 1.604613759417812, "learning_rate": 4.775853392544645e-05, "loss": 0.4661, "step": 5456 }, { "epoch": 0.6471006759160441, "grad_norm": 1.4732534189462014, "learning_rate": 4.775754041139478e-05, "loss": 0.5017, "step": 5457 }, { "epoch": 0.6472192576781691, "grad_norm": 1.2803143846964333, "learning_rate": 4.775654668754651e-05, "loss": 0.4826, "step": 5458 }, { "epoch": 0.6473378394402941, "grad_norm": 1.6869216406363567, "learning_rate": 4.7755552753910785e-05, "loss": 0.7759, "step": 5459 }, { "epoch": 0.6474564212024191, "grad_norm": 1.4575977667369064, "learning_rate": 4.775455861049676e-05, "loss": 0.5912, "step": 5460 }, { "epoch": 0.6475750029645441, "grad_norm": 1.475492662804033, "learning_rate": 4.7753564257313605e-05, "loss": 0.4078, "step": 5461 }, { "epoch": 0.647693584726669, "grad_norm": 1.2937100753879756, "learning_rate": 4.7752569694370494e-05, "loss": 0.4788, "step": 5462 }, { "epoch": 0.647812166488794, "grad_norm": 1.8864308269305865, "learning_rate": 4.775157492167659e-05, "loss": 0.7408, "step": 5463 }, { "epoch": 0.647930748250919, "grad_norm": 1.1742415556966193, "learning_rate": 4.775057993924107e-05, "loss": 0.4594, "step": 5464 }, { "epoch": 0.648049330013044, "grad_norm": 1.5269020082885998, "learning_rate": 4.77495847470731e-05, "loss": 0.5104, "step": 5465 }, { "epoch": 0.648167911775169, "grad_norm": 1.3442383685824795, "learning_rate": 4.774858934518185e-05, "loss": 0.5688, "step": 5466 }, { "epoch": 0.648286493537294, "grad_norm": 1.0926569470969445, "learning_rate": 4.7747593733576514e-05, "loss": 0.4409, "step": 5467 }, { "epoch": 0.6484050752994189, "grad_norm": 1.4978326774658637, "learning_rate": 4.774659791226625e-05, "loss": 0.4878, "step": 5468 }, { "epoch": 0.6485236570615439, "grad_norm": 1.4800648307170936, "learning_rate": 4.774560188126025e-05, "loss": 0.656, "step": 5469 }, { "epoch": 0.6486422388236689, "grad_norm": 1.539092887665531, "learning_rate": 4.774460564056769e-05, "loss": 0.5315, "step": 5470 }, { "epoch": 0.6487608205857939, "grad_norm": 1.5627909291876174, "learning_rate": 4.774360919019776e-05, "loss": 0.5972, "step": 5471 }, { "epoch": 0.6488794023479189, "grad_norm": 1.7599269592639044, "learning_rate": 4.774261253015964e-05, "loss": 0.5613, "step": 5472 }, { "epoch": 0.6489979841100438, "grad_norm": 1.5551229481979199, "learning_rate": 4.774161566046253e-05, "loss": 0.5281, "step": 5473 }, { "epoch": 0.6491165658721688, "grad_norm": 1.483141266923919, "learning_rate": 4.7740618581115604e-05, "loss": 0.5445, "step": 5474 }, { "epoch": 0.6492351476342938, "grad_norm": 1.2551531696643516, "learning_rate": 4.773962129212806e-05, "loss": 0.3909, "step": 5475 }, { "epoch": 0.6493537293964188, "grad_norm": 1.4868683617729141, "learning_rate": 4.77386237935091e-05, "loss": 0.6902, "step": 5476 }, { "epoch": 0.6494723111585439, "grad_norm": 1.0283816277048843, "learning_rate": 4.7737626085267906e-05, "loss": 0.35, "step": 5477 }, { "epoch": 0.6495908929206688, "grad_norm": 1.7448799760932534, "learning_rate": 4.773662816741368e-05, "loss": 0.6958, "step": 5478 }, { "epoch": 0.6497094746827938, "grad_norm": 1.3956807400238729, "learning_rate": 4.773563003995563e-05, "loss": 0.4433, "step": 5479 }, { "epoch": 0.6498280564449188, "grad_norm": 1.8317350105845756, "learning_rate": 4.7734631702902946e-05, "loss": 0.5411, "step": 5480 }, { "epoch": 0.6499466382070438, "grad_norm": 1.5882487845778857, "learning_rate": 4.773363315626484e-05, "loss": 0.5275, "step": 5481 }, { "epoch": 0.6500652199691688, "grad_norm": 1.218483442532768, "learning_rate": 4.7732634400050514e-05, "loss": 0.3981, "step": 5482 }, { "epoch": 0.6501838017312938, "grad_norm": 1.331287565834744, "learning_rate": 4.7731635434269175e-05, "loss": 0.3678, "step": 5483 }, { "epoch": 0.6503023834934187, "grad_norm": 1.9894203123083969, "learning_rate": 4.7730636258930035e-05, "loss": 0.814, "step": 5484 }, { "epoch": 0.6504209652555437, "grad_norm": 1.7955292253996447, "learning_rate": 4.7729636874042303e-05, "loss": 0.6522, "step": 5485 }, { "epoch": 0.6505395470176687, "grad_norm": 1.6984635155707117, "learning_rate": 4.772863727961518e-05, "loss": 0.6717, "step": 5486 }, { "epoch": 0.6506581287797937, "grad_norm": 1.9689116033896847, "learning_rate": 4.772763747565791e-05, "loss": 0.8422, "step": 5487 }, { "epoch": 0.6507767105419187, "grad_norm": 1.0935900590479302, "learning_rate": 4.7726637462179685e-05, "loss": 0.3968, "step": 5488 }, { "epoch": 0.6508952923040436, "grad_norm": 1.2643656976840127, "learning_rate": 4.7725637239189735e-05, "loss": 0.4154, "step": 5489 }, { "epoch": 0.6510138740661686, "grad_norm": 1.4353385150991158, "learning_rate": 4.772463680669728e-05, "loss": 0.5626, "step": 5490 }, { "epoch": 0.6511324558282936, "grad_norm": 0.9953620070113895, "learning_rate": 4.772363616471153e-05, "loss": 0.3132, "step": 5491 }, { "epoch": 0.6512510375904186, "grad_norm": 1.4692244547511863, "learning_rate": 4.772263531324173e-05, "loss": 0.4421, "step": 5492 }, { "epoch": 0.6513696193525436, "grad_norm": 1.6434472817410828, "learning_rate": 4.7721634252297085e-05, "loss": 0.6394, "step": 5493 }, { "epoch": 0.6514882011146685, "grad_norm": 1.1895448360683918, "learning_rate": 4.7720632981886846e-05, "loss": 0.4162, "step": 5494 }, { "epoch": 0.6516067828767935, "grad_norm": 1.237032113347861, "learning_rate": 4.771963150202023e-05, "loss": 0.341, "step": 5495 }, { "epoch": 0.6517253646389185, "grad_norm": 1.5835959520451128, "learning_rate": 4.771862981270647e-05, "loss": 0.3984, "step": 5496 }, { "epoch": 0.6518439464010435, "grad_norm": 1.5580807261587124, "learning_rate": 4.771762791395481e-05, "loss": 0.5832, "step": 5497 }, { "epoch": 0.6519625281631685, "grad_norm": 2.086172142013894, "learning_rate": 4.771662580577447e-05, "loss": 0.7653, "step": 5498 }, { "epoch": 0.6520811099252934, "grad_norm": 1.5960225712978253, "learning_rate": 4.77156234881747e-05, "loss": 0.4222, "step": 5499 }, { "epoch": 0.6521996916874184, "grad_norm": 1.4705984340965006, "learning_rate": 4.7714620961164735e-05, "loss": 0.7105, "step": 5500 }, { "epoch": 0.6523182734495434, "grad_norm": 1.5453492320340896, "learning_rate": 4.771361822475382e-05, "loss": 0.6626, "step": 5501 }, { "epoch": 0.6524368552116685, "grad_norm": 1.3432120353182777, "learning_rate": 4.77126152789512e-05, "loss": 0.4712, "step": 5502 }, { "epoch": 0.6525554369737935, "grad_norm": 1.0210423125604198, "learning_rate": 4.771161212376612e-05, "loss": 0.3562, "step": 5503 }, { "epoch": 0.6526740187359185, "grad_norm": 1.2342993600058514, "learning_rate": 4.7710608759207833e-05, "loss": 0.4858, "step": 5504 }, { "epoch": 0.6527926004980434, "grad_norm": 1.5932450921206622, "learning_rate": 4.770960518528557e-05, "loss": 0.727, "step": 5505 }, { "epoch": 0.6529111822601684, "grad_norm": 1.1883263554735541, "learning_rate": 4.77086014020086e-05, "loss": 0.4113, "step": 5506 }, { "epoch": 0.6530297640222934, "grad_norm": 1.196053688951196, "learning_rate": 4.770759740938618e-05, "loss": 0.4282, "step": 5507 }, { "epoch": 0.6531483457844184, "grad_norm": 1.4200075896739233, "learning_rate": 4.770659320742755e-05, "loss": 0.5613, "step": 5508 }, { "epoch": 0.6532669275465434, "grad_norm": 1.5345439021036995, "learning_rate": 4.7705588796141974e-05, "loss": 0.483, "step": 5509 }, { "epoch": 0.6533855093086683, "grad_norm": 1.1808965279501091, "learning_rate": 4.7704584175538715e-05, "loss": 0.4488, "step": 5510 }, { "epoch": 0.6535040910707933, "grad_norm": 1.4190273238530384, "learning_rate": 4.7703579345627035e-05, "loss": 0.4623, "step": 5511 }, { "epoch": 0.6536226728329183, "grad_norm": 1.17892022842376, "learning_rate": 4.770257430641619e-05, "loss": 0.4379, "step": 5512 }, { "epoch": 0.6537412545950433, "grad_norm": 1.7285906954612507, "learning_rate": 4.770156905791545e-05, "loss": 0.5286, "step": 5513 }, { "epoch": 0.6538598363571683, "grad_norm": 1.3250402095894565, "learning_rate": 4.7700563600134086e-05, "loss": 0.4404, "step": 5514 }, { "epoch": 0.6539784181192932, "grad_norm": 1.7166885057330972, "learning_rate": 4.769955793308136e-05, "loss": 0.5474, "step": 5515 }, { "epoch": 0.6540969998814182, "grad_norm": 1.4913435454276924, "learning_rate": 4.769855205676654e-05, "loss": 0.6202, "step": 5516 }, { "epoch": 0.6542155816435432, "grad_norm": 1.6706950089432584, "learning_rate": 4.769754597119892e-05, "loss": 0.6473, "step": 5517 }, { "epoch": 0.6543341634056682, "grad_norm": 1.303131461306497, "learning_rate": 4.769653967638775e-05, "loss": 0.4517, "step": 5518 }, { "epoch": 0.6544527451677932, "grad_norm": 1.4084224731721087, "learning_rate": 4.769553317234232e-05, "loss": 0.5158, "step": 5519 }, { "epoch": 0.6545713269299182, "grad_norm": 1.6007399761276142, "learning_rate": 4.7694526459071894e-05, "loss": 0.5913, "step": 5520 }, { "epoch": 0.6546899086920431, "grad_norm": 1.416039974432949, "learning_rate": 4.769351953658578e-05, "loss": 0.4333, "step": 5521 }, { "epoch": 0.6548084904541681, "grad_norm": 1.164617499655951, "learning_rate": 4.769251240489324e-05, "loss": 0.4208, "step": 5522 }, { "epoch": 0.6549270722162931, "grad_norm": 1.5514947160366102, "learning_rate": 4.7691505064003554e-05, "loss": 0.5274, "step": 5523 }, { "epoch": 0.6550456539784181, "grad_norm": 1.3909723046319775, "learning_rate": 4.7690497513926025e-05, "loss": 0.5395, "step": 5524 }, { "epoch": 0.655164235740543, "grad_norm": 1.4909464887992252, "learning_rate": 4.768948975466993e-05, "loss": 0.5379, "step": 5525 }, { "epoch": 0.655282817502668, "grad_norm": 1.801353281118953, "learning_rate": 4.768848178624457e-05, "loss": 0.7727, "step": 5526 }, { "epoch": 0.6554013992647931, "grad_norm": 1.5167777058503251, "learning_rate": 4.768747360865922e-05, "loss": 0.6603, "step": 5527 }, { "epoch": 0.6555199810269181, "grad_norm": 1.4949367346257258, "learning_rate": 4.768646522192319e-05, "loss": 0.5946, "step": 5528 }, { "epoch": 0.6556385627890431, "grad_norm": 1.423002051414685, "learning_rate": 4.7685456626045774e-05, "loss": 0.3946, "step": 5529 }, { "epoch": 0.6557571445511681, "grad_norm": 1.87574418389901, "learning_rate": 4.768444782103627e-05, "loss": 0.7253, "step": 5530 }, { "epoch": 0.655875726313293, "grad_norm": 1.5229904053889278, "learning_rate": 4.7683438806903964e-05, "loss": 0.55, "step": 5531 }, { "epoch": 0.655994308075418, "grad_norm": 1.3751278644252654, "learning_rate": 4.768242958365817e-05, "loss": 0.4518, "step": 5532 }, { "epoch": 0.656112889837543, "grad_norm": 1.4412807630448152, "learning_rate": 4.76814201513082e-05, "loss": 0.4618, "step": 5533 }, { "epoch": 0.656231471599668, "grad_norm": 1.4078066925670683, "learning_rate": 4.7680410509863337e-05, "loss": 0.4111, "step": 5534 }, { "epoch": 0.656350053361793, "grad_norm": 1.402411008421877, "learning_rate": 4.767940065933291e-05, "loss": 0.6071, "step": 5535 }, { "epoch": 0.656468635123918, "grad_norm": 1.1915533163873622, "learning_rate": 4.767839059972622e-05, "loss": 0.3973, "step": 5536 }, { "epoch": 0.6565872168860429, "grad_norm": 1.2714516289447388, "learning_rate": 4.767738033105257e-05, "loss": 0.4725, "step": 5537 }, { "epoch": 0.6567057986481679, "grad_norm": 1.3804594917645507, "learning_rate": 4.767636985332129e-05, "loss": 0.5487, "step": 5538 }, { "epoch": 0.6568243804102929, "grad_norm": 1.6629809928032673, "learning_rate": 4.767535916654169e-05, "loss": 0.5527, "step": 5539 }, { "epoch": 0.6569429621724179, "grad_norm": 1.2884819379466255, "learning_rate": 4.767434827072308e-05, "loss": 0.4546, "step": 5540 }, { "epoch": 0.6570615439345429, "grad_norm": 1.4469821637263613, "learning_rate": 4.7673337165874785e-05, "loss": 0.388, "step": 5541 }, { "epoch": 0.6571801256966678, "grad_norm": 1.2178555229960908, "learning_rate": 4.767232585200613e-05, "loss": 0.4765, "step": 5542 }, { "epoch": 0.6572987074587928, "grad_norm": 2.120933015542263, "learning_rate": 4.7671314329126426e-05, "loss": 0.6191, "step": 5543 }, { "epoch": 0.6574172892209178, "grad_norm": 1.672918110318068, "learning_rate": 4.767030259724501e-05, "loss": 0.6743, "step": 5544 }, { "epoch": 0.6575358709830428, "grad_norm": 1.5198129997614163, "learning_rate": 4.7669290656371205e-05, "loss": 0.4906, "step": 5545 }, { "epoch": 0.6576544527451678, "grad_norm": 1.2960251588640668, "learning_rate": 4.766827850651433e-05, "loss": 0.3755, "step": 5546 }, { "epoch": 0.6577730345072927, "grad_norm": 1.7814779645457421, "learning_rate": 4.766726614768373e-05, "loss": 0.6217, "step": 5547 }, { "epoch": 0.6578916162694177, "grad_norm": 1.4857419047062352, "learning_rate": 4.766625357988873e-05, "loss": 0.4397, "step": 5548 }, { "epoch": 0.6580101980315427, "grad_norm": 1.5592794432789863, "learning_rate": 4.766524080313868e-05, "loss": 0.4251, "step": 5549 }, { "epoch": 0.6581287797936677, "grad_norm": 1.167676471813798, "learning_rate": 4.766422781744289e-05, "loss": 0.348, "step": 5550 }, { "epoch": 0.6582473615557927, "grad_norm": 1.5103516982437075, "learning_rate": 4.7663214622810715e-05, "loss": 0.488, "step": 5551 }, { "epoch": 0.6583659433179178, "grad_norm": 1.6266517907577251, "learning_rate": 4.76622012192515e-05, "loss": 0.6529, "step": 5552 }, { "epoch": 0.6584845250800427, "grad_norm": 1.812538822833785, "learning_rate": 4.7661187606774574e-05, "loss": 0.7525, "step": 5553 }, { "epoch": 0.6586031068421677, "grad_norm": 1.8026108733493802, "learning_rate": 4.7660173785389285e-05, "loss": 0.6529, "step": 5554 }, { "epoch": 0.6587216886042927, "grad_norm": 1.4898206548250443, "learning_rate": 4.765915975510499e-05, "loss": 0.5007, "step": 5555 }, { "epoch": 0.6588402703664177, "grad_norm": 1.7670391952159232, "learning_rate": 4.765814551593102e-05, "loss": 0.5546, "step": 5556 }, { "epoch": 0.6589588521285427, "grad_norm": 1.5583754038727928, "learning_rate": 4.7657131067876734e-05, "loss": 0.4738, "step": 5557 }, { "epoch": 0.6590774338906676, "grad_norm": 1.410780151393153, "learning_rate": 4.765611641095149e-05, "loss": 0.5144, "step": 5558 }, { "epoch": 0.6591960156527926, "grad_norm": 1.5352844931344198, "learning_rate": 4.765510154516463e-05, "loss": 0.7187, "step": 5559 }, { "epoch": 0.6593145974149176, "grad_norm": 1.715649013469572, "learning_rate": 4.765408647052552e-05, "loss": 0.7461, "step": 5560 }, { "epoch": 0.6594331791770426, "grad_norm": 1.5716769792718548, "learning_rate": 4.765307118704351e-05, "loss": 0.6152, "step": 5561 }, { "epoch": 0.6595517609391676, "grad_norm": 1.4602411962986155, "learning_rate": 4.765205569472796e-05, "loss": 0.4704, "step": 5562 }, { "epoch": 0.6596703427012925, "grad_norm": 1.2454119480019343, "learning_rate": 4.765103999358823e-05, "loss": 0.5274, "step": 5563 }, { "epoch": 0.6597889244634175, "grad_norm": 1.2977436715118487, "learning_rate": 4.76500240836337e-05, "loss": 0.5524, "step": 5564 }, { "epoch": 0.6599075062255425, "grad_norm": 1.4396780924175279, "learning_rate": 4.764900796487371e-05, "loss": 0.5353, "step": 5565 }, { "epoch": 0.6600260879876675, "grad_norm": 1.4270331336616673, "learning_rate": 4.7647991637317656e-05, "loss": 0.4987, "step": 5566 }, { "epoch": 0.6601446697497925, "grad_norm": 1.2737595673598705, "learning_rate": 4.7646975100974884e-05, "loss": 0.4507, "step": 5567 }, { "epoch": 0.6602632515119174, "grad_norm": 1.4701661176921312, "learning_rate": 4.7645958355854766e-05, "loss": 0.6398, "step": 5568 }, { "epoch": 0.6603818332740424, "grad_norm": 1.5511452836945798, "learning_rate": 4.764494140196669e-05, "loss": 0.5602, "step": 5569 }, { "epoch": 0.6605004150361674, "grad_norm": 1.2797118295655874, "learning_rate": 4.7643924239320023e-05, "loss": 0.4516, "step": 5570 }, { "epoch": 0.6606189967982924, "grad_norm": 1.3468545053419123, "learning_rate": 4.764290686792415e-05, "loss": 0.4556, "step": 5571 }, { "epoch": 0.6607375785604174, "grad_norm": 1.2871862747597538, "learning_rate": 4.7641889287788435e-05, "loss": 0.4775, "step": 5572 }, { "epoch": 0.6608561603225424, "grad_norm": 1.2654100247589102, "learning_rate": 4.764087149892226e-05, "loss": 0.4236, "step": 5573 }, { "epoch": 0.6609747420846673, "grad_norm": 1.5751204936076302, "learning_rate": 4.763985350133502e-05, "loss": 0.6585, "step": 5574 }, { "epoch": 0.6610933238467923, "grad_norm": 1.6906274343266041, "learning_rate": 4.76388352950361e-05, "loss": 0.7877, "step": 5575 }, { "epoch": 0.6612119056089173, "grad_norm": 1.571394152040877, "learning_rate": 4.7637816880034866e-05, "loss": 0.5419, "step": 5576 }, { "epoch": 0.6613304873710424, "grad_norm": 1.4774632252518067, "learning_rate": 4.763679825634073e-05, "loss": 0.6181, "step": 5577 }, { "epoch": 0.6614490691331674, "grad_norm": 1.425563574060063, "learning_rate": 4.7635779423963075e-05, "loss": 0.5019, "step": 5578 }, { "epoch": 0.6615676508952923, "grad_norm": 2.1006436594990423, "learning_rate": 4.7634760382911283e-05, "loss": 0.7034, "step": 5579 }, { "epoch": 0.6616862326574173, "grad_norm": 1.7035131521081666, "learning_rate": 4.763374113319476e-05, "loss": 0.5857, "step": 5580 }, { "epoch": 0.6618048144195423, "grad_norm": 1.160551420182838, "learning_rate": 4.763272167482291e-05, "loss": 0.3458, "step": 5581 }, { "epoch": 0.6619233961816673, "grad_norm": 1.5750743937732545, "learning_rate": 4.76317020078051e-05, "loss": 0.5356, "step": 5582 }, { "epoch": 0.6620419779437923, "grad_norm": 1.2207003357773936, "learning_rate": 4.763068213215076e-05, "loss": 0.4344, "step": 5583 }, { "epoch": 0.6621605597059173, "grad_norm": 1.3631054391252084, "learning_rate": 4.7629662047869286e-05, "loss": 0.4548, "step": 5584 }, { "epoch": 0.6622791414680422, "grad_norm": 1.4000093442380195, "learning_rate": 4.762864175497008e-05, "loss": 0.6564, "step": 5585 }, { "epoch": 0.6623977232301672, "grad_norm": 1.3602457084005724, "learning_rate": 4.762762125346254e-05, "loss": 0.6229, "step": 5586 }, { "epoch": 0.6625163049922922, "grad_norm": 1.3037879926474403, "learning_rate": 4.762660054335608e-05, "loss": 0.4283, "step": 5587 }, { "epoch": 0.6626348867544172, "grad_norm": 1.3634137545182907, "learning_rate": 4.762557962466011e-05, "loss": 0.5352, "step": 5588 }, { "epoch": 0.6627534685165422, "grad_norm": 1.2981601889327796, "learning_rate": 4.7624558497384045e-05, "loss": 0.4211, "step": 5589 }, { "epoch": 0.6628720502786671, "grad_norm": 1.6832766198162132, "learning_rate": 4.762353716153729e-05, "loss": 0.6646, "step": 5590 }, { "epoch": 0.6629906320407921, "grad_norm": 1.0753178293381827, "learning_rate": 4.7622515617129274e-05, "loss": 0.3756, "step": 5591 }, { "epoch": 0.6631092138029171, "grad_norm": 1.4718499875401652, "learning_rate": 4.76214938641694e-05, "loss": 0.4729, "step": 5592 }, { "epoch": 0.6632277955650421, "grad_norm": 1.5267701312617328, "learning_rate": 4.7620471902667084e-05, "loss": 0.5159, "step": 5593 }, { "epoch": 0.6633463773271671, "grad_norm": 1.2547282420401475, "learning_rate": 4.7619449732631763e-05, "loss": 0.417, "step": 5594 }, { "epoch": 0.663464959089292, "grad_norm": 1.6506899632869252, "learning_rate": 4.7618427354072855e-05, "loss": 0.5606, "step": 5595 }, { "epoch": 0.663583540851417, "grad_norm": 1.4150494938916278, "learning_rate": 4.7617404766999786e-05, "loss": 0.4706, "step": 5596 }, { "epoch": 0.663702122613542, "grad_norm": 1.3883322554639808, "learning_rate": 4.7616381971421973e-05, "loss": 0.4994, "step": 5597 }, { "epoch": 0.663820704375667, "grad_norm": 1.298505621090561, "learning_rate": 4.761535896734886e-05, "loss": 0.3917, "step": 5598 }, { "epoch": 0.663939286137792, "grad_norm": 1.6245541923363853, "learning_rate": 4.7614335754789865e-05, "loss": 0.4814, "step": 5599 }, { "epoch": 0.6640578678999169, "grad_norm": 1.4323275480078919, "learning_rate": 4.761331233375442e-05, "loss": 0.3568, "step": 5600 }, { "epoch": 0.6641764496620419, "grad_norm": 1.7343320352651483, "learning_rate": 4.7612288704251977e-05, "loss": 0.5769, "step": 5601 }, { "epoch": 0.664295031424167, "grad_norm": 1.486590100745642, "learning_rate": 4.761126486629196e-05, "loss": 0.4244, "step": 5602 }, { "epoch": 0.664413613186292, "grad_norm": 1.7283797876027613, "learning_rate": 4.76102408198838e-05, "loss": 0.6618, "step": 5603 }, { "epoch": 0.664532194948417, "grad_norm": 1.4008102849992075, "learning_rate": 4.760921656503696e-05, "loss": 0.5685, "step": 5604 }, { "epoch": 0.664650776710542, "grad_norm": 1.7647114296346926, "learning_rate": 4.760819210176086e-05, "loss": 0.5761, "step": 5605 }, { "epoch": 0.6647693584726669, "grad_norm": 1.5851175574799725, "learning_rate": 4.760716743006495e-05, "loss": 0.589, "step": 5606 }, { "epoch": 0.6648879402347919, "grad_norm": 1.683217390993462, "learning_rate": 4.7606142549958685e-05, "loss": 0.6387, "step": 5607 }, { "epoch": 0.6650065219969169, "grad_norm": 1.369570236936051, "learning_rate": 4.76051174614515e-05, "loss": 0.3804, "step": 5608 }, { "epoch": 0.6651251037590419, "grad_norm": 1.3981975987954398, "learning_rate": 4.760409216455286e-05, "loss": 0.3623, "step": 5609 }, { "epoch": 0.6652436855211669, "grad_norm": 1.4237396758140843, "learning_rate": 4.76030666592722e-05, "loss": 0.4478, "step": 5610 }, { "epoch": 0.6653622672832918, "grad_norm": 1.3952658588201083, "learning_rate": 4.760204094561899e-05, "loss": 0.3809, "step": 5611 }, { "epoch": 0.6654808490454168, "grad_norm": 1.5883259544870982, "learning_rate": 4.760101502360268e-05, "loss": 0.4881, "step": 5612 }, { "epoch": 0.6655994308075418, "grad_norm": 1.4108621852212344, "learning_rate": 4.7599988893232725e-05, "loss": 0.4827, "step": 5613 }, { "epoch": 0.6657180125696668, "grad_norm": 1.6265104001112098, "learning_rate": 4.759896255451858e-05, "loss": 0.557, "step": 5614 }, { "epoch": 0.6658365943317918, "grad_norm": 1.5611252479710365, "learning_rate": 4.7597936007469725e-05, "loss": 0.5015, "step": 5615 }, { "epoch": 0.6659551760939167, "grad_norm": 1.2938329926648473, "learning_rate": 4.75969092520956e-05, "loss": 0.5601, "step": 5616 }, { "epoch": 0.6660737578560417, "grad_norm": 1.2489978934788148, "learning_rate": 4.759588228840569e-05, "loss": 0.3358, "step": 5617 }, { "epoch": 0.6661923396181667, "grad_norm": 1.3207432830211974, "learning_rate": 4.7594855116409457e-05, "loss": 0.4072, "step": 5618 }, { "epoch": 0.6663109213802917, "grad_norm": 1.4921468210346427, "learning_rate": 4.759382773611636e-05, "loss": 0.5415, "step": 5619 }, { "epoch": 0.6664295031424167, "grad_norm": 1.4011866199606544, "learning_rate": 4.7592800147535876e-05, "loss": 0.5408, "step": 5620 }, { "epoch": 0.6665480849045416, "grad_norm": 1.2505193533049175, "learning_rate": 4.759177235067748e-05, "loss": 0.413, "step": 5621 }, { "epoch": 0.6666666666666666, "grad_norm": 1.4800908942904514, "learning_rate": 4.7590744345550654e-05, "loss": 0.501, "step": 5622 }, { "epoch": 0.6667852484287916, "grad_norm": 1.4130622374022237, "learning_rate": 4.758971613216486e-05, "loss": 0.4447, "step": 5623 }, { "epoch": 0.6669038301909166, "grad_norm": 1.3244612267950255, "learning_rate": 4.758868771052959e-05, "loss": 0.3931, "step": 5624 }, { "epoch": 0.6670224119530416, "grad_norm": 1.9695968973840774, "learning_rate": 4.7587659080654314e-05, "loss": 0.749, "step": 5625 }, { "epoch": 0.6671409937151666, "grad_norm": 1.2362170653629911, "learning_rate": 4.7586630242548526e-05, "loss": 0.423, "step": 5626 }, { "epoch": 0.6672595754772916, "grad_norm": 1.6283913308991367, "learning_rate": 4.7585601196221704e-05, "loss": 0.4763, "step": 5627 }, { "epoch": 0.6673781572394166, "grad_norm": 1.5627561708035391, "learning_rate": 4.758457194168333e-05, "loss": 0.5114, "step": 5628 }, { "epoch": 0.6674967390015416, "grad_norm": 1.5970542858356094, "learning_rate": 4.75835424789429e-05, "loss": 0.5661, "step": 5629 }, { "epoch": 0.6676153207636666, "grad_norm": 1.5134203044744, "learning_rate": 4.75825128080099e-05, "loss": 0.4794, "step": 5630 }, { "epoch": 0.6677339025257916, "grad_norm": 1.5649060463357696, "learning_rate": 4.758148292889383e-05, "loss": 0.4563, "step": 5631 }, { "epoch": 0.6678524842879165, "grad_norm": 1.5387929982081128, "learning_rate": 4.7580452841604176e-05, "loss": 0.4775, "step": 5632 }, { "epoch": 0.6679710660500415, "grad_norm": 1.6389631850263902, "learning_rate": 4.757942254615044e-05, "loss": 0.5751, "step": 5633 }, { "epoch": 0.6680896478121665, "grad_norm": 1.6622997003336746, "learning_rate": 4.757839204254212e-05, "loss": 0.6983, "step": 5634 }, { "epoch": 0.6682082295742915, "grad_norm": 1.7377481934036711, "learning_rate": 4.75773613307887e-05, "loss": 0.6953, "step": 5635 }, { "epoch": 0.6683268113364165, "grad_norm": 1.771795710755704, "learning_rate": 4.7576330410899704e-05, "loss": 0.543, "step": 5636 }, { "epoch": 0.6684453930985415, "grad_norm": 1.7943604728199727, "learning_rate": 4.757529928288462e-05, "loss": 0.7238, "step": 5637 }, { "epoch": 0.6685639748606664, "grad_norm": 1.2736568039209832, "learning_rate": 4.757426794675297e-05, "loss": 0.3546, "step": 5638 }, { "epoch": 0.6686825566227914, "grad_norm": 1.606088964861244, "learning_rate": 4.757323640251424e-05, "loss": 0.6108, "step": 5639 }, { "epoch": 0.6688011383849164, "grad_norm": 1.3915937094430204, "learning_rate": 4.7572204650177966e-05, "loss": 0.4997, "step": 5640 }, { "epoch": 0.6689197201470414, "grad_norm": 1.5551136035331687, "learning_rate": 4.7571172689753626e-05, "loss": 0.6009, "step": 5641 }, { "epoch": 0.6690383019091664, "grad_norm": 1.6491315161968312, "learning_rate": 4.757014052125077e-05, "loss": 0.5775, "step": 5642 }, { "epoch": 0.6691568836712913, "grad_norm": 1.2786286915601188, "learning_rate": 4.7569108144678886e-05, "loss": 0.5127, "step": 5643 }, { "epoch": 0.6692754654334163, "grad_norm": 1.1776203045019915, "learning_rate": 4.7568075560047506e-05, "loss": 0.4247, "step": 5644 }, { "epoch": 0.6693940471955413, "grad_norm": 1.4844253440843935, "learning_rate": 4.7567042767366146e-05, "loss": 0.5413, "step": 5645 }, { "epoch": 0.6695126289576663, "grad_norm": 1.2491382627483054, "learning_rate": 4.756600976664431e-05, "loss": 0.5179, "step": 5646 }, { "epoch": 0.6696312107197913, "grad_norm": 1.3026591333069073, "learning_rate": 4.756497655789155e-05, "loss": 0.4513, "step": 5647 }, { "epoch": 0.6697497924819162, "grad_norm": 1.7041582338216834, "learning_rate": 4.756394314111738e-05, "loss": 0.521, "step": 5648 }, { "epoch": 0.6698683742440412, "grad_norm": 1.6921353140670534, "learning_rate": 4.7562909516331325e-05, "loss": 0.8663, "step": 5649 }, { "epoch": 0.6699869560061662, "grad_norm": 1.2498501265020507, "learning_rate": 4.75618756835429e-05, "loss": 0.4141, "step": 5650 }, { "epoch": 0.6701055377682912, "grad_norm": 1.3257997617322248, "learning_rate": 4.7560841642761654e-05, "loss": 0.4519, "step": 5651 }, { "epoch": 0.6702241195304163, "grad_norm": 1.137537694655641, "learning_rate": 4.7559807393997115e-05, "loss": 0.3897, "step": 5652 }, { "epoch": 0.6703427012925413, "grad_norm": 1.5822737179893402, "learning_rate": 4.7558772937258815e-05, "loss": 0.5599, "step": 5653 }, { "epoch": 0.6704612830546662, "grad_norm": 1.572115193929137, "learning_rate": 4.7557738272556294e-05, "loss": 0.6356, "step": 5654 }, { "epoch": 0.6705798648167912, "grad_norm": 1.3504263760920325, "learning_rate": 4.755670339989909e-05, "loss": 0.4882, "step": 5655 }, { "epoch": 0.6706984465789162, "grad_norm": 1.7429438985929042, "learning_rate": 4.7555668319296735e-05, "loss": 0.6602, "step": 5656 }, { "epoch": 0.6708170283410412, "grad_norm": 1.3334103399657982, "learning_rate": 4.7554633030758786e-05, "loss": 0.4111, "step": 5657 }, { "epoch": 0.6709356101031662, "grad_norm": 1.6771003581283996, "learning_rate": 4.7553597534294766e-05, "loss": 0.6284, "step": 5658 }, { "epoch": 0.6710541918652911, "grad_norm": 1.312761309800299, "learning_rate": 4.755256182991425e-05, "loss": 0.5118, "step": 5659 }, { "epoch": 0.6711727736274161, "grad_norm": 1.1984560864675602, "learning_rate": 4.755152591762676e-05, "loss": 0.4398, "step": 5660 }, { "epoch": 0.6712913553895411, "grad_norm": 1.4078255807019586, "learning_rate": 4.755048979744185e-05, "loss": 0.448, "step": 5661 }, { "epoch": 0.6714099371516661, "grad_norm": 1.5702655905154772, "learning_rate": 4.754945346936909e-05, "loss": 0.4503, "step": 5662 }, { "epoch": 0.6715285189137911, "grad_norm": 1.8173984831999148, "learning_rate": 4.754841693341801e-05, "loss": 0.6234, "step": 5663 }, { "epoch": 0.671647100675916, "grad_norm": 1.4377111177575816, "learning_rate": 4.754738018959819e-05, "loss": 0.5765, "step": 5664 }, { "epoch": 0.671765682438041, "grad_norm": 1.1930389051003656, "learning_rate": 4.754634323791917e-05, "loss": 0.3348, "step": 5665 }, { "epoch": 0.671884264200166, "grad_norm": 1.552262420096495, "learning_rate": 4.7545306078390504e-05, "loss": 0.5953, "step": 5666 }, { "epoch": 0.672002845962291, "grad_norm": 1.321534240760336, "learning_rate": 4.7544268711021774e-05, "loss": 0.391, "step": 5667 }, { "epoch": 0.672121427724416, "grad_norm": 1.6097570745358494, "learning_rate": 4.7543231135822526e-05, "loss": 0.4784, "step": 5668 }, { "epoch": 0.672240009486541, "grad_norm": 1.4615312544821546, "learning_rate": 4.7542193352802336e-05, "loss": 0.3915, "step": 5669 }, { "epoch": 0.6723585912486659, "grad_norm": 2.00719680700747, "learning_rate": 4.7541155361970756e-05, "loss": 0.9051, "step": 5670 }, { "epoch": 0.6724771730107909, "grad_norm": 1.6156403653975195, "learning_rate": 4.754011716333737e-05, "loss": 0.545, "step": 5671 }, { "epoch": 0.6725957547729159, "grad_norm": 1.248186354039386, "learning_rate": 4.7539078756911745e-05, "loss": 0.4915, "step": 5672 }, { "epoch": 0.6727143365350409, "grad_norm": 1.3289574053025608, "learning_rate": 4.7538040142703456e-05, "loss": 0.4955, "step": 5673 }, { "epoch": 0.6728329182971658, "grad_norm": 1.140908851326464, "learning_rate": 4.753700132072207e-05, "loss": 0.3346, "step": 5674 }, { "epoch": 0.6729515000592908, "grad_norm": 1.3713771008625937, "learning_rate": 4.753596229097718e-05, "loss": 0.5204, "step": 5675 }, { "epoch": 0.6730700818214158, "grad_norm": 1.5101763232070664, "learning_rate": 4.753492305347834e-05, "loss": 0.4512, "step": 5676 }, { "epoch": 0.6731886635835409, "grad_norm": 1.5487907524312163, "learning_rate": 4.753388360823515e-05, "loss": 0.4119, "step": 5677 }, { "epoch": 0.6733072453456659, "grad_norm": 1.3383500000262378, "learning_rate": 4.7532843955257186e-05, "loss": 0.2748, "step": 5678 }, { "epoch": 0.6734258271077909, "grad_norm": 1.9082305590113289, "learning_rate": 4.7531804094554026e-05, "loss": 0.7034, "step": 5679 }, { "epoch": 0.6735444088699158, "grad_norm": 1.3134558299978445, "learning_rate": 4.7530764026135267e-05, "loss": 0.463, "step": 5680 }, { "epoch": 0.6736629906320408, "grad_norm": 1.219087299787808, "learning_rate": 4.752972375001049e-05, "loss": 0.3625, "step": 5681 }, { "epoch": 0.6737815723941658, "grad_norm": 1.2344574496439853, "learning_rate": 4.75286832661893e-05, "loss": 0.4653, "step": 5682 }, { "epoch": 0.6739001541562908, "grad_norm": 1.4002967700738496, "learning_rate": 4.7527642574681264e-05, "loss": 0.3827, "step": 5683 }, { "epoch": 0.6740187359184158, "grad_norm": 1.3748216956989279, "learning_rate": 4.752660167549599e-05, "loss": 0.4092, "step": 5684 }, { "epoch": 0.6741373176805407, "grad_norm": 1.4187299090214673, "learning_rate": 4.7525560568643075e-05, "loss": 0.4458, "step": 5685 }, { "epoch": 0.6742558994426657, "grad_norm": 1.6932882310584985, "learning_rate": 4.752451925413212e-05, "loss": 0.6198, "step": 5686 }, { "epoch": 0.6743744812047907, "grad_norm": 1.6693948181196396, "learning_rate": 4.752347773197271e-05, "loss": 0.6604, "step": 5687 }, { "epoch": 0.6744930629669157, "grad_norm": 1.5463446134157632, "learning_rate": 4.752243600217445e-05, "loss": 0.4411, "step": 5688 }, { "epoch": 0.6746116447290407, "grad_norm": 1.3680417975062844, "learning_rate": 4.752139406474696e-05, "loss": 0.54, "step": 5689 }, { "epoch": 0.6747302264911657, "grad_norm": 1.178473452164607, "learning_rate": 4.7520351919699835e-05, "loss": 0.3254, "step": 5690 }, { "epoch": 0.6748488082532906, "grad_norm": 1.676362450203553, "learning_rate": 4.751930956704268e-05, "loss": 0.5942, "step": 5691 }, { "epoch": 0.6749673900154156, "grad_norm": 1.8847627242455596, "learning_rate": 4.75182670067851e-05, "loss": 0.6309, "step": 5692 }, { "epoch": 0.6750859717775406, "grad_norm": 1.8031157287255333, "learning_rate": 4.751722423893672e-05, "loss": 0.4821, "step": 5693 }, { "epoch": 0.6752045535396656, "grad_norm": 1.7835041679124368, "learning_rate": 4.751618126350714e-05, "loss": 0.7409, "step": 5694 }, { "epoch": 0.6753231353017906, "grad_norm": 1.1621828788924649, "learning_rate": 4.7515138080505975e-05, "loss": 0.3697, "step": 5695 }, { "epoch": 0.6754417170639155, "grad_norm": 1.3005362854337295, "learning_rate": 4.751409468994286e-05, "loss": 0.5506, "step": 5696 }, { "epoch": 0.6755602988260405, "grad_norm": 1.3127126657447157, "learning_rate": 4.751305109182739e-05, "loss": 0.491, "step": 5697 }, { "epoch": 0.6756788805881655, "grad_norm": 1.6029909907554378, "learning_rate": 4.7512007286169194e-05, "loss": 0.5559, "step": 5698 }, { "epoch": 0.6757974623502905, "grad_norm": 1.6368868822699887, "learning_rate": 4.751096327297791e-05, "loss": 0.7514, "step": 5699 }, { "epoch": 0.6759160441124155, "grad_norm": 1.3486319765784842, "learning_rate": 4.7509919052263135e-05, "loss": 0.4731, "step": 5700 }, { "epoch": 0.6760346258745404, "grad_norm": 1.6520943689150631, "learning_rate": 4.750887462403452e-05, "loss": 0.6394, "step": 5701 }, { "epoch": 0.6761532076366655, "grad_norm": 1.9549205113576387, "learning_rate": 4.750782998830168e-05, "loss": 0.7417, "step": 5702 }, { "epoch": 0.6762717893987905, "grad_norm": 1.4019045517710704, "learning_rate": 4.750678514507424e-05, "loss": 0.5043, "step": 5703 }, { "epoch": 0.6763903711609155, "grad_norm": 1.2331103373410803, "learning_rate": 4.750574009436185e-05, "loss": 0.3457, "step": 5704 }, { "epoch": 0.6765089529230405, "grad_norm": 1.2888198546063856, "learning_rate": 4.750469483617414e-05, "loss": 0.428, "step": 5705 }, { "epoch": 0.6766275346851655, "grad_norm": 1.5027087734449516, "learning_rate": 4.750364937052073e-05, "loss": 0.4914, "step": 5706 }, { "epoch": 0.6767461164472904, "grad_norm": 1.3758288242204866, "learning_rate": 4.7502603697411275e-05, "loss": 0.4446, "step": 5707 }, { "epoch": 0.6768646982094154, "grad_norm": 1.3814191688905333, "learning_rate": 4.75015578168554e-05, "loss": 0.5369, "step": 5708 }, { "epoch": 0.6769832799715404, "grad_norm": 1.39700404214086, "learning_rate": 4.7500511728862765e-05, "loss": 0.6476, "step": 5709 }, { "epoch": 0.6771018617336654, "grad_norm": 1.382339708010986, "learning_rate": 4.7499465433442994e-05, "loss": 0.5057, "step": 5710 }, { "epoch": 0.6772204434957904, "grad_norm": 1.141384171384917, "learning_rate": 4.7498418930605746e-05, "loss": 0.3883, "step": 5711 }, { "epoch": 0.6773390252579153, "grad_norm": 1.519447735802292, "learning_rate": 4.749737222036067e-05, "loss": 0.483, "step": 5712 }, { "epoch": 0.6774576070200403, "grad_norm": 1.4408536670783207, "learning_rate": 4.749632530271741e-05, "loss": 0.6194, "step": 5713 }, { "epoch": 0.6775761887821653, "grad_norm": 1.255692066259264, "learning_rate": 4.749527817768561e-05, "loss": 0.3159, "step": 5714 }, { "epoch": 0.6776947705442903, "grad_norm": 1.6091269946393139, "learning_rate": 4.749423084527494e-05, "loss": 0.4129, "step": 5715 }, { "epoch": 0.6778133523064153, "grad_norm": 1.5598367103172588, "learning_rate": 4.749318330549504e-05, "loss": 0.6161, "step": 5716 }, { "epoch": 0.6779319340685402, "grad_norm": 1.6045261438155523, "learning_rate": 4.7492135558355575e-05, "loss": 0.5549, "step": 5717 }, { "epoch": 0.6780505158306652, "grad_norm": 1.401595317048469, "learning_rate": 4.74910876038662e-05, "loss": 0.4333, "step": 5718 }, { "epoch": 0.6781690975927902, "grad_norm": 1.4040754506770416, "learning_rate": 4.7490039442036586e-05, "loss": 0.4989, "step": 5719 }, { "epoch": 0.6782876793549152, "grad_norm": 1.34523836474768, "learning_rate": 4.748899107287638e-05, "loss": 0.2966, "step": 5720 }, { "epoch": 0.6784062611170402, "grad_norm": 1.7956853970353286, "learning_rate": 4.748794249639524e-05, "loss": 0.7161, "step": 5721 }, { "epoch": 0.6785248428791651, "grad_norm": 1.8319856047771041, "learning_rate": 4.7486893712602864e-05, "loss": 0.6027, "step": 5722 }, { "epoch": 0.6786434246412901, "grad_norm": 1.1677178195233482, "learning_rate": 4.7485844721508904e-05, "loss": 0.3812, "step": 5723 }, { "epoch": 0.6787620064034151, "grad_norm": 1.405859399955405, "learning_rate": 4.7484795523123024e-05, "loss": 0.4021, "step": 5724 }, { "epoch": 0.6788805881655401, "grad_norm": 1.4871355049729083, "learning_rate": 4.748374611745491e-05, "loss": 0.6018, "step": 5725 }, { "epoch": 0.6789991699276652, "grad_norm": 1.6977391140875737, "learning_rate": 4.748269650451422e-05, "loss": 0.6082, "step": 5726 }, { "epoch": 0.6791177516897902, "grad_norm": 1.642297111535135, "learning_rate": 4.7481646684310636e-05, "loss": 0.4946, "step": 5727 }, { "epoch": 0.6792363334519151, "grad_norm": 1.380392274427136, "learning_rate": 4.748059665685385e-05, "loss": 0.4981, "step": 5728 }, { "epoch": 0.6793549152140401, "grad_norm": 1.0693675794851472, "learning_rate": 4.747954642215352e-05, "loss": 0.3795, "step": 5729 }, { "epoch": 0.6794734969761651, "grad_norm": 1.7329321286892665, "learning_rate": 4.747849598021934e-05, "loss": 0.5827, "step": 5730 }, { "epoch": 0.6795920787382901, "grad_norm": 1.553647587651021, "learning_rate": 4.7477445331060995e-05, "loss": 0.5982, "step": 5731 }, { "epoch": 0.6797106605004151, "grad_norm": 1.3527526266461087, "learning_rate": 4.747639447468816e-05, "loss": 0.4878, "step": 5732 }, { "epoch": 0.67982924226254, "grad_norm": 1.561262727665943, "learning_rate": 4.747534341111054e-05, "loss": 0.5842, "step": 5733 }, { "epoch": 0.679947824024665, "grad_norm": 1.4717740938943071, "learning_rate": 4.747429214033781e-05, "loss": 0.5314, "step": 5734 }, { "epoch": 0.68006640578679, "grad_norm": 1.2152414558564422, "learning_rate": 4.747324066237967e-05, "loss": 0.3428, "step": 5735 }, { "epoch": 0.680184987548915, "grad_norm": 1.0286953562102505, "learning_rate": 4.747218897724581e-05, "loss": 0.2679, "step": 5736 }, { "epoch": 0.68030356931104, "grad_norm": 1.471763413017837, "learning_rate": 4.7471137084945914e-05, "loss": 0.613, "step": 5737 }, { "epoch": 0.680422151073165, "grad_norm": 1.025446495488857, "learning_rate": 4.7470084985489706e-05, "loss": 0.3155, "step": 5738 }, { "epoch": 0.6805407328352899, "grad_norm": 1.4335370507410663, "learning_rate": 4.746903267888686e-05, "loss": 0.5187, "step": 5739 }, { "epoch": 0.6806593145974149, "grad_norm": 1.7907229488384093, "learning_rate": 4.746798016514708e-05, "loss": 0.6627, "step": 5740 }, { "epoch": 0.6807778963595399, "grad_norm": 1.629955398676479, "learning_rate": 4.746692744428008e-05, "loss": 0.5257, "step": 5741 }, { "epoch": 0.6808964781216649, "grad_norm": 1.261510817769618, "learning_rate": 4.746587451629557e-05, "loss": 0.3229, "step": 5742 }, { "epoch": 0.6810150598837899, "grad_norm": 1.2617854639955823, "learning_rate": 4.746482138120324e-05, "loss": 0.3327, "step": 5743 }, { "epoch": 0.6811336416459148, "grad_norm": 1.427248778387898, "learning_rate": 4.74637680390128e-05, "loss": 0.526, "step": 5744 }, { "epoch": 0.6812522234080398, "grad_norm": 1.633108309381934, "learning_rate": 4.7462714489733966e-05, "loss": 0.4754, "step": 5745 }, { "epoch": 0.6813708051701648, "grad_norm": 1.5079288725901714, "learning_rate": 4.746166073337646e-05, "loss": 0.5699, "step": 5746 }, { "epoch": 0.6814893869322898, "grad_norm": 1.5063429564856952, "learning_rate": 4.7460606769949975e-05, "loss": 0.5583, "step": 5747 }, { "epoch": 0.6816079686944148, "grad_norm": 1.653845585743617, "learning_rate": 4.745955259946424e-05, "loss": 0.5072, "step": 5748 }, { "epoch": 0.6817265504565397, "grad_norm": 1.5650740215091177, "learning_rate": 4.7458498221928976e-05, "loss": 0.5043, "step": 5749 }, { "epoch": 0.6818451322186647, "grad_norm": 1.3560233151331795, "learning_rate": 4.745744363735389e-05, "loss": 0.3191, "step": 5750 }, { "epoch": 0.6819637139807898, "grad_norm": 1.610234975595318, "learning_rate": 4.7456388845748726e-05, "loss": 0.4708, "step": 5751 }, { "epoch": 0.6820822957429148, "grad_norm": 1.6766647512056025, "learning_rate": 4.745533384712319e-05, "loss": 0.6165, "step": 5752 }, { "epoch": 0.6822008775050398, "grad_norm": 2.362638743684445, "learning_rate": 4.745427864148701e-05, "loss": 0.637, "step": 5753 }, { "epoch": 0.6823194592671648, "grad_norm": 1.7308657564322087, "learning_rate": 4.7453223228849916e-05, "loss": 0.6062, "step": 5754 }, { "epoch": 0.6824380410292897, "grad_norm": 1.3885745290325822, "learning_rate": 4.745216760922164e-05, "loss": 0.4075, "step": 5755 }, { "epoch": 0.6825566227914147, "grad_norm": 1.5995432831001777, "learning_rate": 4.745111178261191e-05, "loss": 0.5634, "step": 5756 }, { "epoch": 0.6826752045535397, "grad_norm": 1.1359592449952887, "learning_rate": 4.745005574903046e-05, "loss": 0.3389, "step": 5757 }, { "epoch": 0.6827937863156647, "grad_norm": 1.2134720756085207, "learning_rate": 4.744899950848703e-05, "loss": 0.4107, "step": 5758 }, { "epoch": 0.6829123680777897, "grad_norm": 1.562839897557436, "learning_rate": 4.7447943060991354e-05, "loss": 0.7269, "step": 5759 }, { "epoch": 0.6830309498399146, "grad_norm": 1.8055121715893887, "learning_rate": 4.7446886406553165e-05, "loss": 0.5841, "step": 5760 }, { "epoch": 0.6831495316020396, "grad_norm": 1.2775953107678784, "learning_rate": 4.744582954518221e-05, "loss": 0.4745, "step": 5761 }, { "epoch": 0.6832681133641646, "grad_norm": 1.1828260212046602, "learning_rate": 4.744477247688823e-05, "loss": 0.3799, "step": 5762 }, { "epoch": 0.6833866951262896, "grad_norm": 1.4155944055942016, "learning_rate": 4.744371520168098e-05, "loss": 0.4373, "step": 5763 }, { "epoch": 0.6835052768884146, "grad_norm": 1.1952576433089332, "learning_rate": 4.744265771957019e-05, "loss": 0.3561, "step": 5764 }, { "epoch": 0.6836238586505395, "grad_norm": 1.3890968927883893, "learning_rate": 4.744160003056562e-05, "loss": 0.3628, "step": 5765 }, { "epoch": 0.6837424404126645, "grad_norm": 1.7639634469047607, "learning_rate": 4.744054213467702e-05, "loss": 0.416, "step": 5766 }, { "epoch": 0.6838610221747895, "grad_norm": 1.4345110767900837, "learning_rate": 4.7439484031914136e-05, "loss": 0.3605, "step": 5767 }, { "epoch": 0.6839796039369145, "grad_norm": 1.9889336129228103, "learning_rate": 4.743842572228673e-05, "loss": 0.6876, "step": 5768 }, { "epoch": 0.6840981856990395, "grad_norm": 1.2993523100532105, "learning_rate": 4.743736720580455e-05, "loss": 0.2974, "step": 5769 }, { "epoch": 0.6842167674611644, "grad_norm": 1.63393060305369, "learning_rate": 4.743630848247735e-05, "loss": 0.5947, "step": 5770 }, { "epoch": 0.6843353492232894, "grad_norm": 2.1189957292946624, "learning_rate": 4.743524955231492e-05, "loss": 0.7836, "step": 5771 }, { "epoch": 0.6844539309854144, "grad_norm": 1.8974490791467324, "learning_rate": 4.7434190415326986e-05, "loss": 0.5277, "step": 5772 }, { "epoch": 0.6845725127475394, "grad_norm": 1.64567402334388, "learning_rate": 4.7433131071523336e-05, "loss": 0.5086, "step": 5773 }, { "epoch": 0.6846910945096644, "grad_norm": 1.5410066066019805, "learning_rate": 4.7432071520913726e-05, "loss": 0.4219, "step": 5774 }, { "epoch": 0.6848096762717893, "grad_norm": 1.4106540472369506, "learning_rate": 4.743101176350791e-05, "loss": 0.472, "step": 5775 }, { "epoch": 0.6849282580339144, "grad_norm": 1.6071839898585567, "learning_rate": 4.742995179931569e-05, "loss": 0.6883, "step": 5776 }, { "epoch": 0.6850468397960394, "grad_norm": 1.16745620335201, "learning_rate": 4.742889162834681e-05, "loss": 0.4036, "step": 5777 }, { "epoch": 0.6851654215581644, "grad_norm": 1.2407815939140547, "learning_rate": 4.742783125061106e-05, "loss": 0.3848, "step": 5778 }, { "epoch": 0.6852840033202894, "grad_norm": 1.4931621214811472, "learning_rate": 4.7426770666118205e-05, "loss": 0.4568, "step": 5779 }, { "epoch": 0.6854025850824144, "grad_norm": 1.0254139308822854, "learning_rate": 4.7425709874878024e-05, "loss": 0.2936, "step": 5780 }, { "epoch": 0.6855211668445393, "grad_norm": 1.523952301381571, "learning_rate": 4.74246488769003e-05, "loss": 0.4815, "step": 5781 }, { "epoch": 0.6856397486066643, "grad_norm": 1.1871311239244178, "learning_rate": 4.742358767219481e-05, "loss": 0.4113, "step": 5782 }, { "epoch": 0.6857583303687893, "grad_norm": 1.3933697555569393, "learning_rate": 4.742252626077135e-05, "loss": 0.5476, "step": 5783 }, { "epoch": 0.6858769121309143, "grad_norm": 1.333752144454184, "learning_rate": 4.742146464263968e-05, "loss": 0.369, "step": 5784 }, { "epoch": 0.6859954938930393, "grad_norm": 1.050728266739734, "learning_rate": 4.742040281780961e-05, "loss": 0.2619, "step": 5785 }, { "epoch": 0.6861140756551642, "grad_norm": 1.5663966866200687, "learning_rate": 4.741934078629092e-05, "loss": 0.5648, "step": 5786 }, { "epoch": 0.6862326574172892, "grad_norm": 1.5306240122144832, "learning_rate": 4.741827854809339e-05, "loss": 0.3877, "step": 5787 }, { "epoch": 0.6863512391794142, "grad_norm": 2.0252082689044117, "learning_rate": 4.7417216103226836e-05, "loss": 0.4912, "step": 5788 }, { "epoch": 0.6864698209415392, "grad_norm": 1.745513464000421, "learning_rate": 4.741615345170103e-05, "loss": 0.5605, "step": 5789 }, { "epoch": 0.6865884027036642, "grad_norm": 1.6721958193318687, "learning_rate": 4.7415090593525777e-05, "loss": 0.6811, "step": 5790 }, { "epoch": 0.6867069844657891, "grad_norm": 1.4525285813379845, "learning_rate": 4.741402752871089e-05, "loss": 0.3884, "step": 5791 }, { "epoch": 0.6868255662279141, "grad_norm": 1.319502126596369, "learning_rate": 4.741296425726614e-05, "loss": 0.4509, "step": 5792 }, { "epoch": 0.6869441479900391, "grad_norm": 1.6559489958152063, "learning_rate": 4.741190077920135e-05, "loss": 0.5439, "step": 5793 }, { "epoch": 0.6870627297521641, "grad_norm": 1.5271169908275757, "learning_rate": 4.741083709452632e-05, "loss": 0.404, "step": 5794 }, { "epoch": 0.6871813115142891, "grad_norm": 1.4201823236537994, "learning_rate": 4.740977320325085e-05, "loss": 0.4206, "step": 5795 }, { "epoch": 0.687299893276414, "grad_norm": 1.3733678658698703, "learning_rate": 4.740870910538475e-05, "loss": 0.3867, "step": 5796 }, { "epoch": 0.687418475038539, "grad_norm": 1.6173386157352332, "learning_rate": 4.740764480093783e-05, "loss": 0.5293, "step": 5797 }, { "epoch": 0.687537056800664, "grad_norm": 1.6228498561005573, "learning_rate": 4.740658028991991e-05, "loss": 0.6621, "step": 5798 }, { "epoch": 0.687655638562789, "grad_norm": 1.6840440604156015, "learning_rate": 4.74055155723408e-05, "loss": 0.6152, "step": 5799 }, { "epoch": 0.687774220324914, "grad_norm": 1.3685307912183806, "learning_rate": 4.7404450648210306e-05, "loss": 0.4294, "step": 5800 }, { "epoch": 0.6878928020870391, "grad_norm": 1.9204184077808022, "learning_rate": 4.740338551753825e-05, "loss": 0.6511, "step": 5801 }, { "epoch": 0.688011383849164, "grad_norm": 1.1336783293914625, "learning_rate": 4.740232018033445e-05, "loss": 0.326, "step": 5802 }, { "epoch": 0.688129965611289, "grad_norm": 1.7780522058694028, "learning_rate": 4.740125463660874e-05, "loss": 0.6763, "step": 5803 }, { "epoch": 0.688248547373414, "grad_norm": 1.7670303771803042, "learning_rate": 4.740018888637092e-05, "loss": 0.7297, "step": 5804 }, { "epoch": 0.688367129135539, "grad_norm": 1.4592300955971549, "learning_rate": 4.7399122929630835e-05, "loss": 0.5218, "step": 5805 }, { "epoch": 0.688485710897664, "grad_norm": 1.3675071710589848, "learning_rate": 4.73980567663983e-05, "loss": 0.5019, "step": 5806 }, { "epoch": 0.688604292659789, "grad_norm": 1.2105402872397526, "learning_rate": 4.739699039668315e-05, "loss": 0.4821, "step": 5807 }, { "epoch": 0.6887228744219139, "grad_norm": 1.4429391343794966, "learning_rate": 4.7395923820495216e-05, "loss": 0.4568, "step": 5808 }, { "epoch": 0.6888414561840389, "grad_norm": 1.7244619432020698, "learning_rate": 4.7394857037844326e-05, "loss": 0.5436, "step": 5809 }, { "epoch": 0.6889600379461639, "grad_norm": 1.5212172194719675, "learning_rate": 4.7393790048740315e-05, "loss": 0.3645, "step": 5810 }, { "epoch": 0.6890786197082889, "grad_norm": 1.3023759315402172, "learning_rate": 4.739272285319302e-05, "loss": 0.383, "step": 5811 }, { "epoch": 0.6891972014704139, "grad_norm": 1.5013055581939643, "learning_rate": 4.739165545121228e-05, "loss": 0.5427, "step": 5812 }, { "epoch": 0.6893157832325388, "grad_norm": 1.3645606562415795, "learning_rate": 4.7390587842807945e-05, "loss": 0.4807, "step": 5813 }, { "epoch": 0.6894343649946638, "grad_norm": 1.7429523223751278, "learning_rate": 4.738952002798983e-05, "loss": 0.5391, "step": 5814 }, { "epoch": 0.6895529467567888, "grad_norm": 1.4612758481364325, "learning_rate": 4.738845200676781e-05, "loss": 0.4797, "step": 5815 }, { "epoch": 0.6896715285189138, "grad_norm": 1.4122391607989557, "learning_rate": 4.738738377915172e-05, "loss": 0.3728, "step": 5816 }, { "epoch": 0.6897901102810388, "grad_norm": 1.2757257102006745, "learning_rate": 4.738631534515139e-05, "loss": 0.4348, "step": 5817 }, { "epoch": 0.6899086920431637, "grad_norm": 1.4468298303499614, "learning_rate": 4.738524670477669e-05, "loss": 0.5377, "step": 5818 }, { "epoch": 0.6900272738052887, "grad_norm": 2.460603324225845, "learning_rate": 4.7384177858037474e-05, "loss": 0.944, "step": 5819 }, { "epoch": 0.6901458555674137, "grad_norm": 1.4839328462933412, "learning_rate": 4.7383108804943574e-05, "loss": 0.6181, "step": 5820 }, { "epoch": 0.6902644373295387, "grad_norm": 1.4411617972886719, "learning_rate": 4.738203954550487e-05, "loss": 0.4295, "step": 5821 }, { "epoch": 0.6903830190916637, "grad_norm": 1.2831902185515272, "learning_rate": 4.73809700797312e-05, "loss": 0.3694, "step": 5822 }, { "epoch": 0.6905016008537886, "grad_norm": 1.9024922599655691, "learning_rate": 4.737990040763244e-05, "loss": 0.628, "step": 5823 }, { "epoch": 0.6906201826159136, "grad_norm": 1.8004161491489894, "learning_rate": 4.737883052921844e-05, "loss": 0.6793, "step": 5824 }, { "epoch": 0.6907387643780386, "grad_norm": 1.3782960645486948, "learning_rate": 4.737776044449906e-05, "loss": 0.5947, "step": 5825 }, { "epoch": 0.6908573461401637, "grad_norm": 1.5623152300599972, "learning_rate": 4.737669015348417e-05, "loss": 0.5731, "step": 5826 }, { "epoch": 0.6909759279022887, "grad_norm": 1.61871095612396, "learning_rate": 4.7375619656183644e-05, "loss": 0.7085, "step": 5827 }, { "epoch": 0.6910945096644137, "grad_norm": 1.1084739368144534, "learning_rate": 4.737454895260734e-05, "loss": 0.368, "step": 5828 }, { "epoch": 0.6912130914265386, "grad_norm": 1.211126403356313, "learning_rate": 4.737347804276513e-05, "loss": 0.5287, "step": 5829 }, { "epoch": 0.6913316731886636, "grad_norm": 0.9986801684586978, "learning_rate": 4.737240692666689e-05, "loss": 0.3306, "step": 5830 }, { "epoch": 0.6914502549507886, "grad_norm": 1.2615411251529267, "learning_rate": 4.73713356043225e-05, "loss": 0.5272, "step": 5831 }, { "epoch": 0.6915688367129136, "grad_norm": 1.528425079750217, "learning_rate": 4.737026407574182e-05, "loss": 0.6236, "step": 5832 }, { "epoch": 0.6916874184750386, "grad_norm": 1.334203849686508, "learning_rate": 4.736919234093474e-05, "loss": 0.4551, "step": 5833 }, { "epoch": 0.6918060002371635, "grad_norm": 1.4331442130400391, "learning_rate": 4.736812039991113e-05, "loss": 0.5355, "step": 5834 }, { "epoch": 0.6919245819992885, "grad_norm": 1.2440821015156103, "learning_rate": 4.736704825268089e-05, "loss": 0.5246, "step": 5835 }, { "epoch": 0.6920431637614135, "grad_norm": 1.4219206266067421, "learning_rate": 4.736597589925389e-05, "loss": 0.6297, "step": 5836 }, { "epoch": 0.6921617455235385, "grad_norm": 1.3086883771654296, "learning_rate": 4.736490333964002e-05, "loss": 0.4994, "step": 5837 }, { "epoch": 0.6922803272856635, "grad_norm": 1.4388139831673052, "learning_rate": 4.7363830573849166e-05, "loss": 0.5046, "step": 5838 }, { "epoch": 0.6923989090477884, "grad_norm": 1.4741280403822496, "learning_rate": 4.736275760189122e-05, "loss": 0.5176, "step": 5839 }, { "epoch": 0.6925174908099134, "grad_norm": 1.4945539866120008, "learning_rate": 4.736168442377607e-05, "loss": 0.5921, "step": 5840 }, { "epoch": 0.6926360725720384, "grad_norm": 1.5291303375321381, "learning_rate": 4.736061103951361e-05, "loss": 0.5943, "step": 5841 }, { "epoch": 0.6927546543341634, "grad_norm": 1.2516326357163647, "learning_rate": 4.735953744911374e-05, "loss": 0.4633, "step": 5842 }, { "epoch": 0.6928732360962884, "grad_norm": 1.3847101409884288, "learning_rate": 4.735846365258635e-05, "loss": 0.4081, "step": 5843 }, { "epoch": 0.6929918178584134, "grad_norm": 1.8363616681263069, "learning_rate": 4.7357389649941355e-05, "loss": 0.5753, "step": 5844 }, { "epoch": 0.6931103996205383, "grad_norm": 1.1289569515749396, "learning_rate": 4.735631544118863e-05, "loss": 0.3255, "step": 5845 }, { "epoch": 0.6932289813826633, "grad_norm": 1.7183448942153636, "learning_rate": 4.735524102633809e-05, "loss": 0.728, "step": 5846 }, { "epoch": 0.6933475631447883, "grad_norm": 1.7629938273922963, "learning_rate": 4.735416640539966e-05, "loss": 0.4884, "step": 5847 }, { "epoch": 0.6934661449069133, "grad_norm": 1.4779273893577392, "learning_rate": 4.7353091578383215e-05, "loss": 0.3993, "step": 5848 }, { "epoch": 0.6935847266690383, "grad_norm": 1.4366976387709554, "learning_rate": 4.735201654529867e-05, "loss": 0.4832, "step": 5849 }, { "epoch": 0.6937033084311632, "grad_norm": 1.6048615250847587, "learning_rate": 4.735094130615595e-05, "loss": 0.4833, "step": 5850 }, { "epoch": 0.6938218901932883, "grad_norm": 1.7905581277549416, "learning_rate": 4.734986586096496e-05, "loss": 0.6759, "step": 5851 }, { "epoch": 0.6939404719554133, "grad_norm": 1.7620761364125563, "learning_rate": 4.734879020973562e-05, "loss": 0.7111, "step": 5852 }, { "epoch": 0.6940590537175383, "grad_norm": 1.3730986457909102, "learning_rate": 4.734771435247783e-05, "loss": 0.5369, "step": 5853 }, { "epoch": 0.6941776354796633, "grad_norm": 1.4499867947033716, "learning_rate": 4.734663828920152e-05, "loss": 0.5283, "step": 5854 }, { "epoch": 0.6942962172417882, "grad_norm": 1.5452711121660943, "learning_rate": 4.734556201991661e-05, "loss": 0.6338, "step": 5855 }, { "epoch": 0.6944147990039132, "grad_norm": 1.374513338033488, "learning_rate": 4.734448554463302e-05, "loss": 0.4917, "step": 5856 }, { "epoch": 0.6945333807660382, "grad_norm": 1.2117202112112078, "learning_rate": 4.734340886336068e-05, "loss": 0.3614, "step": 5857 }, { "epoch": 0.6946519625281632, "grad_norm": 1.1981286588434894, "learning_rate": 4.73423319761095e-05, "loss": 0.4032, "step": 5858 }, { "epoch": 0.6947705442902882, "grad_norm": 1.1396402289290508, "learning_rate": 4.734125488288942e-05, "loss": 0.375, "step": 5859 }, { "epoch": 0.6948891260524132, "grad_norm": 1.2497182299420218, "learning_rate": 4.734017758371037e-05, "loss": 0.416, "step": 5860 }, { "epoch": 0.6950077078145381, "grad_norm": 1.591261943859206, "learning_rate": 4.733910007858228e-05, "loss": 0.6118, "step": 5861 }, { "epoch": 0.6951262895766631, "grad_norm": 1.7658282301585015, "learning_rate": 4.733802236751507e-05, "loss": 0.596, "step": 5862 }, { "epoch": 0.6952448713387881, "grad_norm": 1.6658263073469215, "learning_rate": 4.7336944450518696e-05, "loss": 0.5929, "step": 5863 }, { "epoch": 0.6953634531009131, "grad_norm": 1.7425330592934143, "learning_rate": 4.7335866327603086e-05, "loss": 0.5903, "step": 5864 }, { "epoch": 0.6954820348630381, "grad_norm": 1.6361601819130394, "learning_rate": 4.733478799877817e-05, "loss": 0.5193, "step": 5865 }, { "epoch": 0.695600616625163, "grad_norm": 1.823413887170786, "learning_rate": 4.733370946405391e-05, "loss": 0.6462, "step": 5866 }, { "epoch": 0.695719198387288, "grad_norm": 1.5333372358825776, "learning_rate": 4.7332630723440225e-05, "loss": 0.6184, "step": 5867 }, { "epoch": 0.695837780149413, "grad_norm": 1.3254614858725624, "learning_rate": 4.7331551776947075e-05, "loss": 0.451, "step": 5868 }, { "epoch": 0.695956361911538, "grad_norm": 1.386051915383401, "learning_rate": 4.7330472624584396e-05, "loss": 0.3332, "step": 5869 }, { "epoch": 0.696074943673663, "grad_norm": 1.812729380861544, "learning_rate": 4.732939326636216e-05, "loss": 0.6201, "step": 5870 }, { "epoch": 0.6961935254357879, "grad_norm": 1.3666421580671622, "learning_rate": 4.7328313702290285e-05, "loss": 0.3739, "step": 5871 }, { "epoch": 0.6963121071979129, "grad_norm": 1.415734444424527, "learning_rate": 4.732723393237875e-05, "loss": 0.3941, "step": 5872 }, { "epoch": 0.6964306889600379, "grad_norm": 1.7268802499312779, "learning_rate": 4.732615395663748e-05, "loss": 0.5275, "step": 5873 }, { "epoch": 0.6965492707221629, "grad_norm": 1.5786202907707103, "learning_rate": 4.732507377507646e-05, "loss": 0.5458, "step": 5874 }, { "epoch": 0.6966678524842879, "grad_norm": 1.4921647485503657, "learning_rate": 4.732399338770563e-05, "loss": 0.4846, "step": 5875 }, { "epoch": 0.696786434246413, "grad_norm": 1.3034398436639854, "learning_rate": 4.732291279453497e-05, "loss": 0.3817, "step": 5876 }, { "epoch": 0.6969050160085379, "grad_norm": 1.1344967833578508, "learning_rate": 4.7321831995574414e-05, "loss": 0.4116, "step": 5877 }, { "epoch": 0.6970235977706629, "grad_norm": 1.3882287457673606, "learning_rate": 4.7320750990833954e-05, "loss": 0.439, "step": 5878 }, { "epoch": 0.6971421795327879, "grad_norm": 1.4382200368897877, "learning_rate": 4.731966978032353e-05, "loss": 0.4349, "step": 5879 }, { "epoch": 0.6972607612949129, "grad_norm": 1.6500673192625435, "learning_rate": 4.7318588364053126e-05, "loss": 0.5658, "step": 5880 }, { "epoch": 0.6973793430570379, "grad_norm": 1.2696162915212013, "learning_rate": 4.73175067420327e-05, "loss": 0.3454, "step": 5881 }, { "epoch": 0.6974979248191628, "grad_norm": 1.024509747364139, "learning_rate": 4.731642491427224e-05, "loss": 0.3429, "step": 5882 }, { "epoch": 0.6976165065812878, "grad_norm": 1.665208322971569, "learning_rate": 4.73153428807817e-05, "loss": 0.4705, "step": 5883 }, { "epoch": 0.6977350883434128, "grad_norm": 1.2298907692723107, "learning_rate": 4.7314260641571065e-05, "loss": 0.3774, "step": 5884 }, { "epoch": 0.6978536701055378, "grad_norm": 1.1498817325914694, "learning_rate": 4.731317819665031e-05, "loss": 0.3075, "step": 5885 }, { "epoch": 0.6979722518676628, "grad_norm": 1.502461154985352, "learning_rate": 4.731209554602942e-05, "loss": 0.3905, "step": 5886 }, { "epoch": 0.6980908336297877, "grad_norm": 1.545365812659794, "learning_rate": 4.731101268971836e-05, "loss": 0.4738, "step": 5887 }, { "epoch": 0.6982094153919127, "grad_norm": 1.4836968000950312, "learning_rate": 4.730992962772714e-05, "loss": 0.3957, "step": 5888 }, { "epoch": 0.6983279971540377, "grad_norm": 1.6354786113236879, "learning_rate": 4.7308846360065706e-05, "loss": 0.5732, "step": 5889 }, { "epoch": 0.6984465789161627, "grad_norm": 1.770928217338458, "learning_rate": 4.730776288674408e-05, "loss": 0.5702, "step": 5890 }, { "epoch": 0.6985651606782877, "grad_norm": 2.0275870867200885, "learning_rate": 4.730667920777222e-05, "loss": 0.6628, "step": 5891 }, { "epoch": 0.6986837424404126, "grad_norm": 1.7326626731569186, "learning_rate": 4.730559532316015e-05, "loss": 0.4809, "step": 5892 }, { "epoch": 0.6988023242025376, "grad_norm": 1.902389066678117, "learning_rate": 4.7304511232917836e-05, "loss": 0.5499, "step": 5893 }, { "epoch": 0.6989209059646626, "grad_norm": 1.5488931618511972, "learning_rate": 4.730342693705528e-05, "loss": 0.4387, "step": 5894 }, { "epoch": 0.6990394877267876, "grad_norm": 1.6868137672314207, "learning_rate": 4.7302342435582476e-05, "loss": 0.4819, "step": 5895 }, { "epoch": 0.6991580694889126, "grad_norm": 1.3542243404793528, "learning_rate": 4.730125772850943e-05, "loss": 0.4079, "step": 5896 }, { "epoch": 0.6992766512510376, "grad_norm": 1.5639698899531111, "learning_rate": 4.730017281584613e-05, "loss": 0.5336, "step": 5897 }, { "epoch": 0.6993952330131625, "grad_norm": 1.5811606975490025, "learning_rate": 4.7299087697602587e-05, "loss": 0.4721, "step": 5898 }, { "epoch": 0.6995138147752875, "grad_norm": 1.732402612606219, "learning_rate": 4.7298002373788795e-05, "loss": 0.5615, "step": 5899 }, { "epoch": 0.6996323965374125, "grad_norm": 1.4524728076027031, "learning_rate": 4.729691684441476e-05, "loss": 0.4574, "step": 5900 }, { "epoch": 0.6997509782995376, "grad_norm": 1.197924847755885, "learning_rate": 4.729583110949051e-05, "loss": 0.4067, "step": 5901 }, { "epoch": 0.6998695600616626, "grad_norm": 1.1416280794254032, "learning_rate": 4.7294745169026024e-05, "loss": 0.3958, "step": 5902 }, { "epoch": 0.6999881418237875, "grad_norm": 1.3998865562194645, "learning_rate": 4.729365902303133e-05, "loss": 0.4019, "step": 5903 }, { "epoch": 0.7001067235859125, "grad_norm": 1.3192129175817104, "learning_rate": 4.7292572671516443e-05, "loss": 0.3784, "step": 5904 }, { "epoch": 0.7002253053480375, "grad_norm": 1.5874148871177438, "learning_rate": 4.729148611449137e-05, "loss": 0.5182, "step": 5905 }, { "epoch": 0.7003438871101625, "grad_norm": 1.2384219320303527, "learning_rate": 4.729039935196613e-05, "loss": 0.4747, "step": 5906 }, { "epoch": 0.7004624688722875, "grad_norm": 1.3975944832787983, "learning_rate": 4.7289312383950746e-05, "loss": 0.5296, "step": 5907 }, { "epoch": 0.7005810506344124, "grad_norm": 1.746167561330862, "learning_rate": 4.7288225210455237e-05, "loss": 0.5548, "step": 5908 }, { "epoch": 0.7006996323965374, "grad_norm": 1.4595847540456612, "learning_rate": 4.728713783148961e-05, "loss": 0.5001, "step": 5909 }, { "epoch": 0.7008182141586624, "grad_norm": 1.000173164655837, "learning_rate": 4.7286050247063916e-05, "loss": 0.3067, "step": 5910 }, { "epoch": 0.7009367959207874, "grad_norm": 1.0916913065665843, "learning_rate": 4.728496245718816e-05, "loss": 0.3675, "step": 5911 }, { "epoch": 0.7010553776829124, "grad_norm": 1.345617170667106, "learning_rate": 4.728387446187238e-05, "loss": 0.4811, "step": 5912 }, { "epoch": 0.7011739594450374, "grad_norm": 1.6765521877058907, "learning_rate": 4.728278626112661e-05, "loss": 0.5702, "step": 5913 }, { "epoch": 0.7012925412071623, "grad_norm": 1.7942172738157531, "learning_rate": 4.7281697854960874e-05, "loss": 0.46, "step": 5914 }, { "epoch": 0.7014111229692873, "grad_norm": 1.4679362350645122, "learning_rate": 4.72806092433852e-05, "loss": 0.4439, "step": 5915 }, { "epoch": 0.7015297047314123, "grad_norm": 1.4064822935685775, "learning_rate": 4.727952042640963e-05, "loss": 0.4108, "step": 5916 }, { "epoch": 0.7016482864935373, "grad_norm": 1.6994344565965738, "learning_rate": 4.727843140404421e-05, "loss": 0.4493, "step": 5917 }, { "epoch": 0.7017668682556623, "grad_norm": 2.104917069903942, "learning_rate": 4.727734217629897e-05, "loss": 0.648, "step": 5918 }, { "epoch": 0.7018854500177872, "grad_norm": 1.4274694245798705, "learning_rate": 4.7276252743183945e-05, "loss": 0.4147, "step": 5919 }, { "epoch": 0.7020040317799122, "grad_norm": 1.6318014827359697, "learning_rate": 4.72751631047092e-05, "loss": 0.4388, "step": 5920 }, { "epoch": 0.7021226135420372, "grad_norm": 1.3463002339299923, "learning_rate": 4.727407326088476e-05, "loss": 0.3649, "step": 5921 }, { "epoch": 0.7022411953041622, "grad_norm": 1.3621512858616547, "learning_rate": 4.7272983211720676e-05, "loss": 0.4368, "step": 5922 }, { "epoch": 0.7023597770662872, "grad_norm": 1.485238884881027, "learning_rate": 4.7271892957227e-05, "loss": 0.4407, "step": 5923 }, { "epoch": 0.7024783588284121, "grad_norm": 1.5654029799509774, "learning_rate": 4.727080249741379e-05, "loss": 0.5948, "step": 5924 }, { "epoch": 0.7025969405905371, "grad_norm": 1.6040530458222044, "learning_rate": 4.726971183229109e-05, "loss": 0.606, "step": 5925 }, { "epoch": 0.7027155223526622, "grad_norm": 1.5521128202492758, "learning_rate": 4.7268620961868956e-05, "loss": 0.425, "step": 5926 }, { "epoch": 0.7028341041147872, "grad_norm": 1.4568612287662037, "learning_rate": 4.726752988615743e-05, "loss": 0.4354, "step": 5927 }, { "epoch": 0.7029526858769122, "grad_norm": 1.4386641990131188, "learning_rate": 4.726643860516661e-05, "loss": 0.5651, "step": 5928 }, { "epoch": 0.7030712676390372, "grad_norm": 1.367753465330739, "learning_rate": 4.726534711890651e-05, "loss": 0.4008, "step": 5929 }, { "epoch": 0.7031898494011621, "grad_norm": 1.5000165608998892, "learning_rate": 4.726425542738722e-05, "loss": 0.7037, "step": 5930 }, { "epoch": 0.7033084311632871, "grad_norm": 1.5747477998382635, "learning_rate": 4.72631635306188e-05, "loss": 0.4796, "step": 5931 }, { "epoch": 0.7034270129254121, "grad_norm": 1.3833280612224677, "learning_rate": 4.726207142861131e-05, "loss": 0.4967, "step": 5932 }, { "epoch": 0.7035455946875371, "grad_norm": 1.895884851920397, "learning_rate": 4.726097912137482e-05, "loss": 0.7004, "step": 5933 }, { "epoch": 0.7036641764496621, "grad_norm": 1.410781548415178, "learning_rate": 4.72598866089194e-05, "loss": 0.3574, "step": 5934 }, { "epoch": 0.703782758211787, "grad_norm": 1.373035936774265, "learning_rate": 4.7258793891255124e-05, "loss": 0.4072, "step": 5935 }, { "epoch": 0.703901339973912, "grad_norm": 1.6744581797437508, "learning_rate": 4.7257700968392074e-05, "loss": 0.4675, "step": 5936 }, { "epoch": 0.704019921736037, "grad_norm": 1.1821717302901371, "learning_rate": 4.72566078403403e-05, "loss": 0.3443, "step": 5937 }, { "epoch": 0.704138503498162, "grad_norm": 1.4611561842336995, "learning_rate": 4.72555145071099e-05, "loss": 0.5054, "step": 5938 }, { "epoch": 0.704257085260287, "grad_norm": 1.7927054987476099, "learning_rate": 4.7254420968710945e-05, "loss": 0.6275, "step": 5939 }, { "epoch": 0.7043756670224119, "grad_norm": 1.3376478924166504, "learning_rate": 4.725332722515352e-05, "loss": 0.4088, "step": 5940 }, { "epoch": 0.7044942487845369, "grad_norm": 1.2137412295429737, "learning_rate": 4.725223327644771e-05, "loss": 0.3314, "step": 5941 }, { "epoch": 0.7046128305466619, "grad_norm": 1.4230484652856537, "learning_rate": 4.72511391226036e-05, "loss": 0.4057, "step": 5942 }, { "epoch": 0.7047314123087869, "grad_norm": 1.2226011256534264, "learning_rate": 4.725004476363127e-05, "loss": 0.3479, "step": 5943 }, { "epoch": 0.7048499940709119, "grad_norm": 1.5094247786964183, "learning_rate": 4.724895019954081e-05, "loss": 0.4414, "step": 5944 }, { "epoch": 0.7049685758330368, "grad_norm": 1.4776662812611645, "learning_rate": 4.7247855430342315e-05, "loss": 0.5321, "step": 5945 }, { "epoch": 0.7050871575951618, "grad_norm": 1.1026220443720005, "learning_rate": 4.7246760456045866e-05, "loss": 0.2955, "step": 5946 }, { "epoch": 0.7052057393572868, "grad_norm": 1.2130178216068794, "learning_rate": 4.724566527666158e-05, "loss": 0.2889, "step": 5947 }, { "epoch": 0.7053243211194118, "grad_norm": 1.3021672927987606, "learning_rate": 4.724456989219953e-05, "loss": 0.3943, "step": 5948 }, { "epoch": 0.7054429028815368, "grad_norm": 1.5558689035364544, "learning_rate": 4.7243474302669834e-05, "loss": 0.5451, "step": 5949 }, { "epoch": 0.7055614846436618, "grad_norm": 1.815468782358231, "learning_rate": 4.724237850808257e-05, "loss": 0.6548, "step": 5950 }, { "epoch": 0.7056800664057868, "grad_norm": 2.108305672181066, "learning_rate": 4.724128250844786e-05, "loss": 0.6917, "step": 5951 }, { "epoch": 0.7057986481679118, "grad_norm": 1.6886394623265477, "learning_rate": 4.7240186303775794e-05, "loss": 0.4883, "step": 5952 }, { "epoch": 0.7059172299300368, "grad_norm": 1.4978929230391342, "learning_rate": 4.723908989407648e-05, "loss": 0.4674, "step": 5953 }, { "epoch": 0.7060358116921618, "grad_norm": 1.7491778616671785, "learning_rate": 4.7237993279360036e-05, "loss": 0.5268, "step": 5954 }, { "epoch": 0.7061543934542868, "grad_norm": 1.528305047675958, "learning_rate": 4.723689645963656e-05, "loss": 0.4883, "step": 5955 }, { "epoch": 0.7062729752164117, "grad_norm": 1.2759809321609918, "learning_rate": 4.723579943491617e-05, "loss": 0.3816, "step": 5956 }, { "epoch": 0.7063915569785367, "grad_norm": 1.5935467831374166, "learning_rate": 4.7234702205208974e-05, "loss": 0.5671, "step": 5957 }, { "epoch": 0.7065101387406617, "grad_norm": 1.5548924218578515, "learning_rate": 4.7233604770525095e-05, "loss": 0.5028, "step": 5958 }, { "epoch": 0.7066287205027867, "grad_norm": 1.4301672275800712, "learning_rate": 4.723250713087464e-05, "loss": 0.447, "step": 5959 }, { "epoch": 0.7067473022649117, "grad_norm": 1.482943506343618, "learning_rate": 4.723140928626773e-05, "loss": 0.5458, "step": 5960 }, { "epoch": 0.7068658840270367, "grad_norm": 1.2833827823711732, "learning_rate": 4.7230311236714495e-05, "loss": 0.3465, "step": 5961 }, { "epoch": 0.7069844657891616, "grad_norm": 1.242530107871678, "learning_rate": 4.722921298222505e-05, "loss": 0.4536, "step": 5962 }, { "epoch": 0.7071030475512866, "grad_norm": 1.1598507232416433, "learning_rate": 4.722811452280951e-05, "loss": 0.3775, "step": 5963 }, { "epoch": 0.7072216293134116, "grad_norm": 1.439248138879495, "learning_rate": 4.7227015858478024e-05, "loss": 0.5237, "step": 5964 }, { "epoch": 0.7073402110755366, "grad_norm": 2.5165198662421266, "learning_rate": 4.72259169892407e-05, "loss": 0.727, "step": 5965 }, { "epoch": 0.7074587928376616, "grad_norm": 1.721405742762175, "learning_rate": 4.7224817915107686e-05, "loss": 0.5552, "step": 5966 }, { "epoch": 0.7075773745997865, "grad_norm": 1.3256628256889362, "learning_rate": 4.72237186360891e-05, "loss": 0.4454, "step": 5967 }, { "epoch": 0.7076959563619115, "grad_norm": 1.6477022857390915, "learning_rate": 4.722261915219508e-05, "loss": 0.6967, "step": 5968 }, { "epoch": 0.7078145381240365, "grad_norm": 1.0429301349454834, "learning_rate": 4.722151946343576e-05, "loss": 0.362, "step": 5969 }, { "epoch": 0.7079331198861615, "grad_norm": 1.387324199367693, "learning_rate": 4.722041956982128e-05, "loss": 0.4778, "step": 5970 }, { "epoch": 0.7080517016482865, "grad_norm": 1.5172439270254272, "learning_rate": 4.721931947136179e-05, "loss": 0.651, "step": 5971 }, { "epoch": 0.7081702834104114, "grad_norm": 1.4978675611510586, "learning_rate": 4.721821916806741e-05, "loss": 0.6066, "step": 5972 }, { "epoch": 0.7082888651725364, "grad_norm": 1.1445779510254601, "learning_rate": 4.72171186599483e-05, "loss": 0.3915, "step": 5973 }, { "epoch": 0.7084074469346614, "grad_norm": 1.5565573812420421, "learning_rate": 4.72160179470146e-05, "loss": 0.5772, "step": 5974 }, { "epoch": 0.7085260286967864, "grad_norm": 1.5208319074838659, "learning_rate": 4.721491702927646e-05, "loss": 0.4085, "step": 5975 }, { "epoch": 0.7086446104589115, "grad_norm": 1.5829729189712474, "learning_rate": 4.7213815906744026e-05, "loss": 0.6353, "step": 5976 }, { "epoch": 0.7087631922210365, "grad_norm": 1.3525127662085084, "learning_rate": 4.7212714579427444e-05, "loss": 0.4676, "step": 5977 }, { "epoch": 0.7088817739831614, "grad_norm": 1.4653520896630094, "learning_rate": 4.721161304733688e-05, "loss": 0.5458, "step": 5978 }, { "epoch": 0.7090003557452864, "grad_norm": 1.2492672836237313, "learning_rate": 4.7210511310482476e-05, "loss": 0.35, "step": 5979 }, { "epoch": 0.7091189375074114, "grad_norm": 1.223145669993536, "learning_rate": 4.72094093688744e-05, "loss": 0.3711, "step": 5980 }, { "epoch": 0.7092375192695364, "grad_norm": 1.2580518152776057, "learning_rate": 4.7208307222522805e-05, "loss": 0.4231, "step": 5981 }, { "epoch": 0.7093561010316614, "grad_norm": 1.11427313023881, "learning_rate": 4.720720487143784e-05, "loss": 0.432, "step": 5982 }, { "epoch": 0.7094746827937863, "grad_norm": 1.4003536407768433, "learning_rate": 4.7206102315629685e-05, "loss": 0.5864, "step": 5983 }, { "epoch": 0.7095932645559113, "grad_norm": 1.2311598632068457, "learning_rate": 4.72049995551085e-05, "loss": 0.4259, "step": 5984 }, { "epoch": 0.7097118463180363, "grad_norm": 1.4012744913169684, "learning_rate": 4.7203896589884444e-05, "loss": 0.3723, "step": 5985 }, { "epoch": 0.7098304280801613, "grad_norm": 1.243354222337033, "learning_rate": 4.720279341996769e-05, "loss": 0.4444, "step": 5986 }, { "epoch": 0.7099490098422863, "grad_norm": 1.461754893101291, "learning_rate": 4.720169004536842e-05, "loss": 0.4897, "step": 5987 }, { "epoch": 0.7100675916044112, "grad_norm": 1.3485648434952067, "learning_rate": 4.720058646609678e-05, "loss": 0.3231, "step": 5988 }, { "epoch": 0.7101861733665362, "grad_norm": 1.1887650968885484, "learning_rate": 4.719948268216296e-05, "loss": 0.364, "step": 5989 }, { "epoch": 0.7103047551286612, "grad_norm": 1.6067977576490993, "learning_rate": 4.7198378693577125e-05, "loss": 0.4317, "step": 5990 }, { "epoch": 0.7104233368907862, "grad_norm": 1.2634600163052283, "learning_rate": 4.7197274500349475e-05, "loss": 0.3408, "step": 5991 }, { "epoch": 0.7105419186529112, "grad_norm": 1.5235591373752373, "learning_rate": 4.7196170102490165e-05, "loss": 0.3395, "step": 5992 }, { "epoch": 0.7106605004150361, "grad_norm": 1.3898478875330689, "learning_rate": 4.7195065500009386e-05, "loss": 0.3994, "step": 5993 }, { "epoch": 0.7107790821771611, "grad_norm": 1.2420743272771753, "learning_rate": 4.719396069291733e-05, "loss": 0.3463, "step": 5994 }, { "epoch": 0.7108976639392861, "grad_norm": 1.8728331013760244, "learning_rate": 4.719285568122416e-05, "loss": 0.4337, "step": 5995 }, { "epoch": 0.7110162457014111, "grad_norm": 1.9429262801750444, "learning_rate": 4.719175046494008e-05, "loss": 0.5696, "step": 5996 }, { "epoch": 0.7111348274635361, "grad_norm": 1.5893827750058425, "learning_rate": 4.719064504407526e-05, "loss": 0.4821, "step": 5997 }, { "epoch": 0.711253409225661, "grad_norm": 1.4002367476420166, "learning_rate": 4.718953941863993e-05, "loss": 0.3533, "step": 5998 }, { "epoch": 0.711371990987786, "grad_norm": 1.8679366696872277, "learning_rate": 4.7188433588644246e-05, "loss": 0.6416, "step": 5999 }, { "epoch": 0.711490572749911, "grad_norm": 3.0503118304620536, "learning_rate": 4.718732755409841e-05, "loss": 0.7112, "step": 6000 }, { "epoch": 0.7116091545120361, "grad_norm": 1.7045087574206623, "learning_rate": 4.718622131501263e-05, "loss": 0.6012, "step": 6001 }, { "epoch": 0.7117277362741611, "grad_norm": 1.696515044979516, "learning_rate": 4.7185114871397086e-05, "loss": 0.4959, "step": 6002 }, { "epoch": 0.7118463180362861, "grad_norm": 1.6356284959822913, "learning_rate": 4.718400822326199e-05, "loss": 0.5346, "step": 6003 }, { "epoch": 0.711964899798411, "grad_norm": 1.6989017943775164, "learning_rate": 4.7182901370617546e-05, "loss": 0.58, "step": 6004 }, { "epoch": 0.712083481560536, "grad_norm": 1.6512814543362178, "learning_rate": 4.718179431347395e-05, "loss": 0.6236, "step": 6005 }, { "epoch": 0.712202063322661, "grad_norm": 1.5570988112192905, "learning_rate": 4.7180687051841416e-05, "loss": 0.5012, "step": 6006 }, { "epoch": 0.712320645084786, "grad_norm": 1.2465712638269832, "learning_rate": 4.717957958573015e-05, "loss": 0.4705, "step": 6007 }, { "epoch": 0.712439226846911, "grad_norm": 1.4098136796235567, "learning_rate": 4.7178471915150344e-05, "loss": 0.4736, "step": 6008 }, { "epoch": 0.712557808609036, "grad_norm": 1.6799569338660605, "learning_rate": 4.7177364040112236e-05, "loss": 0.5198, "step": 6009 }, { "epoch": 0.7126763903711609, "grad_norm": 1.3489622627853803, "learning_rate": 4.717625596062602e-05, "loss": 0.5322, "step": 6010 }, { "epoch": 0.7127949721332859, "grad_norm": 1.1519376170329596, "learning_rate": 4.717514767670193e-05, "loss": 0.4263, "step": 6011 }, { "epoch": 0.7129135538954109, "grad_norm": 1.3553575209621211, "learning_rate": 4.717403918835017e-05, "loss": 0.573, "step": 6012 }, { "epoch": 0.7130321356575359, "grad_norm": 1.3496918712492023, "learning_rate": 4.7172930495580945e-05, "loss": 0.4021, "step": 6013 }, { "epoch": 0.7131507174196609, "grad_norm": 1.207950603079253, "learning_rate": 4.717182159840451e-05, "loss": 0.4735, "step": 6014 }, { "epoch": 0.7132692991817858, "grad_norm": 1.5058323829145883, "learning_rate": 4.717071249683106e-05, "loss": 0.4265, "step": 6015 }, { "epoch": 0.7133878809439108, "grad_norm": 1.5216611171563474, "learning_rate": 4.716960319087082e-05, "loss": 0.474, "step": 6016 }, { "epoch": 0.7135064627060358, "grad_norm": 1.2316875303428252, "learning_rate": 4.7168493680534034e-05, "loss": 0.4708, "step": 6017 }, { "epoch": 0.7136250444681608, "grad_norm": 1.365902239593643, "learning_rate": 4.716738396583092e-05, "loss": 0.3592, "step": 6018 }, { "epoch": 0.7137436262302858, "grad_norm": 1.4722511555458055, "learning_rate": 4.7166274046771714e-05, "loss": 0.6663, "step": 6019 }, { "epoch": 0.7138622079924107, "grad_norm": 2.2329775512133896, "learning_rate": 4.716516392336664e-05, "loss": 0.6219, "step": 6020 }, { "epoch": 0.7139807897545357, "grad_norm": 1.385866409848301, "learning_rate": 4.7164053595625934e-05, "loss": 0.499, "step": 6021 }, { "epoch": 0.7140993715166607, "grad_norm": 1.3480872785222318, "learning_rate": 4.716294306355983e-05, "loss": 0.4731, "step": 6022 }, { "epoch": 0.7142179532787857, "grad_norm": 1.4786621663330546, "learning_rate": 4.716183232717857e-05, "loss": 0.5559, "step": 6023 }, { "epoch": 0.7143365350409107, "grad_norm": 1.7232082315966315, "learning_rate": 4.71607213864924e-05, "loss": 0.6616, "step": 6024 }, { "epoch": 0.7144551168030356, "grad_norm": 1.0566713350334818, "learning_rate": 4.7159610241511545e-05, "loss": 0.3917, "step": 6025 }, { "epoch": 0.7145736985651607, "grad_norm": 1.275234150206778, "learning_rate": 4.715849889224626e-05, "loss": 0.3643, "step": 6026 }, { "epoch": 0.7146922803272857, "grad_norm": 1.91446337837352, "learning_rate": 4.715738733870679e-05, "loss": 0.5402, "step": 6027 }, { "epoch": 0.7148108620894107, "grad_norm": 1.2693028220829001, "learning_rate": 4.715627558090338e-05, "loss": 0.4081, "step": 6028 }, { "epoch": 0.7149294438515357, "grad_norm": 1.5687933386689075, "learning_rate": 4.7155163618846274e-05, "loss": 0.4446, "step": 6029 }, { "epoch": 0.7150480256136607, "grad_norm": 1.4604014205736202, "learning_rate": 4.715405145254573e-05, "loss": 0.3999, "step": 6030 }, { "epoch": 0.7151666073757856, "grad_norm": 1.3775410782322095, "learning_rate": 4.715293908201201e-05, "loss": 0.4105, "step": 6031 }, { "epoch": 0.7152851891379106, "grad_norm": 1.569618936490067, "learning_rate": 4.7151826507255346e-05, "loss": 0.5459, "step": 6032 }, { "epoch": 0.7154037709000356, "grad_norm": 1.4234929001217402, "learning_rate": 4.7150713728286014e-05, "loss": 0.5213, "step": 6033 }, { "epoch": 0.7155223526621606, "grad_norm": 1.3599099970854809, "learning_rate": 4.714960074511425e-05, "loss": 0.4318, "step": 6034 }, { "epoch": 0.7156409344242856, "grad_norm": 1.185096464777227, "learning_rate": 4.714848755775034e-05, "loss": 0.4058, "step": 6035 }, { "epoch": 0.7157595161864105, "grad_norm": 1.3312068217066193, "learning_rate": 4.7147374166204525e-05, "loss": 0.341, "step": 6036 }, { "epoch": 0.7158780979485355, "grad_norm": 1.3527698017855552, "learning_rate": 4.714626057048709e-05, "loss": 0.5029, "step": 6037 }, { "epoch": 0.7159966797106605, "grad_norm": 1.7018056504987147, "learning_rate": 4.714514677060829e-05, "loss": 0.5775, "step": 6038 }, { "epoch": 0.7161152614727855, "grad_norm": 1.4503923855099095, "learning_rate": 4.7144032766578386e-05, "loss": 0.3905, "step": 6039 }, { "epoch": 0.7162338432349105, "grad_norm": 1.041306270461812, "learning_rate": 4.714291855840766e-05, "loss": 0.3394, "step": 6040 }, { "epoch": 0.7163524249970354, "grad_norm": 1.2064904598779385, "learning_rate": 4.7141804146106374e-05, "loss": 0.3862, "step": 6041 }, { "epoch": 0.7164710067591604, "grad_norm": 1.6417707836099262, "learning_rate": 4.7140689529684814e-05, "loss": 0.4147, "step": 6042 }, { "epoch": 0.7165895885212854, "grad_norm": 1.34049993247524, "learning_rate": 4.7139574709153236e-05, "loss": 0.4876, "step": 6043 }, { "epoch": 0.7167081702834104, "grad_norm": 1.5479176655938736, "learning_rate": 4.7138459684521935e-05, "loss": 0.4892, "step": 6044 }, { "epoch": 0.7168267520455354, "grad_norm": 1.5159484971048887, "learning_rate": 4.713734445580118e-05, "loss": 0.4567, "step": 6045 }, { "epoch": 0.7169453338076603, "grad_norm": 1.0177933904238412, "learning_rate": 4.713622902300126e-05, "loss": 0.359, "step": 6046 }, { "epoch": 0.7170639155697853, "grad_norm": 1.3862692328244868, "learning_rate": 4.713511338613245e-05, "loss": 0.397, "step": 6047 }, { "epoch": 0.7171824973319103, "grad_norm": 1.3885611342207496, "learning_rate": 4.713399754520505e-05, "loss": 0.3334, "step": 6048 }, { "epoch": 0.7173010790940353, "grad_norm": 2.13315786800925, "learning_rate": 4.713288150022932e-05, "loss": 0.7461, "step": 6049 }, { "epoch": 0.7174196608561603, "grad_norm": 1.5981453321549308, "learning_rate": 4.7131765251215565e-05, "loss": 0.4815, "step": 6050 }, { "epoch": 0.7175382426182854, "grad_norm": 1.913126525779876, "learning_rate": 4.7130648798174085e-05, "loss": 0.5337, "step": 6051 }, { "epoch": 0.7176568243804103, "grad_norm": 1.6175865442186839, "learning_rate": 4.712953214111515e-05, "loss": 0.5532, "step": 6052 }, { "epoch": 0.7177754061425353, "grad_norm": 1.4064948750719986, "learning_rate": 4.712841528004907e-05, "loss": 0.4733, "step": 6053 }, { "epoch": 0.7178939879046603, "grad_norm": 1.1546817956854067, "learning_rate": 4.712729821498614e-05, "loss": 0.3239, "step": 6054 }, { "epoch": 0.7180125696667853, "grad_norm": 1.4473153536356218, "learning_rate": 4.712618094593665e-05, "loss": 0.433, "step": 6055 }, { "epoch": 0.7181311514289103, "grad_norm": 1.5849077953621233, "learning_rate": 4.712506347291091e-05, "loss": 0.4741, "step": 6056 }, { "epoch": 0.7182497331910352, "grad_norm": 1.3400456664627796, "learning_rate": 4.712394579591921e-05, "loss": 0.4599, "step": 6057 }, { "epoch": 0.7183683149531602, "grad_norm": 2.1700411447037706, "learning_rate": 4.7122827914971864e-05, "loss": 0.6045, "step": 6058 }, { "epoch": 0.7184868967152852, "grad_norm": 1.221094944651475, "learning_rate": 4.712170983007917e-05, "loss": 0.4555, "step": 6059 }, { "epoch": 0.7186054784774102, "grad_norm": 1.5110335358415627, "learning_rate": 4.7120591541251444e-05, "loss": 0.6035, "step": 6060 }, { "epoch": 0.7187240602395352, "grad_norm": 1.238183171251342, "learning_rate": 4.711947304849899e-05, "loss": 0.4659, "step": 6061 }, { "epoch": 0.7188426420016601, "grad_norm": 1.3234519559758533, "learning_rate": 4.711835435183211e-05, "loss": 0.5861, "step": 6062 }, { "epoch": 0.7189612237637851, "grad_norm": 1.270018465954381, "learning_rate": 4.711723545126113e-05, "loss": 0.4681, "step": 6063 }, { "epoch": 0.7190798055259101, "grad_norm": 1.565894749531541, "learning_rate": 4.711611634679637e-05, "loss": 0.6446, "step": 6064 }, { "epoch": 0.7191983872880351, "grad_norm": 1.4628545783303284, "learning_rate": 4.711499703844813e-05, "loss": 0.4773, "step": 6065 }, { "epoch": 0.7193169690501601, "grad_norm": 1.3100905789092787, "learning_rate": 4.7113877526226733e-05, "loss": 0.4971, "step": 6066 }, { "epoch": 0.719435550812285, "grad_norm": 1.3882434648454482, "learning_rate": 4.711275781014251e-05, "loss": 0.4591, "step": 6067 }, { "epoch": 0.71955413257441, "grad_norm": 1.6472013814594941, "learning_rate": 4.7111637890205775e-05, "loss": 0.6753, "step": 6068 }, { "epoch": 0.719672714336535, "grad_norm": 1.4302680300845416, "learning_rate": 4.7110517766426855e-05, "loss": 0.4397, "step": 6069 }, { "epoch": 0.71979129609866, "grad_norm": 1.1305982517149848, "learning_rate": 4.710939743881607e-05, "loss": 0.317, "step": 6070 }, { "epoch": 0.719909877860785, "grad_norm": 1.2798688675403849, "learning_rate": 4.710827690738376e-05, "loss": 0.4581, "step": 6071 }, { "epoch": 0.72002845962291, "grad_norm": 1.3004879211362474, "learning_rate": 4.7107156172140244e-05, "loss": 0.5232, "step": 6072 }, { "epoch": 0.7201470413850349, "grad_norm": 1.4320610922750447, "learning_rate": 4.710603523309586e-05, "loss": 0.5267, "step": 6073 }, { "epoch": 0.7202656231471599, "grad_norm": 1.621326979491838, "learning_rate": 4.710491409026093e-05, "loss": 0.56, "step": 6074 }, { "epoch": 0.7203842049092849, "grad_norm": 1.4149332751544246, "learning_rate": 4.7103792743645806e-05, "loss": 0.4817, "step": 6075 }, { "epoch": 0.72050278667141, "grad_norm": 1.2001455843915008, "learning_rate": 4.710267119326082e-05, "loss": 0.3876, "step": 6076 }, { "epoch": 0.720621368433535, "grad_norm": 1.4304629414419554, "learning_rate": 4.710154943911631e-05, "loss": 0.5728, "step": 6077 }, { "epoch": 0.72073995019566, "grad_norm": 1.574098980406714, "learning_rate": 4.7100427481222614e-05, "loss": 0.5394, "step": 6078 }, { "epoch": 0.7208585319577849, "grad_norm": 1.6026621344997338, "learning_rate": 4.709930531959008e-05, "loss": 0.4271, "step": 6079 }, { "epoch": 0.7209771137199099, "grad_norm": 1.2532736098180903, "learning_rate": 4.7098182954229046e-05, "loss": 0.3911, "step": 6080 }, { "epoch": 0.7210956954820349, "grad_norm": 1.6874586795923738, "learning_rate": 4.709706038514987e-05, "loss": 0.5757, "step": 6081 }, { "epoch": 0.7212142772441599, "grad_norm": 1.3503404322641774, "learning_rate": 4.709593761236289e-05, "loss": 0.4268, "step": 6082 }, { "epoch": 0.7213328590062849, "grad_norm": 1.949233066843344, "learning_rate": 4.709481463587846e-05, "loss": 0.5557, "step": 6083 }, { "epoch": 0.7214514407684098, "grad_norm": 1.5681717012804985, "learning_rate": 4.709369145570694e-05, "loss": 0.566, "step": 6084 }, { "epoch": 0.7215700225305348, "grad_norm": 1.9754683427462, "learning_rate": 4.709256807185867e-05, "loss": 0.6635, "step": 6085 }, { "epoch": 0.7216886042926598, "grad_norm": 1.6730690473100094, "learning_rate": 4.709144448434402e-05, "loss": 0.5168, "step": 6086 }, { "epoch": 0.7218071860547848, "grad_norm": 1.5928167540314504, "learning_rate": 4.709032069317334e-05, "loss": 0.5256, "step": 6087 }, { "epoch": 0.7219257678169098, "grad_norm": 1.899830145254083, "learning_rate": 4.708919669835699e-05, "loss": 0.792, "step": 6088 }, { "epoch": 0.7220443495790347, "grad_norm": 1.1766100104843935, "learning_rate": 4.7088072499905345e-05, "loss": 0.3604, "step": 6089 }, { "epoch": 0.7221629313411597, "grad_norm": 1.8544421159246804, "learning_rate": 4.7086948097828744e-05, "loss": 0.7563, "step": 6090 }, { "epoch": 0.7222815131032847, "grad_norm": 1.352637704505466, "learning_rate": 4.708582349213757e-05, "loss": 0.5267, "step": 6091 }, { "epoch": 0.7224000948654097, "grad_norm": 1.7480040727560886, "learning_rate": 4.708469868284219e-05, "loss": 0.5927, "step": 6092 }, { "epoch": 0.7225186766275347, "grad_norm": 1.4510495934835725, "learning_rate": 4.7083573669952975e-05, "loss": 0.5037, "step": 6093 }, { "epoch": 0.7226372583896596, "grad_norm": 1.4686067214467082, "learning_rate": 4.7082448453480284e-05, "loss": 0.6015, "step": 6094 }, { "epoch": 0.7227558401517846, "grad_norm": 1.275748824337452, "learning_rate": 4.70813230334345e-05, "loss": 0.3506, "step": 6095 }, { "epoch": 0.7228744219139096, "grad_norm": 1.4045493416032426, "learning_rate": 4.708019740982599e-05, "loss": 0.494, "step": 6096 }, { "epoch": 0.7229930036760346, "grad_norm": 1.433057706514785, "learning_rate": 4.7079071582665144e-05, "loss": 0.3871, "step": 6097 }, { "epoch": 0.7231115854381596, "grad_norm": 1.096336106973171, "learning_rate": 4.707794555196233e-05, "loss": 0.3349, "step": 6098 }, { "epoch": 0.7232301672002845, "grad_norm": 1.4653026073170068, "learning_rate": 4.707681931772793e-05, "loss": 0.4744, "step": 6099 }, { "epoch": 0.7233487489624095, "grad_norm": 1.4702433964655757, "learning_rate": 4.707569287997233e-05, "loss": 0.5952, "step": 6100 }, { "epoch": 0.7234673307245346, "grad_norm": 1.3176293396136916, "learning_rate": 4.707456623870591e-05, "loss": 0.4497, "step": 6101 }, { "epoch": 0.7235859124866596, "grad_norm": 1.2449112754993978, "learning_rate": 4.707343939393906e-05, "loss": 0.467, "step": 6102 }, { "epoch": 0.7237044942487846, "grad_norm": 1.2954216031080554, "learning_rate": 4.707231234568217e-05, "loss": 0.4541, "step": 6103 }, { "epoch": 0.7238230760109096, "grad_norm": 1.4922824390051814, "learning_rate": 4.707118509394563e-05, "loss": 0.4825, "step": 6104 }, { "epoch": 0.7239416577730345, "grad_norm": 1.6072524405283446, "learning_rate": 4.707005763873982e-05, "loss": 0.6649, "step": 6105 }, { "epoch": 0.7240602395351595, "grad_norm": 1.1182452364273174, "learning_rate": 4.706892998007515e-05, "loss": 0.3064, "step": 6106 }, { "epoch": 0.7241788212972845, "grad_norm": 1.375058087510993, "learning_rate": 4.7067802117962e-05, "loss": 0.3902, "step": 6107 }, { "epoch": 0.7242974030594095, "grad_norm": 1.6848531189701381, "learning_rate": 4.7066674052410784e-05, "loss": 0.4881, "step": 6108 }, { "epoch": 0.7244159848215345, "grad_norm": 1.3644043370041457, "learning_rate": 4.7065545783431894e-05, "loss": 0.5179, "step": 6109 }, { "epoch": 0.7245345665836594, "grad_norm": 1.7573666511370971, "learning_rate": 4.7064417311035725e-05, "loss": 0.5681, "step": 6110 }, { "epoch": 0.7246531483457844, "grad_norm": 2.1447510082031775, "learning_rate": 4.7063288635232685e-05, "loss": 0.6742, "step": 6111 }, { "epoch": 0.7247717301079094, "grad_norm": 1.2430738151333283, "learning_rate": 4.706215975603319e-05, "loss": 0.3815, "step": 6112 }, { "epoch": 0.7248903118700344, "grad_norm": 1.8866850891453646, "learning_rate": 4.706103067344762e-05, "loss": 0.6571, "step": 6113 }, { "epoch": 0.7250088936321594, "grad_norm": 1.6442078766946733, "learning_rate": 4.705990138748641e-05, "loss": 0.5493, "step": 6114 }, { "epoch": 0.7251274753942843, "grad_norm": 1.298919915916663, "learning_rate": 4.7058771898159956e-05, "loss": 0.4686, "step": 6115 }, { "epoch": 0.7252460571564093, "grad_norm": 1.5927623984654276, "learning_rate": 4.705764220547868e-05, "loss": 0.4893, "step": 6116 }, { "epoch": 0.7253646389185343, "grad_norm": 1.4456115134497547, "learning_rate": 4.7056512309452996e-05, "loss": 0.4801, "step": 6117 }, { "epoch": 0.7254832206806593, "grad_norm": 1.0179228604227588, "learning_rate": 4.705538221009331e-05, "loss": 0.3511, "step": 6118 }, { "epoch": 0.7256018024427843, "grad_norm": 1.4692050879565346, "learning_rate": 4.705425190741004e-05, "loss": 0.5707, "step": 6119 }, { "epoch": 0.7257203842049093, "grad_norm": 1.1014355604907669, "learning_rate": 4.7053121401413626e-05, "loss": 0.3457, "step": 6120 }, { "epoch": 0.7258389659670342, "grad_norm": 1.3306585520792216, "learning_rate": 4.7051990692114465e-05, "loss": 0.4919, "step": 6121 }, { "epoch": 0.7259575477291592, "grad_norm": 1.1324490986329399, "learning_rate": 4.705085977952299e-05, "loss": 0.4341, "step": 6122 }, { "epoch": 0.7260761294912842, "grad_norm": 1.366145845239357, "learning_rate": 4.704972866364964e-05, "loss": 0.4886, "step": 6123 }, { "epoch": 0.7261947112534092, "grad_norm": 1.4748933511031468, "learning_rate": 4.704859734450483e-05, "loss": 0.5491, "step": 6124 }, { "epoch": 0.7263132930155342, "grad_norm": 1.4539515014673203, "learning_rate": 4.704746582209898e-05, "loss": 0.6176, "step": 6125 }, { "epoch": 0.7264318747776592, "grad_norm": 1.1830381252103113, "learning_rate": 4.704633409644254e-05, "loss": 0.5141, "step": 6126 }, { "epoch": 0.7265504565397842, "grad_norm": 1.526427468684614, "learning_rate": 4.704520216754593e-05, "loss": 0.5824, "step": 6127 }, { "epoch": 0.7266690383019092, "grad_norm": 1.2105971405103786, "learning_rate": 4.704407003541959e-05, "loss": 0.3537, "step": 6128 }, { "epoch": 0.7267876200640342, "grad_norm": 1.2663308055695888, "learning_rate": 4.7042937700073955e-05, "loss": 0.4999, "step": 6129 }, { "epoch": 0.7269062018261592, "grad_norm": 1.2129383480140594, "learning_rate": 4.704180516151947e-05, "loss": 0.344, "step": 6130 }, { "epoch": 0.7270247835882842, "grad_norm": 1.5349390517440467, "learning_rate": 4.7040672419766566e-05, "loss": 0.4624, "step": 6131 }, { "epoch": 0.7271433653504091, "grad_norm": 1.5005467457808328, "learning_rate": 4.703953947482569e-05, "loss": 0.4436, "step": 6132 }, { "epoch": 0.7272619471125341, "grad_norm": 1.5601081139531303, "learning_rate": 4.70384063267073e-05, "loss": 0.5558, "step": 6133 }, { "epoch": 0.7273805288746591, "grad_norm": 1.7806659284043609, "learning_rate": 4.703727297542181e-05, "loss": 0.4965, "step": 6134 }, { "epoch": 0.7274991106367841, "grad_norm": 1.4418226123121622, "learning_rate": 4.70361394209797e-05, "loss": 0.4394, "step": 6135 }, { "epoch": 0.727617692398909, "grad_norm": 1.9574513130486417, "learning_rate": 4.703500566339139e-05, "loss": 0.6418, "step": 6136 }, { "epoch": 0.727736274161034, "grad_norm": 1.6760989125310386, "learning_rate": 4.7033871702667373e-05, "loss": 0.5388, "step": 6137 }, { "epoch": 0.727854855923159, "grad_norm": 1.074912096626831, "learning_rate": 4.7032737538818063e-05, "loss": 0.3293, "step": 6138 }, { "epoch": 0.727973437685284, "grad_norm": 1.237697926830716, "learning_rate": 4.703160317185393e-05, "loss": 0.4246, "step": 6139 }, { "epoch": 0.728092019447409, "grad_norm": 1.0479735513351773, "learning_rate": 4.703046860178544e-05, "loss": 0.3414, "step": 6140 }, { "epoch": 0.728210601209534, "grad_norm": 1.6145324244652244, "learning_rate": 4.702933382862304e-05, "loss": 0.5514, "step": 6141 }, { "epoch": 0.7283291829716589, "grad_norm": 1.6033699810295368, "learning_rate": 4.70281988523772e-05, "loss": 0.4655, "step": 6142 }, { "epoch": 0.7284477647337839, "grad_norm": 1.79396504532826, "learning_rate": 4.7027063673058385e-05, "loss": 0.4765, "step": 6143 }, { "epoch": 0.7285663464959089, "grad_norm": 1.04439722632225, "learning_rate": 4.7025928290677045e-05, "loss": 0.3362, "step": 6144 }, { "epoch": 0.7286849282580339, "grad_norm": 1.5059568415885345, "learning_rate": 4.702479270524366e-05, "loss": 0.4121, "step": 6145 }, { "epoch": 0.7288035100201589, "grad_norm": 1.423702517425788, "learning_rate": 4.70236569167687e-05, "loss": 0.5228, "step": 6146 }, { "epoch": 0.7289220917822838, "grad_norm": 1.4425488350041447, "learning_rate": 4.702252092526262e-05, "loss": 0.4691, "step": 6147 }, { "epoch": 0.7290406735444088, "grad_norm": 1.5632808079508402, "learning_rate": 4.7021384730735906e-05, "loss": 0.5509, "step": 6148 }, { "epoch": 0.7291592553065338, "grad_norm": 1.1668290562334627, "learning_rate": 4.7020248333199037e-05, "loss": 0.2994, "step": 6149 }, { "epoch": 0.7292778370686588, "grad_norm": 1.4527775611446538, "learning_rate": 4.701911173266247e-05, "loss": 0.4897, "step": 6150 }, { "epoch": 0.7293964188307839, "grad_norm": 1.4109943976971817, "learning_rate": 4.7017974929136696e-05, "loss": 0.4182, "step": 6151 }, { "epoch": 0.7295150005929089, "grad_norm": 1.5438995468183552, "learning_rate": 4.70168379226322e-05, "loss": 0.5807, "step": 6152 }, { "epoch": 0.7296335823550338, "grad_norm": 1.4552135237747463, "learning_rate": 4.701570071315945e-05, "loss": 0.4432, "step": 6153 }, { "epoch": 0.7297521641171588, "grad_norm": 0.9821154338245045, "learning_rate": 4.7014563300728945e-05, "loss": 0.3268, "step": 6154 }, { "epoch": 0.7298707458792838, "grad_norm": 1.7380504818896743, "learning_rate": 4.701342568535115e-05, "loss": 0.6064, "step": 6155 }, { "epoch": 0.7299893276414088, "grad_norm": 1.554833338188221, "learning_rate": 4.7012287867036574e-05, "loss": 0.4653, "step": 6156 }, { "epoch": 0.7301079094035338, "grad_norm": 1.4807020832089637, "learning_rate": 4.70111498457957e-05, "loss": 0.5737, "step": 6157 }, { "epoch": 0.7302264911656587, "grad_norm": 1.0859189164372294, "learning_rate": 4.7010011621639014e-05, "loss": 0.3405, "step": 6158 }, { "epoch": 0.7303450729277837, "grad_norm": 1.6008969398029698, "learning_rate": 4.700887319457701e-05, "loss": 0.5306, "step": 6159 }, { "epoch": 0.7304636546899087, "grad_norm": 1.390401548286397, "learning_rate": 4.700773456462018e-05, "loss": 0.38, "step": 6160 }, { "epoch": 0.7305822364520337, "grad_norm": 1.34474864340512, "learning_rate": 4.700659573177903e-05, "loss": 0.423, "step": 6161 }, { "epoch": 0.7307008182141587, "grad_norm": 1.0637285030638972, "learning_rate": 4.700545669606405e-05, "loss": 0.3454, "step": 6162 }, { "epoch": 0.7308193999762836, "grad_norm": 1.1108455204122984, "learning_rate": 4.700431745748575e-05, "loss": 0.3514, "step": 6163 }, { "epoch": 0.7309379817384086, "grad_norm": 1.315841038732577, "learning_rate": 4.700317801605462e-05, "loss": 0.4903, "step": 6164 }, { "epoch": 0.7310565635005336, "grad_norm": 1.4186120888516303, "learning_rate": 4.700203837178118e-05, "loss": 0.5242, "step": 6165 }, { "epoch": 0.7311751452626586, "grad_norm": 1.2177020366633406, "learning_rate": 4.700089852467592e-05, "loss": 0.3933, "step": 6166 }, { "epoch": 0.7312937270247836, "grad_norm": 1.48942028659979, "learning_rate": 4.699975847474936e-05, "loss": 0.3509, "step": 6167 }, { "epoch": 0.7314123087869085, "grad_norm": 1.600936471918868, "learning_rate": 4.6998618222011994e-05, "loss": 0.5253, "step": 6168 }, { "epoch": 0.7315308905490335, "grad_norm": 1.8584273433299079, "learning_rate": 4.699747776647435e-05, "loss": 0.521, "step": 6169 }, { "epoch": 0.7316494723111585, "grad_norm": 1.4689302738485108, "learning_rate": 4.6996337108146934e-05, "loss": 0.4587, "step": 6170 }, { "epoch": 0.7317680540732835, "grad_norm": 1.3531526689751616, "learning_rate": 4.6995196247040265e-05, "loss": 0.3281, "step": 6171 }, { "epoch": 0.7318866358354085, "grad_norm": 1.13607446537375, "learning_rate": 4.6994055183164866e-05, "loss": 0.3423, "step": 6172 }, { "epoch": 0.7320052175975335, "grad_norm": 1.0883323003081955, "learning_rate": 4.6992913916531246e-05, "loss": 0.3638, "step": 6173 }, { "epoch": 0.7321237993596584, "grad_norm": 1.2238382530147618, "learning_rate": 4.6991772447149916e-05, "loss": 0.502, "step": 6174 }, { "epoch": 0.7322423811217834, "grad_norm": 1.6985040053128915, "learning_rate": 4.699063077503143e-05, "loss": 0.6741, "step": 6175 }, { "epoch": 0.7323609628839085, "grad_norm": 1.3960324352697886, "learning_rate": 4.6989488900186286e-05, "loss": 0.4824, "step": 6176 }, { "epoch": 0.7324795446460335, "grad_norm": 1.5055420216694377, "learning_rate": 4.698834682262502e-05, "loss": 0.5284, "step": 6177 }, { "epoch": 0.7325981264081585, "grad_norm": 1.7294854946126987, "learning_rate": 4.6987204542358165e-05, "loss": 0.4152, "step": 6178 }, { "epoch": 0.7327167081702834, "grad_norm": 2.274259617256788, "learning_rate": 4.6986062059396244e-05, "loss": 0.7068, "step": 6179 }, { "epoch": 0.7328352899324084, "grad_norm": 1.443472081441776, "learning_rate": 4.698491937374979e-05, "loss": 0.5453, "step": 6180 }, { "epoch": 0.7329538716945334, "grad_norm": 1.3129736414853164, "learning_rate": 4.6983776485429334e-05, "loss": 0.4856, "step": 6181 }, { "epoch": 0.7330724534566584, "grad_norm": 1.5444262671347668, "learning_rate": 4.698263339444543e-05, "loss": 0.5288, "step": 6182 }, { "epoch": 0.7331910352187834, "grad_norm": 1.8276854081251293, "learning_rate": 4.698149010080859e-05, "loss": 0.5939, "step": 6183 }, { "epoch": 0.7333096169809084, "grad_norm": 1.2800965119769772, "learning_rate": 4.6980346604529373e-05, "loss": 0.3721, "step": 6184 }, { "epoch": 0.7334281987430333, "grad_norm": 1.1686261227419101, "learning_rate": 4.697920290561831e-05, "loss": 0.4384, "step": 6185 }, { "epoch": 0.7335467805051583, "grad_norm": 1.2403214374962317, "learning_rate": 4.697805900408596e-05, "loss": 0.3816, "step": 6186 }, { "epoch": 0.7336653622672833, "grad_norm": 1.2670523518517676, "learning_rate": 4.6976914899942846e-05, "loss": 0.3659, "step": 6187 }, { "epoch": 0.7337839440294083, "grad_norm": 1.63392549491091, "learning_rate": 4.697577059319953e-05, "loss": 0.6715, "step": 6188 }, { "epoch": 0.7339025257915333, "grad_norm": 1.175237018573613, "learning_rate": 4.697462608386656e-05, "loss": 0.3582, "step": 6189 }, { "epoch": 0.7340211075536582, "grad_norm": 1.2353414323680718, "learning_rate": 4.697348137195449e-05, "loss": 0.3591, "step": 6190 }, { "epoch": 0.7341396893157832, "grad_norm": 1.5115238865837388, "learning_rate": 4.697233645747385e-05, "loss": 0.5769, "step": 6191 }, { "epoch": 0.7342582710779082, "grad_norm": 1.860457604990858, "learning_rate": 4.697119134043523e-05, "loss": 0.6327, "step": 6192 }, { "epoch": 0.7343768528400332, "grad_norm": 1.1299392051405932, "learning_rate": 4.6970046020849165e-05, "loss": 0.3577, "step": 6193 }, { "epoch": 0.7344954346021582, "grad_norm": 1.5139762648327395, "learning_rate": 4.6968900498726206e-05, "loss": 0.566, "step": 6194 }, { "epoch": 0.7346140163642831, "grad_norm": 1.6584783683673403, "learning_rate": 4.696775477407693e-05, "loss": 0.5875, "step": 6195 }, { "epoch": 0.7347325981264081, "grad_norm": 1.3178724581766523, "learning_rate": 4.69666088469119e-05, "loss": 0.3837, "step": 6196 }, { "epoch": 0.7348511798885331, "grad_norm": 1.5518933656315026, "learning_rate": 4.696546271724167e-05, "loss": 0.523, "step": 6197 }, { "epoch": 0.7349697616506581, "grad_norm": 1.4776493106478672, "learning_rate": 4.6964316385076815e-05, "loss": 0.5226, "step": 6198 }, { "epoch": 0.7350883434127831, "grad_norm": 1.2762043009316175, "learning_rate": 4.696316985042788e-05, "loss": 0.4173, "step": 6199 }, { "epoch": 0.735206925174908, "grad_norm": 1.5719072045010867, "learning_rate": 4.696202311330547e-05, "loss": 0.511, "step": 6200 }, { "epoch": 0.7353255069370331, "grad_norm": 1.9067606581198728, "learning_rate": 4.696087617372012e-05, "loss": 0.7745, "step": 6201 }, { "epoch": 0.7354440886991581, "grad_norm": 1.722439127736495, "learning_rate": 4.6959729031682444e-05, "loss": 0.748, "step": 6202 }, { "epoch": 0.7355626704612831, "grad_norm": 1.1810700100815466, "learning_rate": 4.695858168720298e-05, "loss": 0.4605, "step": 6203 }, { "epoch": 0.7356812522234081, "grad_norm": 1.150253266435276, "learning_rate": 4.695743414029233e-05, "loss": 0.3639, "step": 6204 }, { "epoch": 0.7357998339855331, "grad_norm": 1.2596372997562701, "learning_rate": 4.695628639096106e-05, "loss": 0.4115, "step": 6205 }, { "epoch": 0.735918415747658, "grad_norm": 1.3016674762842935, "learning_rate": 4.695513843921975e-05, "loss": 0.3847, "step": 6206 }, { "epoch": 0.736036997509783, "grad_norm": 1.1780769422285366, "learning_rate": 4.6953990285078985e-05, "loss": 0.3555, "step": 6207 }, { "epoch": 0.736155579271908, "grad_norm": 1.2754956577478063, "learning_rate": 4.695284192854936e-05, "loss": 0.4648, "step": 6208 }, { "epoch": 0.736274161034033, "grad_norm": 1.1123335540746864, "learning_rate": 4.6951693369641445e-05, "loss": 0.3568, "step": 6209 }, { "epoch": 0.736392742796158, "grad_norm": 1.4984428450490659, "learning_rate": 4.695054460836584e-05, "loss": 0.4622, "step": 6210 }, { "epoch": 0.7365113245582829, "grad_norm": 1.139312819189427, "learning_rate": 4.694939564473313e-05, "loss": 0.3493, "step": 6211 }, { "epoch": 0.7366299063204079, "grad_norm": 1.3611111494634414, "learning_rate": 4.694824647875391e-05, "loss": 0.604, "step": 6212 }, { "epoch": 0.7367484880825329, "grad_norm": 1.3571835127280327, "learning_rate": 4.694709711043877e-05, "loss": 0.5375, "step": 6213 }, { "epoch": 0.7368670698446579, "grad_norm": 1.6814267035413153, "learning_rate": 4.6945947539798304e-05, "loss": 0.4604, "step": 6214 }, { "epoch": 0.7369856516067829, "grad_norm": 1.2757036064202247, "learning_rate": 4.694479776684312e-05, "loss": 0.4486, "step": 6215 }, { "epoch": 0.7371042333689078, "grad_norm": 1.5798852454912613, "learning_rate": 4.694364779158381e-05, "loss": 0.4504, "step": 6216 }, { "epoch": 0.7372228151310328, "grad_norm": 1.3489750600329697, "learning_rate": 4.694249761403098e-05, "loss": 0.3882, "step": 6217 }, { "epoch": 0.7373413968931578, "grad_norm": 1.4958926842604283, "learning_rate": 4.6941347234195224e-05, "loss": 0.3677, "step": 6218 }, { "epoch": 0.7374599786552828, "grad_norm": 1.6205198729670163, "learning_rate": 4.694019665208715e-05, "loss": 0.5831, "step": 6219 }, { "epoch": 0.7375785604174078, "grad_norm": 1.4242864209446198, "learning_rate": 4.693904586771738e-05, "loss": 0.371, "step": 6220 }, { "epoch": 0.7376971421795327, "grad_norm": 2.060640282675291, "learning_rate": 4.69378948810965e-05, "loss": 0.6788, "step": 6221 }, { "epoch": 0.7378157239416577, "grad_norm": 1.5737639808977788, "learning_rate": 4.693674369223513e-05, "loss": 0.5484, "step": 6222 }, { "epoch": 0.7379343057037827, "grad_norm": 1.4721894584509985, "learning_rate": 4.693559230114389e-05, "loss": 0.5108, "step": 6223 }, { "epoch": 0.7380528874659077, "grad_norm": 1.780398861560955, "learning_rate": 4.693444070783338e-05, "loss": 0.5076, "step": 6224 }, { "epoch": 0.7381714692280328, "grad_norm": 1.4972951523777451, "learning_rate": 4.693328891231423e-05, "loss": 0.3972, "step": 6225 }, { "epoch": 0.7382900509901578, "grad_norm": 1.6700400692469686, "learning_rate": 4.6932136914597055e-05, "loss": 0.4477, "step": 6226 }, { "epoch": 0.7384086327522827, "grad_norm": 1.9875202458537773, "learning_rate": 4.693098471469247e-05, "loss": 0.7133, "step": 6227 }, { "epoch": 0.7385272145144077, "grad_norm": 1.5074036409175564, "learning_rate": 4.69298323126111e-05, "loss": 0.5093, "step": 6228 }, { "epoch": 0.7386457962765327, "grad_norm": 1.5168591566486045, "learning_rate": 4.692867970836357e-05, "loss": 0.6296, "step": 6229 }, { "epoch": 0.7387643780386577, "grad_norm": 1.5856867070195186, "learning_rate": 4.6927526901960505e-05, "loss": 0.5616, "step": 6230 }, { "epoch": 0.7388829598007827, "grad_norm": 1.3820906911864215, "learning_rate": 4.6926373893412526e-05, "loss": 0.4506, "step": 6231 }, { "epoch": 0.7390015415629076, "grad_norm": 1.166112472442942, "learning_rate": 4.692522068273027e-05, "loss": 0.3213, "step": 6232 }, { "epoch": 0.7391201233250326, "grad_norm": 1.9808266662163505, "learning_rate": 4.6924067269924365e-05, "loss": 0.735, "step": 6233 }, { "epoch": 0.7392387050871576, "grad_norm": 1.1112223004449522, "learning_rate": 4.692291365500545e-05, "loss": 0.3481, "step": 6234 }, { "epoch": 0.7393572868492826, "grad_norm": 1.6713131974593587, "learning_rate": 4.6921759837984145e-05, "loss": 0.5537, "step": 6235 }, { "epoch": 0.7394758686114076, "grad_norm": 1.523757764031677, "learning_rate": 4.69206058188711e-05, "loss": 0.4888, "step": 6236 }, { "epoch": 0.7395944503735326, "grad_norm": 1.5293078920365217, "learning_rate": 4.6919451597676954e-05, "loss": 0.4426, "step": 6237 }, { "epoch": 0.7397130321356575, "grad_norm": 1.5612062343967632, "learning_rate": 4.691829717441234e-05, "loss": 0.6002, "step": 6238 }, { "epoch": 0.7398316138977825, "grad_norm": 1.4664504308550452, "learning_rate": 4.691714254908791e-05, "loss": 0.4037, "step": 6239 }, { "epoch": 0.7399501956599075, "grad_norm": 1.4605012174306575, "learning_rate": 4.6915987721714294e-05, "loss": 0.516, "step": 6240 }, { "epoch": 0.7400687774220325, "grad_norm": 1.3064238209515766, "learning_rate": 4.691483269230215e-05, "loss": 0.3833, "step": 6241 }, { "epoch": 0.7401873591841575, "grad_norm": 1.7329444590012557, "learning_rate": 4.6913677460862124e-05, "loss": 0.4952, "step": 6242 }, { "epoch": 0.7403059409462824, "grad_norm": 1.3613866062947992, "learning_rate": 4.691252202740486e-05, "loss": 0.4433, "step": 6243 }, { "epoch": 0.7404245227084074, "grad_norm": 1.4322400382294767, "learning_rate": 4.6911366391941016e-05, "loss": 0.6692, "step": 6244 }, { "epoch": 0.7405431044705324, "grad_norm": 1.8028155918342084, "learning_rate": 4.691021055448125e-05, "loss": 0.5646, "step": 6245 }, { "epoch": 0.7406616862326574, "grad_norm": 1.2414361610253701, "learning_rate": 4.69090545150362e-05, "loss": 0.3502, "step": 6246 }, { "epoch": 0.7407802679947824, "grad_norm": 1.3857572150620012, "learning_rate": 4.690789827361654e-05, "loss": 0.4687, "step": 6247 }, { "epoch": 0.7408988497569073, "grad_norm": 1.1439656211907105, "learning_rate": 4.6906741830232924e-05, "loss": 0.3641, "step": 6248 }, { "epoch": 0.7410174315190323, "grad_norm": 1.2691498579985017, "learning_rate": 4.690558518489601e-05, "loss": 0.3836, "step": 6249 }, { "epoch": 0.7411360132811574, "grad_norm": 1.7393228924994169, "learning_rate": 4.6904428337616465e-05, "loss": 0.547, "step": 6250 }, { "epoch": 0.7412545950432824, "grad_norm": 1.4299207937025349, "learning_rate": 4.690327128840495e-05, "loss": 0.5943, "step": 6251 }, { "epoch": 0.7413731768054074, "grad_norm": 1.583028220196199, "learning_rate": 4.6902114037272124e-05, "loss": 0.4931, "step": 6252 }, { "epoch": 0.7414917585675324, "grad_norm": 1.4690036246475067, "learning_rate": 4.690095658422868e-05, "loss": 0.6038, "step": 6253 }, { "epoch": 0.7416103403296573, "grad_norm": 1.3607355683654334, "learning_rate": 4.689979892928526e-05, "loss": 0.4787, "step": 6254 }, { "epoch": 0.7417289220917823, "grad_norm": 1.3121263825412204, "learning_rate": 4.689864107245256e-05, "loss": 0.3524, "step": 6255 }, { "epoch": 0.7418475038539073, "grad_norm": 1.349502106272725, "learning_rate": 4.689748301374124e-05, "loss": 0.5354, "step": 6256 }, { "epoch": 0.7419660856160323, "grad_norm": 1.2986048992157135, "learning_rate": 4.689632475316198e-05, "loss": 0.4841, "step": 6257 }, { "epoch": 0.7420846673781573, "grad_norm": 1.4643493803807395, "learning_rate": 4.6895166290725456e-05, "loss": 0.5016, "step": 6258 }, { "epoch": 0.7422032491402822, "grad_norm": 1.4210685599000255, "learning_rate": 4.689400762644235e-05, "loss": 0.4371, "step": 6259 }, { "epoch": 0.7423218309024072, "grad_norm": 1.574054715333771, "learning_rate": 4.6892848760323334e-05, "loss": 0.6711, "step": 6260 }, { "epoch": 0.7424404126645322, "grad_norm": 1.742093082502485, "learning_rate": 4.6891689692379104e-05, "loss": 0.6061, "step": 6261 }, { "epoch": 0.7425589944266572, "grad_norm": 1.1977217080585496, "learning_rate": 4.6890530422620336e-05, "loss": 0.398, "step": 6262 }, { "epoch": 0.7426775761887822, "grad_norm": 1.0460934136921634, "learning_rate": 4.688937095105773e-05, "loss": 0.4373, "step": 6263 }, { "epoch": 0.7427961579509071, "grad_norm": 1.2671592083371985, "learning_rate": 4.688821127770197e-05, "loss": 0.3046, "step": 6264 }, { "epoch": 0.7429147397130321, "grad_norm": 1.3426680731401661, "learning_rate": 4.688705140256373e-05, "loss": 0.4547, "step": 6265 }, { "epoch": 0.7430333214751571, "grad_norm": 1.521964705164523, "learning_rate": 4.688589132565372e-05, "loss": 0.5848, "step": 6266 }, { "epoch": 0.7431519032372821, "grad_norm": 1.4341454164559786, "learning_rate": 4.6884731046982634e-05, "loss": 0.5758, "step": 6267 }, { "epoch": 0.7432704849994071, "grad_norm": 1.4419929430364473, "learning_rate": 4.6883570566561166e-05, "loss": 0.3937, "step": 6268 }, { "epoch": 0.743389066761532, "grad_norm": 1.3850729824248333, "learning_rate": 4.6882409884400004e-05, "loss": 0.3761, "step": 6269 }, { "epoch": 0.743507648523657, "grad_norm": 1.4464350177976217, "learning_rate": 4.688124900050986e-05, "loss": 0.5444, "step": 6270 }, { "epoch": 0.743626230285782, "grad_norm": 1.3448928629987063, "learning_rate": 4.688008791490144e-05, "loss": 0.4035, "step": 6271 }, { "epoch": 0.743744812047907, "grad_norm": 1.2816235349919038, "learning_rate": 4.687892662758543e-05, "loss": 0.3779, "step": 6272 }, { "epoch": 0.743863393810032, "grad_norm": 1.1283749770743732, "learning_rate": 4.6877765138572555e-05, "loss": 0.3269, "step": 6273 }, { "epoch": 0.743981975572157, "grad_norm": 1.5368536326371092, "learning_rate": 4.6876603447873515e-05, "loss": 0.4891, "step": 6274 }, { "epoch": 0.744100557334282, "grad_norm": 1.6549517169697632, "learning_rate": 4.6875441555499014e-05, "loss": 0.4507, "step": 6275 }, { "epoch": 0.744219139096407, "grad_norm": 1.6954866653335727, "learning_rate": 4.687427946145976e-05, "loss": 0.5253, "step": 6276 }, { "epoch": 0.744337720858532, "grad_norm": 1.453392360831875, "learning_rate": 4.687311716576648e-05, "loss": 0.475, "step": 6277 }, { "epoch": 0.744456302620657, "grad_norm": 1.737531614806573, "learning_rate": 4.6871954668429886e-05, "loss": 0.5169, "step": 6278 }, { "epoch": 0.744574884382782, "grad_norm": 1.3979650710063858, "learning_rate": 4.6870791969460685e-05, "loss": 0.4533, "step": 6279 }, { "epoch": 0.744693466144907, "grad_norm": 2.0513704355663553, "learning_rate": 4.68696290688696e-05, "loss": 0.7054, "step": 6280 }, { "epoch": 0.7448120479070319, "grad_norm": 1.1660077427246562, "learning_rate": 4.6868465966667364e-05, "loss": 0.2725, "step": 6281 }, { "epoch": 0.7449306296691569, "grad_norm": 1.4810992969157994, "learning_rate": 4.686730266286468e-05, "loss": 0.4782, "step": 6282 }, { "epoch": 0.7450492114312819, "grad_norm": 1.361708325245046, "learning_rate": 4.6866139157472277e-05, "loss": 0.4006, "step": 6283 }, { "epoch": 0.7451677931934069, "grad_norm": 1.3644987167457872, "learning_rate": 4.6864975450500895e-05, "loss": 0.3732, "step": 6284 }, { "epoch": 0.7452863749555318, "grad_norm": 1.281873756301015, "learning_rate": 4.6863811541961245e-05, "loss": 0.4067, "step": 6285 }, { "epoch": 0.7454049567176568, "grad_norm": 1.6036346586664754, "learning_rate": 4.686264743186407e-05, "loss": 0.4754, "step": 6286 }, { "epoch": 0.7455235384797818, "grad_norm": 1.5166698070384537, "learning_rate": 4.6861483120220096e-05, "loss": 0.4348, "step": 6287 }, { "epoch": 0.7456421202419068, "grad_norm": 1.4206860095627198, "learning_rate": 4.686031860704005e-05, "loss": 0.5118, "step": 6288 }, { "epoch": 0.7457607020040318, "grad_norm": 1.2567813032335244, "learning_rate": 4.685915389233467e-05, "loss": 0.3513, "step": 6289 }, { "epoch": 0.7458792837661568, "grad_norm": 1.6151164669959355, "learning_rate": 4.685798897611471e-05, "loss": 0.6388, "step": 6290 }, { "epoch": 0.7459978655282817, "grad_norm": 1.240300975613239, "learning_rate": 4.6856823858390884e-05, "loss": 0.4301, "step": 6291 }, { "epoch": 0.7461164472904067, "grad_norm": 1.0376382955659638, "learning_rate": 4.6855658539173955e-05, "loss": 0.4068, "step": 6292 }, { "epoch": 0.7462350290525317, "grad_norm": 0.7771924152856599, "learning_rate": 4.685449301847465e-05, "loss": 0.2652, "step": 6293 }, { "epoch": 0.7463536108146567, "grad_norm": 1.4235350644691864, "learning_rate": 4.6853327296303714e-05, "loss": 0.4568, "step": 6294 }, { "epoch": 0.7464721925767817, "grad_norm": 1.5592215962676588, "learning_rate": 4.6852161372671896e-05, "loss": 0.4522, "step": 6295 }, { "epoch": 0.7465907743389066, "grad_norm": 1.0785573479148498, "learning_rate": 4.6850995247589955e-05, "loss": 0.3245, "step": 6296 }, { "epoch": 0.7467093561010316, "grad_norm": 1.5374996539087347, "learning_rate": 4.684982892106863e-05, "loss": 0.3583, "step": 6297 }, { "epoch": 0.7468279378631566, "grad_norm": 1.9464221293527022, "learning_rate": 4.6848662393118684e-05, "loss": 0.5642, "step": 6298 }, { "epoch": 0.7469465196252816, "grad_norm": 1.3666059234077679, "learning_rate": 4.684749566375085e-05, "loss": 0.422, "step": 6299 }, { "epoch": 0.7470651013874067, "grad_norm": 1.6365293180621938, "learning_rate": 4.684632873297591e-05, "loss": 0.604, "step": 6300 }, { "epoch": 0.7471836831495317, "grad_norm": 1.6855090959004302, "learning_rate": 4.68451616008046e-05, "loss": 0.6004, "step": 6301 }, { "epoch": 0.7473022649116566, "grad_norm": 1.560049246259256, "learning_rate": 4.6843994267247695e-05, "loss": 0.5244, "step": 6302 }, { "epoch": 0.7474208466737816, "grad_norm": 0.8921498237627483, "learning_rate": 4.684282673231595e-05, "loss": 0.275, "step": 6303 }, { "epoch": 0.7475394284359066, "grad_norm": 1.2897714879029365, "learning_rate": 4.684165899602012e-05, "loss": 0.4828, "step": 6304 }, { "epoch": 0.7476580101980316, "grad_norm": 1.2839966808908505, "learning_rate": 4.684049105837098e-05, "loss": 0.3765, "step": 6305 }, { "epoch": 0.7477765919601566, "grad_norm": 1.2264015874872136, "learning_rate": 4.68393229193793e-05, "loss": 0.368, "step": 6306 }, { "epoch": 0.7478951737222815, "grad_norm": 1.5062032870307644, "learning_rate": 4.683815457905584e-05, "loss": 0.5263, "step": 6307 }, { "epoch": 0.7480137554844065, "grad_norm": 1.5185731592604483, "learning_rate": 4.683698603741138e-05, "loss": 0.4129, "step": 6308 }, { "epoch": 0.7481323372465315, "grad_norm": 0.9903665611370346, "learning_rate": 4.683581729445668e-05, "loss": 0.3155, "step": 6309 }, { "epoch": 0.7482509190086565, "grad_norm": 1.1603193654960873, "learning_rate": 4.6834648350202525e-05, "loss": 0.2924, "step": 6310 }, { "epoch": 0.7483695007707815, "grad_norm": 1.3101852942949073, "learning_rate": 4.6833479204659695e-05, "loss": 0.3953, "step": 6311 }, { "epoch": 0.7484880825329064, "grad_norm": 1.898794508044423, "learning_rate": 4.6832309857838946e-05, "loss": 0.5173, "step": 6312 }, { "epoch": 0.7486066642950314, "grad_norm": 1.2706464865648388, "learning_rate": 4.683114030975108e-05, "loss": 0.454, "step": 6313 }, { "epoch": 0.7487252460571564, "grad_norm": 1.930766446772458, "learning_rate": 4.682997056040687e-05, "loss": 0.6507, "step": 6314 }, { "epoch": 0.7488438278192814, "grad_norm": 1.580148496131274, "learning_rate": 4.68288006098171e-05, "loss": 0.4644, "step": 6315 }, { "epoch": 0.7489624095814064, "grad_norm": 1.1073748525734413, "learning_rate": 4.682763045799256e-05, "loss": 0.3289, "step": 6316 }, { "epoch": 0.7490809913435313, "grad_norm": 1.6606635855200873, "learning_rate": 4.6826460104944035e-05, "loss": 0.6108, "step": 6317 }, { "epoch": 0.7491995731056563, "grad_norm": 1.711615462563066, "learning_rate": 4.682528955068231e-05, "loss": 0.5268, "step": 6318 }, { "epoch": 0.7493181548677813, "grad_norm": 1.1788679942905094, "learning_rate": 4.682411879521818e-05, "loss": 0.4082, "step": 6319 }, { "epoch": 0.7494367366299063, "grad_norm": 0.9787727467315984, "learning_rate": 4.682294783856244e-05, "loss": 0.2536, "step": 6320 }, { "epoch": 0.7495553183920313, "grad_norm": 1.560339068923449, "learning_rate": 4.6821776680725876e-05, "loss": 0.4788, "step": 6321 }, { "epoch": 0.7496739001541562, "grad_norm": 1.137331913528358, "learning_rate": 4.68206053217193e-05, "loss": 0.4129, "step": 6322 }, { "epoch": 0.7497924819162812, "grad_norm": 1.5547947060057168, "learning_rate": 4.681943376155349e-05, "loss": 0.4586, "step": 6323 }, { "epoch": 0.7499110636784062, "grad_norm": 1.4711917056605195, "learning_rate": 4.6818262000239264e-05, "loss": 0.3921, "step": 6324 }, { "epoch": 0.7500296454405313, "grad_norm": 1.3853343833891825, "learning_rate": 4.6817090037787416e-05, "loss": 0.4461, "step": 6325 }, { "epoch": 0.7501482272026563, "grad_norm": 1.5295401507384065, "learning_rate": 4.6815917874208746e-05, "loss": 0.4513, "step": 6326 }, { "epoch": 0.7502668089647813, "grad_norm": 1.5731740575822744, "learning_rate": 4.681474550951408e-05, "loss": 0.5532, "step": 6327 }, { "epoch": 0.7503853907269062, "grad_norm": 1.3947035345596193, "learning_rate": 4.68135729437142e-05, "loss": 0.3727, "step": 6328 }, { "epoch": 0.7505039724890312, "grad_norm": 1.6756604812947764, "learning_rate": 4.681240017681993e-05, "loss": 0.4802, "step": 6329 }, { "epoch": 0.7506225542511562, "grad_norm": 2.2677092230008826, "learning_rate": 4.6811227208842076e-05, "loss": 0.6117, "step": 6330 }, { "epoch": 0.7507411360132812, "grad_norm": 1.5782431756054374, "learning_rate": 4.681005403979146e-05, "loss": 0.426, "step": 6331 }, { "epoch": 0.7508597177754062, "grad_norm": 1.3447748555349468, "learning_rate": 4.680888066967889e-05, "loss": 0.3053, "step": 6332 }, { "epoch": 0.7509782995375311, "grad_norm": 1.7228981969016572, "learning_rate": 4.680770709851517e-05, "loss": 0.5614, "step": 6333 }, { "epoch": 0.7510968812996561, "grad_norm": 2.2698421409751544, "learning_rate": 4.680653332631115e-05, "loss": 0.9063, "step": 6334 }, { "epoch": 0.7512154630617811, "grad_norm": 1.6218961033729142, "learning_rate": 4.680535935307763e-05, "loss": 0.5079, "step": 6335 }, { "epoch": 0.7513340448239061, "grad_norm": 1.311630948240553, "learning_rate": 4.6804185178825434e-05, "loss": 0.3865, "step": 6336 }, { "epoch": 0.7514526265860311, "grad_norm": 1.6246719049163065, "learning_rate": 4.680301080356539e-05, "loss": 0.5459, "step": 6337 }, { "epoch": 0.751571208348156, "grad_norm": 1.972300465129122, "learning_rate": 4.680183622730833e-05, "loss": 0.6248, "step": 6338 }, { "epoch": 0.751689790110281, "grad_norm": 1.343925631327889, "learning_rate": 4.680066145006506e-05, "loss": 0.4111, "step": 6339 }, { "epoch": 0.751808371872406, "grad_norm": 1.3245543720241955, "learning_rate": 4.679948647184643e-05, "loss": 0.4471, "step": 6340 }, { "epoch": 0.751926953634531, "grad_norm": 1.1057681698699755, "learning_rate": 4.6798311292663265e-05, "loss": 0.3182, "step": 6341 }, { "epoch": 0.752045535396656, "grad_norm": 1.1435927650034214, "learning_rate": 4.679713591252641e-05, "loss": 0.3889, "step": 6342 }, { "epoch": 0.752164117158781, "grad_norm": 1.302082473723719, "learning_rate": 4.6795960331446684e-05, "loss": 0.3491, "step": 6343 }, { "epoch": 0.7522826989209059, "grad_norm": 1.4407345549067014, "learning_rate": 4.679478454943494e-05, "loss": 0.5028, "step": 6344 }, { "epoch": 0.7524012806830309, "grad_norm": 1.2970821893293363, "learning_rate": 4.6793608566502e-05, "loss": 0.4172, "step": 6345 }, { "epoch": 0.7525198624451559, "grad_norm": 1.7610362612712684, "learning_rate": 4.679243238265872e-05, "loss": 0.5603, "step": 6346 }, { "epoch": 0.7526384442072809, "grad_norm": 1.5376896733591277, "learning_rate": 4.6791255997915926e-05, "loss": 0.5489, "step": 6347 }, { "epoch": 0.7527570259694059, "grad_norm": 1.4221101972515386, "learning_rate": 4.6790079412284485e-05, "loss": 0.4881, "step": 6348 }, { "epoch": 0.7528756077315308, "grad_norm": 1.2570054042193417, "learning_rate": 4.678890262577523e-05, "loss": 0.3891, "step": 6349 }, { "epoch": 0.7529941894936559, "grad_norm": 1.2780964507939017, "learning_rate": 4.6787725638399016e-05, "loss": 0.4088, "step": 6350 }, { "epoch": 0.7531127712557809, "grad_norm": 1.2651787367497171, "learning_rate": 4.6786548450166686e-05, "loss": 0.4207, "step": 6351 }, { "epoch": 0.7532313530179059, "grad_norm": 1.7399243249642313, "learning_rate": 4.6785371061089095e-05, "loss": 0.4997, "step": 6352 }, { "epoch": 0.7533499347800309, "grad_norm": 1.4677619673833766, "learning_rate": 4.678419347117709e-05, "loss": 0.4275, "step": 6353 }, { "epoch": 0.7534685165421559, "grad_norm": 1.274939620926509, "learning_rate": 4.6783015680441547e-05, "loss": 0.6621, "step": 6354 }, { "epoch": 0.7535870983042808, "grad_norm": 1.647710065775666, "learning_rate": 4.6781837688893307e-05, "loss": 0.507, "step": 6355 }, { "epoch": 0.7537056800664058, "grad_norm": 1.2466373663376678, "learning_rate": 4.6780659496543235e-05, "loss": 0.3296, "step": 6356 }, { "epoch": 0.7538242618285308, "grad_norm": 1.5647744791648217, "learning_rate": 4.677948110340219e-05, "loss": 0.4529, "step": 6357 }, { "epoch": 0.7539428435906558, "grad_norm": 1.296580911664781, "learning_rate": 4.6778302509481046e-05, "loss": 0.3588, "step": 6358 }, { "epoch": 0.7540614253527808, "grad_norm": 1.5830925680805785, "learning_rate": 4.6777123714790646e-05, "loss": 0.5504, "step": 6359 }, { "epoch": 0.7541800071149057, "grad_norm": 1.529421546470144, "learning_rate": 4.677594471934188e-05, "loss": 0.4854, "step": 6360 }, { "epoch": 0.7542985888770307, "grad_norm": 1.3575573438663817, "learning_rate": 4.6774765523145596e-05, "loss": 0.5334, "step": 6361 }, { "epoch": 0.7544171706391557, "grad_norm": 1.1371287831170098, "learning_rate": 4.677358612621269e-05, "loss": 0.3899, "step": 6362 }, { "epoch": 0.7545357524012807, "grad_norm": 1.879954146029522, "learning_rate": 4.677240652855401e-05, "loss": 0.6789, "step": 6363 }, { "epoch": 0.7546543341634057, "grad_norm": 1.9507377658242837, "learning_rate": 4.677122673018044e-05, "loss": 0.6271, "step": 6364 }, { "epoch": 0.7547729159255306, "grad_norm": 1.567099515901128, "learning_rate": 4.677004673110286e-05, "loss": 0.3931, "step": 6365 }, { "epoch": 0.7548914976876556, "grad_norm": 1.5516136497773267, "learning_rate": 4.6768866531332146e-05, "loss": 0.5701, "step": 6366 }, { "epoch": 0.7550100794497806, "grad_norm": 1.385937021189053, "learning_rate": 4.676768613087918e-05, "loss": 0.4048, "step": 6367 }, { "epoch": 0.7551286612119056, "grad_norm": 1.3338284263764733, "learning_rate": 4.6766505529754836e-05, "loss": 0.4834, "step": 6368 }, { "epoch": 0.7552472429740306, "grad_norm": 1.4188792856800085, "learning_rate": 4.676532472797001e-05, "loss": 0.5361, "step": 6369 }, { "epoch": 0.7553658247361555, "grad_norm": 1.302520009652949, "learning_rate": 4.676414372553558e-05, "loss": 0.4428, "step": 6370 }, { "epoch": 0.7554844064982805, "grad_norm": 1.5704706519737799, "learning_rate": 4.676296252246242e-05, "loss": 0.6478, "step": 6371 }, { "epoch": 0.7556029882604055, "grad_norm": 1.4803086760575612, "learning_rate": 4.676178111876145e-05, "loss": 0.5026, "step": 6372 }, { "epoch": 0.7557215700225305, "grad_norm": 1.5731255429612399, "learning_rate": 4.6760599514443534e-05, "loss": 0.5063, "step": 6373 }, { "epoch": 0.7558401517846555, "grad_norm": 1.382580382360104, "learning_rate": 4.675941770951958e-05, "loss": 0.4246, "step": 6374 }, { "epoch": 0.7559587335467806, "grad_norm": 1.340642397854399, "learning_rate": 4.6758235704000475e-05, "loss": 0.4869, "step": 6375 }, { "epoch": 0.7560773153089055, "grad_norm": 1.2699941466730815, "learning_rate": 4.675705349789712e-05, "loss": 0.5821, "step": 6376 }, { "epoch": 0.7561958970710305, "grad_norm": 1.311415128077252, "learning_rate": 4.675587109122041e-05, "loss": 0.388, "step": 6377 }, { "epoch": 0.7563144788331555, "grad_norm": 1.5121760083020388, "learning_rate": 4.6754688483981245e-05, "loss": 0.4529, "step": 6378 }, { "epoch": 0.7564330605952805, "grad_norm": 1.4939643932700748, "learning_rate": 4.6753505676190534e-05, "loss": 0.4166, "step": 6379 }, { "epoch": 0.7565516423574055, "grad_norm": 1.3654979596748529, "learning_rate": 4.675232266785918e-05, "loss": 0.4754, "step": 6380 }, { "epoch": 0.7566702241195304, "grad_norm": 1.7431542579573476, "learning_rate": 4.675113945899808e-05, "loss": 0.4331, "step": 6381 }, { "epoch": 0.7567888058816554, "grad_norm": 1.3703199685142164, "learning_rate": 4.674995604961815e-05, "loss": 0.4035, "step": 6382 }, { "epoch": 0.7569073876437804, "grad_norm": 1.6277486479822678, "learning_rate": 4.6748772439730296e-05, "loss": 0.6543, "step": 6383 }, { "epoch": 0.7570259694059054, "grad_norm": 2.320300536021157, "learning_rate": 4.674758862934543e-05, "loss": 0.6792, "step": 6384 }, { "epoch": 0.7571445511680304, "grad_norm": 1.313674046161056, "learning_rate": 4.674640461847446e-05, "loss": 0.3928, "step": 6385 }, { "epoch": 0.7572631329301553, "grad_norm": 1.6008584178966698, "learning_rate": 4.674522040712831e-05, "loss": 0.4775, "step": 6386 }, { "epoch": 0.7573817146922803, "grad_norm": 1.204795718682405, "learning_rate": 4.6744035995317904e-05, "loss": 0.4268, "step": 6387 }, { "epoch": 0.7575002964544053, "grad_norm": 1.3008577256366438, "learning_rate": 4.674285138305414e-05, "loss": 0.4624, "step": 6388 }, { "epoch": 0.7576188782165303, "grad_norm": 0.9339434218965825, "learning_rate": 4.6741666570347955e-05, "loss": 0.2961, "step": 6389 }, { "epoch": 0.7577374599786553, "grad_norm": 1.3218351015514316, "learning_rate": 4.6740481557210255e-05, "loss": 0.4566, "step": 6390 }, { "epoch": 0.7578560417407803, "grad_norm": 1.4449707954912088, "learning_rate": 4.6739296343651984e-05, "loss": 0.4949, "step": 6391 }, { "epoch": 0.7579746235029052, "grad_norm": 1.1416505505809214, "learning_rate": 4.673811092968405e-05, "loss": 0.3755, "step": 6392 }, { "epoch": 0.7580932052650302, "grad_norm": 1.5579435616217685, "learning_rate": 4.6736925315317405e-05, "loss": 0.429, "step": 6393 }, { "epoch": 0.7582117870271552, "grad_norm": 1.9696530106094354, "learning_rate": 4.673573950056295e-05, "loss": 0.7039, "step": 6394 }, { "epoch": 0.7583303687892802, "grad_norm": 1.0107883887827513, "learning_rate": 4.6734553485431644e-05, "loss": 0.3101, "step": 6395 }, { "epoch": 0.7584489505514052, "grad_norm": 1.167814399850664, "learning_rate": 4.6733367269934396e-05, "loss": 0.3866, "step": 6396 }, { "epoch": 0.7585675323135301, "grad_norm": 1.598100452253808, "learning_rate": 4.673218085408215e-05, "loss": 0.5933, "step": 6397 }, { "epoch": 0.7586861140756551, "grad_norm": 1.5499286159558943, "learning_rate": 4.673099423788586e-05, "loss": 0.5904, "step": 6398 }, { "epoch": 0.7588046958377801, "grad_norm": 1.6069915179791623, "learning_rate": 4.6729807421356445e-05, "loss": 0.6372, "step": 6399 }, { "epoch": 0.7589232775999052, "grad_norm": 1.393395391411628, "learning_rate": 4.672862040450485e-05, "loss": 0.4713, "step": 6400 }, { "epoch": 0.7590418593620302, "grad_norm": 1.5929691332121063, "learning_rate": 4.672743318734203e-05, "loss": 0.5967, "step": 6401 }, { "epoch": 0.7591604411241551, "grad_norm": 1.6923247459856718, "learning_rate": 4.672624576987891e-05, "loss": 0.6099, "step": 6402 }, { "epoch": 0.7592790228862801, "grad_norm": 1.3349663724973453, "learning_rate": 4.672505815212645e-05, "loss": 0.4296, "step": 6403 }, { "epoch": 0.7593976046484051, "grad_norm": 1.359473232695507, "learning_rate": 4.672387033409559e-05, "loss": 0.4382, "step": 6404 }, { "epoch": 0.7595161864105301, "grad_norm": 1.4302299571309112, "learning_rate": 4.67226823157973e-05, "loss": 0.5272, "step": 6405 }, { "epoch": 0.7596347681726551, "grad_norm": 1.147234166712874, "learning_rate": 4.67214940972425e-05, "loss": 0.5173, "step": 6406 }, { "epoch": 0.75975334993478, "grad_norm": 1.7253445215826135, "learning_rate": 4.672030567844217e-05, "loss": 0.4518, "step": 6407 }, { "epoch": 0.759871931696905, "grad_norm": 1.4651173014109806, "learning_rate": 4.671911705940725e-05, "loss": 0.6164, "step": 6408 }, { "epoch": 0.75999051345903, "grad_norm": 1.4604058134744229, "learning_rate": 4.671792824014871e-05, "loss": 0.4845, "step": 6409 }, { "epoch": 0.760109095221155, "grad_norm": 1.342755721730925, "learning_rate": 4.67167392206775e-05, "loss": 0.4405, "step": 6410 }, { "epoch": 0.76022767698328, "grad_norm": 1.2155231330655305, "learning_rate": 4.6715550001004595e-05, "loss": 0.4055, "step": 6411 }, { "epoch": 0.760346258745405, "grad_norm": 1.7705242987871812, "learning_rate": 4.6714360581140935e-05, "loss": 0.547, "step": 6412 }, { "epoch": 0.7604648405075299, "grad_norm": 1.2026494247519293, "learning_rate": 4.671317096109751e-05, "loss": 0.322, "step": 6413 }, { "epoch": 0.7605834222696549, "grad_norm": 1.7333295397221768, "learning_rate": 4.6711981140885266e-05, "loss": 0.4771, "step": 6414 }, { "epoch": 0.7607020040317799, "grad_norm": 1.8285567583799063, "learning_rate": 4.6710791120515186e-05, "loss": 0.4589, "step": 6415 }, { "epoch": 0.7608205857939049, "grad_norm": 1.8510524984689483, "learning_rate": 4.6709600899998227e-05, "loss": 0.6604, "step": 6416 }, { "epoch": 0.7609391675560299, "grad_norm": 1.0666000983178736, "learning_rate": 4.670841047934538e-05, "loss": 0.3965, "step": 6417 }, { "epoch": 0.7610577493181548, "grad_norm": 1.5513888927779644, "learning_rate": 4.670721985856761e-05, "loss": 0.4553, "step": 6418 }, { "epoch": 0.7611763310802798, "grad_norm": 1.824373617801327, "learning_rate": 4.670602903767589e-05, "loss": 0.6176, "step": 6419 }, { "epoch": 0.7612949128424048, "grad_norm": 1.5627240704132948, "learning_rate": 4.6704838016681194e-05, "loss": 0.4372, "step": 6420 }, { "epoch": 0.7614134946045298, "grad_norm": 1.80460435216683, "learning_rate": 4.670364679559451e-05, "loss": 0.5977, "step": 6421 }, { "epoch": 0.7615320763666548, "grad_norm": 1.7914159435872152, "learning_rate": 4.670245537442682e-05, "loss": 0.6279, "step": 6422 }, { "epoch": 0.7616506581287797, "grad_norm": 1.413865018881453, "learning_rate": 4.670126375318911e-05, "loss": 0.4546, "step": 6423 }, { "epoch": 0.7617692398909047, "grad_norm": 1.330705173222603, "learning_rate": 4.670007193189235e-05, "loss": 0.4226, "step": 6424 }, { "epoch": 0.7618878216530298, "grad_norm": 1.3134575371428086, "learning_rate": 4.669887991054755e-05, "loss": 0.4454, "step": 6425 }, { "epoch": 0.7620064034151548, "grad_norm": 1.3163422520332084, "learning_rate": 4.669768768916567e-05, "loss": 0.4224, "step": 6426 }, { "epoch": 0.7621249851772798, "grad_norm": 1.2029832276604893, "learning_rate": 4.669649526775772e-05, "loss": 0.3225, "step": 6427 }, { "epoch": 0.7622435669394048, "grad_norm": 1.3088757485088305, "learning_rate": 4.6695302646334696e-05, "loss": 0.4099, "step": 6428 }, { "epoch": 0.7623621487015297, "grad_norm": 1.3195542098819792, "learning_rate": 4.669410982490758e-05, "loss": 0.3074, "step": 6429 }, { "epoch": 0.7624807304636547, "grad_norm": 1.5325658883459392, "learning_rate": 4.669291680348738e-05, "loss": 0.4068, "step": 6430 }, { "epoch": 0.7625993122257797, "grad_norm": 1.3203077647651247, "learning_rate": 4.669172358208509e-05, "loss": 0.3552, "step": 6431 }, { "epoch": 0.7627178939879047, "grad_norm": 1.5172277575313575, "learning_rate": 4.6690530160711706e-05, "loss": 0.4217, "step": 6432 }, { "epoch": 0.7628364757500297, "grad_norm": 1.3277092787811597, "learning_rate": 4.668933653937823e-05, "loss": 0.3657, "step": 6433 }, { "epoch": 0.7629550575121546, "grad_norm": 1.4330126501601583, "learning_rate": 4.6688142718095674e-05, "loss": 0.4196, "step": 6434 }, { "epoch": 0.7630736392742796, "grad_norm": 1.405381881800787, "learning_rate": 4.668694869687503e-05, "loss": 0.5217, "step": 6435 }, { "epoch": 0.7631922210364046, "grad_norm": 1.6006976445324543, "learning_rate": 4.6685754475727314e-05, "loss": 0.519, "step": 6436 }, { "epoch": 0.7633108027985296, "grad_norm": 1.6376109359697357, "learning_rate": 4.668456005466354e-05, "loss": 0.5264, "step": 6437 }, { "epoch": 0.7634293845606546, "grad_norm": 1.539930971646191, "learning_rate": 4.668336543369471e-05, "loss": 0.536, "step": 6438 }, { "epoch": 0.7635479663227795, "grad_norm": 1.3223322924182594, "learning_rate": 4.668217061283185e-05, "loss": 0.3156, "step": 6439 }, { "epoch": 0.7636665480849045, "grad_norm": 1.3355908715578695, "learning_rate": 4.668097559208596e-05, "loss": 0.387, "step": 6440 }, { "epoch": 0.7637851298470295, "grad_norm": 1.4916414550098231, "learning_rate": 4.667978037146806e-05, "loss": 0.5523, "step": 6441 }, { "epoch": 0.7639037116091545, "grad_norm": 1.1308474400435427, "learning_rate": 4.6678584950989166e-05, "loss": 0.3363, "step": 6442 }, { "epoch": 0.7640222933712795, "grad_norm": 1.316071484681721, "learning_rate": 4.667738933066031e-05, "loss": 0.4495, "step": 6443 }, { "epoch": 0.7641408751334045, "grad_norm": 1.500205501935983, "learning_rate": 4.6676193510492506e-05, "loss": 0.62, "step": 6444 }, { "epoch": 0.7642594568955294, "grad_norm": 1.5590775239592332, "learning_rate": 4.667499749049678e-05, "loss": 0.5531, "step": 6445 }, { "epoch": 0.7643780386576544, "grad_norm": 1.2984710585101096, "learning_rate": 4.667380127068415e-05, "loss": 0.5089, "step": 6446 }, { "epoch": 0.7644966204197794, "grad_norm": 1.2114479726217011, "learning_rate": 4.667260485106565e-05, "loss": 0.3264, "step": 6447 }, { "epoch": 0.7646152021819044, "grad_norm": 1.329612608425185, "learning_rate": 4.667140823165232e-05, "loss": 0.4754, "step": 6448 }, { "epoch": 0.7647337839440294, "grad_norm": 1.704977618714458, "learning_rate": 4.667021141245518e-05, "loss": 0.5764, "step": 6449 }, { "epoch": 0.7648523657061544, "grad_norm": 1.638804696965265, "learning_rate": 4.6669014393485254e-05, "loss": 0.4876, "step": 6450 }, { "epoch": 0.7649709474682794, "grad_norm": 1.1010650840052179, "learning_rate": 4.666781717475359e-05, "loss": 0.2961, "step": 6451 }, { "epoch": 0.7650895292304044, "grad_norm": 1.378726161184451, "learning_rate": 4.6666619756271236e-05, "loss": 0.4631, "step": 6452 }, { "epoch": 0.7652081109925294, "grad_norm": 1.4812342936156728, "learning_rate": 4.66654221380492e-05, "loss": 0.5238, "step": 6453 }, { "epoch": 0.7653266927546544, "grad_norm": 1.2047353970987953, "learning_rate": 4.666422432009855e-05, "loss": 0.3812, "step": 6454 }, { "epoch": 0.7654452745167794, "grad_norm": 1.3230846691937228, "learning_rate": 4.666302630243031e-05, "loss": 0.409, "step": 6455 }, { "epoch": 0.7655638562789043, "grad_norm": 1.6184003334444839, "learning_rate": 4.666182808505554e-05, "loss": 0.5414, "step": 6456 }, { "epoch": 0.7656824380410293, "grad_norm": 1.1282027779603376, "learning_rate": 4.666062966798528e-05, "loss": 0.342, "step": 6457 }, { "epoch": 0.7658010198031543, "grad_norm": 1.3697230127732885, "learning_rate": 4.665943105123057e-05, "loss": 0.4596, "step": 6458 }, { "epoch": 0.7659196015652793, "grad_norm": 2.453823672789991, "learning_rate": 4.665823223480247e-05, "loss": 0.742, "step": 6459 }, { "epoch": 0.7660381833274043, "grad_norm": 1.2935627740031765, "learning_rate": 4.665703321871203e-05, "loss": 0.381, "step": 6460 }, { "epoch": 0.7661567650895292, "grad_norm": 1.4180040867851065, "learning_rate": 4.66558340029703e-05, "loss": 0.4217, "step": 6461 }, { "epoch": 0.7662753468516542, "grad_norm": 1.5261154115877416, "learning_rate": 4.665463458758833e-05, "loss": 0.6565, "step": 6462 }, { "epoch": 0.7663939286137792, "grad_norm": 1.17520212426376, "learning_rate": 4.665343497257719e-05, "loss": 0.3684, "step": 6463 }, { "epoch": 0.7665125103759042, "grad_norm": 1.3179439747098858, "learning_rate": 4.665223515794793e-05, "loss": 0.5264, "step": 6464 }, { "epoch": 0.7666310921380292, "grad_norm": 1.2429272828802582, "learning_rate": 4.665103514371162e-05, "loss": 0.3837, "step": 6465 }, { "epoch": 0.7667496739001541, "grad_norm": 1.3587044291326027, "learning_rate": 4.664983492987931e-05, "loss": 0.3617, "step": 6466 }, { "epoch": 0.7668682556622791, "grad_norm": 1.256189492911367, "learning_rate": 4.6648634516462075e-05, "loss": 0.4963, "step": 6467 }, { "epoch": 0.7669868374244041, "grad_norm": 1.2474470533267716, "learning_rate": 4.664743390347097e-05, "loss": 0.3878, "step": 6468 }, { "epoch": 0.7671054191865291, "grad_norm": 1.453369973399849, "learning_rate": 4.664623309091708e-05, "loss": 0.4113, "step": 6469 }, { "epoch": 0.7672240009486541, "grad_norm": 1.2840396128370917, "learning_rate": 4.6645032078811454e-05, "loss": 0.4027, "step": 6470 }, { "epoch": 0.767342582710779, "grad_norm": 1.595395091681589, "learning_rate": 4.664383086716518e-05, "loss": 0.633, "step": 6471 }, { "epoch": 0.767461164472904, "grad_norm": 1.1877442668559632, "learning_rate": 4.664262945598933e-05, "loss": 0.3749, "step": 6472 }, { "epoch": 0.767579746235029, "grad_norm": 1.4172857992591599, "learning_rate": 4.664142784529497e-05, "loss": 0.4716, "step": 6473 }, { "epoch": 0.767698327997154, "grad_norm": 1.0966281256963835, "learning_rate": 4.664022603509319e-05, "loss": 0.2825, "step": 6474 }, { "epoch": 0.7678169097592791, "grad_norm": 1.6544893955028661, "learning_rate": 4.663902402539506e-05, "loss": 0.5551, "step": 6475 }, { "epoch": 0.7679354915214041, "grad_norm": 1.4694299699863405, "learning_rate": 4.663782181621167e-05, "loss": 0.4179, "step": 6476 }, { "epoch": 0.768054073283529, "grad_norm": 1.1701320782118931, "learning_rate": 4.663661940755409e-05, "loss": 0.2864, "step": 6477 }, { "epoch": 0.768172655045654, "grad_norm": 1.6338833511766266, "learning_rate": 4.663541679943341e-05, "loss": 0.4764, "step": 6478 }, { "epoch": 0.768291236807779, "grad_norm": 1.7776164992610815, "learning_rate": 4.6634213991860725e-05, "loss": 0.4201, "step": 6479 }, { "epoch": 0.768409818569904, "grad_norm": 1.5395720778779978, "learning_rate": 4.663301098484711e-05, "loss": 0.4249, "step": 6480 }, { "epoch": 0.768528400332029, "grad_norm": 1.1992309264281984, "learning_rate": 4.663180777840367e-05, "loss": 0.4957, "step": 6481 }, { "epoch": 0.7686469820941539, "grad_norm": 2.0749187178196884, "learning_rate": 4.663060437254149e-05, "loss": 0.6147, "step": 6482 }, { "epoch": 0.7687655638562789, "grad_norm": 1.2843186067622823, "learning_rate": 4.662940076727165e-05, "loss": 0.3225, "step": 6483 }, { "epoch": 0.7688841456184039, "grad_norm": 1.2583362540876608, "learning_rate": 4.6628196962605264e-05, "loss": 0.4189, "step": 6484 }, { "epoch": 0.7690027273805289, "grad_norm": 1.3250166006018702, "learning_rate": 4.662699295855343e-05, "loss": 0.4007, "step": 6485 }, { "epoch": 0.7691213091426539, "grad_norm": 1.4168172092669973, "learning_rate": 4.6625788755127234e-05, "loss": 0.3607, "step": 6486 }, { "epoch": 0.7692398909047788, "grad_norm": 1.2787063208223133, "learning_rate": 4.6624584352337785e-05, "loss": 0.3277, "step": 6487 }, { "epoch": 0.7693584726669038, "grad_norm": 1.547480863147103, "learning_rate": 4.66233797501962e-05, "loss": 0.5388, "step": 6488 }, { "epoch": 0.7694770544290288, "grad_norm": 1.472271960827582, "learning_rate": 4.662217494871355e-05, "loss": 0.4292, "step": 6489 }, { "epoch": 0.7695956361911538, "grad_norm": 1.346044493131191, "learning_rate": 4.662096994790097e-05, "loss": 0.4433, "step": 6490 }, { "epoch": 0.7697142179532788, "grad_norm": 1.3774488443649382, "learning_rate": 4.661976474776957e-05, "loss": 0.3946, "step": 6491 }, { "epoch": 0.7698327997154037, "grad_norm": 1.3387218577332938, "learning_rate": 4.661855934833044e-05, "loss": 0.4436, "step": 6492 }, { "epoch": 0.7699513814775287, "grad_norm": 1.5211497409189045, "learning_rate": 4.6617353749594705e-05, "loss": 0.4678, "step": 6493 }, { "epoch": 0.7700699632396537, "grad_norm": 1.5620165297944306, "learning_rate": 4.6616147951573473e-05, "loss": 0.4204, "step": 6494 }, { "epoch": 0.7701885450017787, "grad_norm": 1.273343397846066, "learning_rate": 4.6614941954277874e-05, "loss": 0.3989, "step": 6495 }, { "epoch": 0.7703071267639037, "grad_norm": 1.4827005910864917, "learning_rate": 4.6613735757719015e-05, "loss": 0.2968, "step": 6496 }, { "epoch": 0.7704257085260287, "grad_norm": 1.6263568272872708, "learning_rate": 4.661252936190801e-05, "loss": 0.4583, "step": 6497 }, { "epoch": 0.7705442902881536, "grad_norm": 1.7261357024183326, "learning_rate": 4.661132276685599e-05, "loss": 0.5942, "step": 6498 }, { "epoch": 0.7706628720502786, "grad_norm": 1.2871280905432627, "learning_rate": 4.6610115972574076e-05, "loss": 0.3665, "step": 6499 }, { "epoch": 0.7707814538124037, "grad_norm": 1.6336164844780496, "learning_rate": 4.66089089790734e-05, "loss": 0.6051, "step": 6500 }, { "epoch": 0.7709000355745287, "grad_norm": 1.0089709225249612, "learning_rate": 4.6607701786365073e-05, "loss": 0.3319, "step": 6501 }, { "epoch": 0.7710186173366537, "grad_norm": 1.1306694839257165, "learning_rate": 4.6606494394460234e-05, "loss": 0.3932, "step": 6502 }, { "epoch": 0.7711371990987786, "grad_norm": 0.9224723105363929, "learning_rate": 4.660528680337001e-05, "loss": 0.2361, "step": 6503 }, { "epoch": 0.7712557808609036, "grad_norm": 1.3306746757284846, "learning_rate": 4.660407901310554e-05, "loss": 0.368, "step": 6504 }, { "epoch": 0.7713743626230286, "grad_norm": 1.244160563890268, "learning_rate": 4.6602871023677954e-05, "loss": 0.2994, "step": 6505 }, { "epoch": 0.7714929443851536, "grad_norm": 1.4076381142777747, "learning_rate": 4.660166283509839e-05, "loss": 0.3604, "step": 6506 }, { "epoch": 0.7716115261472786, "grad_norm": 1.1310523313151444, "learning_rate": 4.6600454447377984e-05, "loss": 0.3696, "step": 6507 }, { "epoch": 0.7717301079094036, "grad_norm": 1.6816453083968836, "learning_rate": 4.659924586052787e-05, "loss": 0.6305, "step": 6508 }, { "epoch": 0.7718486896715285, "grad_norm": 1.3113942665916933, "learning_rate": 4.6598037074559206e-05, "loss": 0.3883, "step": 6509 }, { "epoch": 0.7719672714336535, "grad_norm": 1.3267456865479466, "learning_rate": 4.659682808948311e-05, "loss": 0.4406, "step": 6510 }, { "epoch": 0.7720858531957785, "grad_norm": 1.3727033883731172, "learning_rate": 4.659561890531075e-05, "loss": 0.367, "step": 6511 }, { "epoch": 0.7722044349579035, "grad_norm": 1.4160859909008723, "learning_rate": 4.6594409522053274e-05, "loss": 0.4587, "step": 6512 }, { "epoch": 0.7723230167200285, "grad_norm": 1.4093932507475724, "learning_rate": 4.659319993972182e-05, "loss": 0.428, "step": 6513 }, { "epoch": 0.7724415984821534, "grad_norm": 2.0514626920292423, "learning_rate": 4.659199015832754e-05, "loss": 0.7161, "step": 6514 }, { "epoch": 0.7725601802442784, "grad_norm": 1.248877552649357, "learning_rate": 4.659078017788159e-05, "loss": 0.3889, "step": 6515 }, { "epoch": 0.7726787620064034, "grad_norm": 1.4449355139035729, "learning_rate": 4.658956999839512e-05, "loss": 0.4634, "step": 6516 }, { "epoch": 0.7727973437685284, "grad_norm": 1.4226979592672477, "learning_rate": 4.658835961987929e-05, "loss": 0.4873, "step": 6517 }, { "epoch": 0.7729159255306534, "grad_norm": 1.4008376908230498, "learning_rate": 4.658714904234526e-05, "loss": 0.4552, "step": 6518 }, { "epoch": 0.7730345072927783, "grad_norm": 1.2283476758321132, "learning_rate": 4.6585938265804185e-05, "loss": 0.4203, "step": 6519 }, { "epoch": 0.7731530890549033, "grad_norm": 1.8445197291233268, "learning_rate": 4.658472729026723e-05, "loss": 0.4691, "step": 6520 }, { "epoch": 0.7732716708170283, "grad_norm": 1.120060473976515, "learning_rate": 4.658351611574556e-05, "loss": 0.334, "step": 6521 }, { "epoch": 0.7733902525791533, "grad_norm": 0.9966498068422353, "learning_rate": 4.658230474225034e-05, "loss": 0.2891, "step": 6522 }, { "epoch": 0.7735088343412783, "grad_norm": 1.531459228654834, "learning_rate": 4.6581093169792736e-05, "loss": 0.4761, "step": 6523 }, { "epoch": 0.7736274161034032, "grad_norm": 1.218605961609238, "learning_rate": 4.657988139838391e-05, "loss": 0.3765, "step": 6524 }, { "epoch": 0.7737459978655283, "grad_norm": 1.2168243628158208, "learning_rate": 4.6578669428035046e-05, "loss": 0.3852, "step": 6525 }, { "epoch": 0.7738645796276533, "grad_norm": 1.4128000594398538, "learning_rate": 4.6577457258757305e-05, "loss": 0.4081, "step": 6526 }, { "epoch": 0.7739831613897783, "grad_norm": 1.8018811795496454, "learning_rate": 4.657624489056188e-05, "loss": 0.8212, "step": 6527 }, { "epoch": 0.7741017431519033, "grad_norm": 1.3932370558432718, "learning_rate": 4.6575032323459936e-05, "loss": 0.564, "step": 6528 }, { "epoch": 0.7742203249140283, "grad_norm": 1.4671316374111003, "learning_rate": 4.657381955746264e-05, "loss": 0.4626, "step": 6529 }, { "epoch": 0.7743389066761532, "grad_norm": 1.6585652593287696, "learning_rate": 4.6572606592581185e-05, "loss": 0.5325, "step": 6530 }, { "epoch": 0.7744574884382782, "grad_norm": 1.8186272342583483, "learning_rate": 4.657139342882676e-05, "loss": 0.5823, "step": 6531 }, { "epoch": 0.7745760702004032, "grad_norm": 1.582671612198208, "learning_rate": 4.6570180066210535e-05, "loss": 0.5073, "step": 6532 }, { "epoch": 0.7746946519625282, "grad_norm": 1.5956702678256094, "learning_rate": 4.6568966504743695e-05, "loss": 0.5744, "step": 6533 }, { "epoch": 0.7748132337246532, "grad_norm": 1.252404815012282, "learning_rate": 4.6567752744437444e-05, "loss": 0.4314, "step": 6534 }, { "epoch": 0.7749318154867781, "grad_norm": 1.188551376885332, "learning_rate": 4.6566538785302954e-05, "loss": 0.3851, "step": 6535 }, { "epoch": 0.7750503972489031, "grad_norm": 1.2470106562023522, "learning_rate": 4.656532462735143e-05, "loss": 0.491, "step": 6536 }, { "epoch": 0.7751689790110281, "grad_norm": 1.2937397339877341, "learning_rate": 4.656411027059405e-05, "loss": 0.4969, "step": 6537 }, { "epoch": 0.7752875607731531, "grad_norm": 1.5625229148846294, "learning_rate": 4.656289571504202e-05, "loss": 0.6669, "step": 6538 }, { "epoch": 0.7754061425352781, "grad_norm": 1.3401974795476501, "learning_rate": 4.6561680960706534e-05, "loss": 0.3877, "step": 6539 }, { "epoch": 0.775524724297403, "grad_norm": 0.9860766231121926, "learning_rate": 4.656046600759879e-05, "loss": 0.3324, "step": 6540 }, { "epoch": 0.775643306059528, "grad_norm": 1.5322818544569272, "learning_rate": 4.6559250855729987e-05, "loss": 0.4561, "step": 6541 }, { "epoch": 0.775761887821653, "grad_norm": 1.321905160995246, "learning_rate": 4.655803550511133e-05, "loss": 0.4752, "step": 6542 }, { "epoch": 0.775880469583778, "grad_norm": 1.1170411025960196, "learning_rate": 4.6556819955754024e-05, "loss": 0.3534, "step": 6543 }, { "epoch": 0.775999051345903, "grad_norm": 1.3305223122317076, "learning_rate": 4.6555604207669266e-05, "loss": 0.408, "step": 6544 }, { "epoch": 0.776117633108028, "grad_norm": 1.3377376979450066, "learning_rate": 4.6554388260868275e-05, "loss": 0.4269, "step": 6545 }, { "epoch": 0.7762362148701529, "grad_norm": 2.21015347267266, "learning_rate": 4.655317211536226e-05, "loss": 0.5413, "step": 6546 }, { "epoch": 0.7763547966322779, "grad_norm": 1.7716878362483046, "learning_rate": 4.6551955771162425e-05, "loss": 0.5767, "step": 6547 }, { "epoch": 0.7764733783944029, "grad_norm": 1.377367055606228, "learning_rate": 4.655073922827998e-05, "loss": 0.4409, "step": 6548 }, { "epoch": 0.7765919601565279, "grad_norm": 1.261813075480174, "learning_rate": 4.654952248672616e-05, "loss": 0.2847, "step": 6549 }, { "epoch": 0.776710541918653, "grad_norm": 1.3986104484093065, "learning_rate": 4.654830554651216e-05, "loss": 0.4353, "step": 6550 }, { "epoch": 0.7768291236807779, "grad_norm": 1.4572227997162819, "learning_rate": 4.654708840764921e-05, "loss": 0.5021, "step": 6551 }, { "epoch": 0.7769477054429029, "grad_norm": 1.4022894493442308, "learning_rate": 4.654587107014853e-05, "loss": 0.3983, "step": 6552 }, { "epoch": 0.7770662872050279, "grad_norm": 1.2834708178445597, "learning_rate": 4.6544653534021334e-05, "loss": 0.4158, "step": 6553 }, { "epoch": 0.7771848689671529, "grad_norm": 3.0489271833187592, "learning_rate": 4.654343579927885e-05, "loss": 0.8787, "step": 6554 }, { "epoch": 0.7773034507292779, "grad_norm": 1.3080379289422677, "learning_rate": 4.6542217865932315e-05, "loss": 0.2629, "step": 6555 }, { "epoch": 0.7774220324914028, "grad_norm": 1.343897645741176, "learning_rate": 4.654099973399294e-05, "loss": 0.336, "step": 6556 }, { "epoch": 0.7775406142535278, "grad_norm": 1.447205648137889, "learning_rate": 4.653978140347197e-05, "loss": 0.4831, "step": 6557 }, { "epoch": 0.7776591960156528, "grad_norm": 1.2807828992554564, "learning_rate": 4.653856287438062e-05, "loss": 0.4801, "step": 6558 }, { "epoch": 0.7777777777777778, "grad_norm": 1.2902055525111253, "learning_rate": 4.653734414673014e-05, "loss": 0.4083, "step": 6559 }, { "epoch": 0.7778963595399028, "grad_norm": 1.2372292089986545, "learning_rate": 4.6536125220531755e-05, "loss": 0.4319, "step": 6560 }, { "epoch": 0.7780149413020278, "grad_norm": 1.3271920961881103, "learning_rate": 4.6534906095796706e-05, "loss": 0.4973, "step": 6561 }, { "epoch": 0.7781335230641527, "grad_norm": 1.4005573852124653, "learning_rate": 4.653368677253624e-05, "loss": 0.3038, "step": 6562 }, { "epoch": 0.7782521048262777, "grad_norm": 1.4876545931199878, "learning_rate": 4.653246725076157e-05, "loss": 0.4208, "step": 6563 }, { "epoch": 0.7783706865884027, "grad_norm": 1.6838067541636368, "learning_rate": 4.653124753048397e-05, "loss": 0.5805, "step": 6564 }, { "epoch": 0.7784892683505277, "grad_norm": 1.3954592638945884, "learning_rate": 4.653002761171467e-05, "loss": 0.4218, "step": 6565 }, { "epoch": 0.7786078501126527, "grad_norm": 1.5621459629155585, "learning_rate": 4.652880749446491e-05, "loss": 0.5241, "step": 6566 }, { "epoch": 0.7787264318747776, "grad_norm": 1.2926988465946827, "learning_rate": 4.6527587178745944e-05, "loss": 0.3251, "step": 6567 }, { "epoch": 0.7788450136369026, "grad_norm": 1.2667694853422353, "learning_rate": 4.652636666456903e-05, "loss": 0.3381, "step": 6568 }, { "epoch": 0.7789635953990276, "grad_norm": 1.531268806586195, "learning_rate": 4.6525145951945414e-05, "loss": 0.4225, "step": 6569 }, { "epoch": 0.7790821771611526, "grad_norm": 1.377772852536876, "learning_rate": 4.652392504088634e-05, "loss": 0.4351, "step": 6570 }, { "epoch": 0.7792007589232776, "grad_norm": 1.5343216038050087, "learning_rate": 4.6522703931403076e-05, "loss": 0.5383, "step": 6571 }, { "epoch": 0.7793193406854025, "grad_norm": 1.3832083923892833, "learning_rate": 4.6521482623506884e-05, "loss": 0.3849, "step": 6572 }, { "epoch": 0.7794379224475275, "grad_norm": 1.5905603114586269, "learning_rate": 4.6520261117209e-05, "loss": 0.3746, "step": 6573 }, { "epoch": 0.7795565042096525, "grad_norm": 1.3059507069915892, "learning_rate": 4.6519039412520705e-05, "loss": 0.4432, "step": 6574 }, { "epoch": 0.7796750859717776, "grad_norm": 1.3754256838610053, "learning_rate": 4.6517817509453255e-05, "loss": 0.4308, "step": 6575 }, { "epoch": 0.7797936677339026, "grad_norm": 1.1673034472723682, "learning_rate": 4.651659540801791e-05, "loss": 0.3347, "step": 6576 }, { "epoch": 0.7799122494960276, "grad_norm": 1.2175770901414391, "learning_rate": 4.6515373108225947e-05, "loss": 0.3486, "step": 6577 }, { "epoch": 0.7800308312581525, "grad_norm": 1.2881720123690121, "learning_rate": 4.6514150610088625e-05, "loss": 0.4074, "step": 6578 }, { "epoch": 0.7801494130202775, "grad_norm": 1.3597629581164035, "learning_rate": 4.651292791361722e-05, "loss": 0.4276, "step": 6579 }, { "epoch": 0.7802679947824025, "grad_norm": 1.7343670804077032, "learning_rate": 4.6511705018822994e-05, "loss": 0.4493, "step": 6580 }, { "epoch": 0.7803865765445275, "grad_norm": 1.4995219045695767, "learning_rate": 4.651048192571723e-05, "loss": 0.3193, "step": 6581 }, { "epoch": 0.7805051583066525, "grad_norm": 1.9612531233107218, "learning_rate": 4.6509258634311204e-05, "loss": 0.5545, "step": 6582 }, { "epoch": 0.7806237400687774, "grad_norm": 1.251403274746046, "learning_rate": 4.650803514461618e-05, "loss": 0.3055, "step": 6583 }, { "epoch": 0.7807423218309024, "grad_norm": 2.045916086418424, "learning_rate": 4.650681145664345e-05, "loss": 0.6073, "step": 6584 }, { "epoch": 0.7808609035930274, "grad_norm": 1.0903603631561924, "learning_rate": 4.6505587570404294e-05, "loss": 0.2879, "step": 6585 }, { "epoch": 0.7809794853551524, "grad_norm": 1.1904648057298506, "learning_rate": 4.650436348591e-05, "loss": 0.3152, "step": 6586 }, { "epoch": 0.7810980671172774, "grad_norm": 1.7696324971328283, "learning_rate": 4.6503139203171835e-05, "loss": 0.4376, "step": 6587 }, { "epoch": 0.7812166488794023, "grad_norm": 1.321851338481891, "learning_rate": 4.65019147222011e-05, "loss": 0.4104, "step": 6588 }, { "epoch": 0.7813352306415273, "grad_norm": 1.6544236989936176, "learning_rate": 4.650069004300907e-05, "loss": 0.4254, "step": 6589 }, { "epoch": 0.7814538124036523, "grad_norm": 1.1617717346810583, "learning_rate": 4.649946516560706e-05, "loss": 0.2722, "step": 6590 }, { "epoch": 0.7815723941657773, "grad_norm": 1.5239958868019585, "learning_rate": 4.649824009000634e-05, "loss": 0.4815, "step": 6591 }, { "epoch": 0.7816909759279023, "grad_norm": 1.3215836343969878, "learning_rate": 4.6497014816218204e-05, "loss": 0.4013, "step": 6592 }, { "epoch": 0.7818095576900272, "grad_norm": 1.7773785611082737, "learning_rate": 4.6495789344253956e-05, "loss": 0.6061, "step": 6593 }, { "epoch": 0.7819281394521522, "grad_norm": 1.6180311324097827, "learning_rate": 4.6494563674124885e-05, "loss": 0.585, "step": 6594 }, { "epoch": 0.7820467212142772, "grad_norm": 1.2182669749934008, "learning_rate": 4.64933378058423e-05, "loss": 0.4376, "step": 6595 }, { "epoch": 0.7821653029764022, "grad_norm": 1.4204672644715368, "learning_rate": 4.64921117394175e-05, "loss": 0.4075, "step": 6596 }, { "epoch": 0.7822838847385272, "grad_norm": 1.552530833583739, "learning_rate": 4.649088547486178e-05, "loss": 0.5427, "step": 6597 }, { "epoch": 0.7824024665006521, "grad_norm": 1.6896210923377524, "learning_rate": 4.648965901218645e-05, "loss": 0.3838, "step": 6598 }, { "epoch": 0.7825210482627771, "grad_norm": 1.1646877590311608, "learning_rate": 4.6488432351402814e-05, "loss": 0.3693, "step": 6599 }, { "epoch": 0.7826396300249022, "grad_norm": 1.3872232845816346, "learning_rate": 4.648720549252219e-05, "loss": 0.3612, "step": 6600 }, { "epoch": 0.7827582117870272, "grad_norm": 1.2235432309515755, "learning_rate": 4.6485978435555876e-05, "loss": 0.4067, "step": 6601 }, { "epoch": 0.7828767935491522, "grad_norm": 1.5887460736133132, "learning_rate": 4.6484751180515195e-05, "loss": 0.5259, "step": 6602 }, { "epoch": 0.7829953753112772, "grad_norm": 1.3352573761402948, "learning_rate": 4.648352372741145e-05, "loss": 0.4603, "step": 6603 }, { "epoch": 0.7831139570734021, "grad_norm": 1.6872872542466342, "learning_rate": 4.648229607625595e-05, "loss": 0.5336, "step": 6604 }, { "epoch": 0.7832325388355271, "grad_norm": 1.2490662215353685, "learning_rate": 4.648106822706004e-05, "loss": 0.4372, "step": 6605 }, { "epoch": 0.7833511205976521, "grad_norm": 2.371324581906594, "learning_rate": 4.647984017983501e-05, "loss": 0.7074, "step": 6606 }, { "epoch": 0.7834697023597771, "grad_norm": 1.6153160111139822, "learning_rate": 4.647861193459221e-05, "loss": 0.5244, "step": 6607 }, { "epoch": 0.7835882841219021, "grad_norm": 1.5251649159924985, "learning_rate": 4.647738349134294e-05, "loss": 0.5058, "step": 6608 }, { "epoch": 0.783706865884027, "grad_norm": 1.5961512332210726, "learning_rate": 4.647615485009852e-05, "loss": 0.4258, "step": 6609 }, { "epoch": 0.783825447646152, "grad_norm": 1.3324454357961255, "learning_rate": 4.64749260108703e-05, "loss": 0.3645, "step": 6610 }, { "epoch": 0.783944029408277, "grad_norm": 1.5615902586942711, "learning_rate": 4.647369697366959e-05, "loss": 0.5477, "step": 6611 }, { "epoch": 0.784062611170402, "grad_norm": 1.065742833650226, "learning_rate": 4.647246773850773e-05, "loss": 0.2854, "step": 6612 }, { "epoch": 0.784181192932527, "grad_norm": 1.0915283346077382, "learning_rate": 4.6471238305396056e-05, "loss": 0.3757, "step": 6613 }, { "epoch": 0.784299774694652, "grad_norm": 1.116887846202958, "learning_rate": 4.647000867434588e-05, "loss": 0.4114, "step": 6614 }, { "epoch": 0.7844183564567769, "grad_norm": 1.2776495097600167, "learning_rate": 4.6468778845368566e-05, "loss": 0.3584, "step": 6615 }, { "epoch": 0.7845369382189019, "grad_norm": 1.1430431398979126, "learning_rate": 4.646754881847543e-05, "loss": 0.3368, "step": 6616 }, { "epoch": 0.7846555199810269, "grad_norm": 1.1785757549882503, "learning_rate": 4.646631859367782e-05, "loss": 0.2818, "step": 6617 }, { "epoch": 0.7847741017431519, "grad_norm": 1.0665188018319987, "learning_rate": 4.6465088170987076e-05, "loss": 0.3598, "step": 6618 }, { "epoch": 0.7848926835052769, "grad_norm": 1.5582696771865934, "learning_rate": 4.646385755041454e-05, "loss": 0.546, "step": 6619 }, { "epoch": 0.7850112652674018, "grad_norm": 1.4715024267403254, "learning_rate": 4.646262673197156e-05, "loss": 0.3677, "step": 6620 }, { "epoch": 0.7851298470295268, "grad_norm": 1.301571737301572, "learning_rate": 4.646139571566949e-05, "loss": 0.4136, "step": 6621 }, { "epoch": 0.7852484287916518, "grad_norm": 1.5401951086643946, "learning_rate": 4.646016450151966e-05, "loss": 0.5287, "step": 6622 }, { "epoch": 0.7853670105537768, "grad_norm": 1.2142059957851523, "learning_rate": 4.6458933089533416e-05, "loss": 0.3049, "step": 6623 }, { "epoch": 0.7854855923159018, "grad_norm": 1.6300977595146753, "learning_rate": 4.645770147972214e-05, "loss": 0.4982, "step": 6624 }, { "epoch": 0.7856041740780269, "grad_norm": 1.4395547715528687, "learning_rate": 4.645646967209717e-05, "loss": 0.3524, "step": 6625 }, { "epoch": 0.7857227558401518, "grad_norm": 1.8461863985088411, "learning_rate": 4.645523766666986e-05, "loss": 0.597, "step": 6626 }, { "epoch": 0.7858413376022768, "grad_norm": 1.7274946257097235, "learning_rate": 4.645400546345157e-05, "loss": 0.593, "step": 6627 }, { "epoch": 0.7859599193644018, "grad_norm": 1.7788116565995369, "learning_rate": 4.6452773062453646e-05, "loss": 0.5449, "step": 6628 }, { "epoch": 0.7860785011265268, "grad_norm": 1.2055902326119754, "learning_rate": 4.645154046368747e-05, "loss": 0.3368, "step": 6629 }, { "epoch": 0.7861970828886518, "grad_norm": 1.4037112010367212, "learning_rate": 4.645030766716441e-05, "loss": 0.5296, "step": 6630 }, { "epoch": 0.7863156646507767, "grad_norm": 1.0622710806818882, "learning_rate": 4.64490746728958e-05, "loss": 0.3247, "step": 6631 }, { "epoch": 0.7864342464129017, "grad_norm": 1.961402237177919, "learning_rate": 4.644784148089303e-05, "loss": 0.5242, "step": 6632 }, { "epoch": 0.7865528281750267, "grad_norm": 1.7623930305234956, "learning_rate": 4.6446608091167456e-05, "loss": 0.4759, "step": 6633 }, { "epoch": 0.7866714099371517, "grad_norm": 1.6666386109048865, "learning_rate": 4.644537450373046e-05, "loss": 0.3788, "step": 6634 }, { "epoch": 0.7867899916992767, "grad_norm": 1.6457684504133057, "learning_rate": 4.644414071859341e-05, "loss": 0.5141, "step": 6635 }, { "epoch": 0.7869085734614016, "grad_norm": 1.385959081706645, "learning_rate": 4.644290673576768e-05, "loss": 0.4218, "step": 6636 }, { "epoch": 0.7870271552235266, "grad_norm": 1.1663279395481374, "learning_rate": 4.6441672555264645e-05, "loss": 0.3106, "step": 6637 }, { "epoch": 0.7871457369856516, "grad_norm": 1.1331546249382438, "learning_rate": 4.644043817709568e-05, "loss": 0.3627, "step": 6638 }, { "epoch": 0.7872643187477766, "grad_norm": 1.1383862064615475, "learning_rate": 4.643920360127217e-05, "loss": 0.3647, "step": 6639 }, { "epoch": 0.7873829005099016, "grad_norm": 1.7257923375862236, "learning_rate": 4.6437968827805486e-05, "loss": 0.446, "step": 6640 }, { "epoch": 0.7875014822720265, "grad_norm": 1.160297109887058, "learning_rate": 4.6436733856707025e-05, "loss": 0.3328, "step": 6641 }, { "epoch": 0.7876200640341515, "grad_norm": 1.454392590721401, "learning_rate": 4.643549868798817e-05, "loss": 0.413, "step": 6642 }, { "epoch": 0.7877386457962765, "grad_norm": 1.2750985393389926, "learning_rate": 4.643426332166029e-05, "loss": 0.3623, "step": 6643 }, { "epoch": 0.7878572275584015, "grad_norm": 1.6313985992965347, "learning_rate": 4.643302775773479e-05, "loss": 0.4306, "step": 6644 }, { "epoch": 0.7879758093205265, "grad_norm": 1.314634510862468, "learning_rate": 4.643179199622307e-05, "loss": 0.4179, "step": 6645 }, { "epoch": 0.7880943910826514, "grad_norm": 1.0705459733454226, "learning_rate": 4.6430556037136495e-05, "loss": 0.297, "step": 6646 }, { "epoch": 0.7882129728447764, "grad_norm": 1.3726886085215728, "learning_rate": 4.642931988048648e-05, "loss": 0.3636, "step": 6647 }, { "epoch": 0.7883315546069014, "grad_norm": 1.2046000989361858, "learning_rate": 4.642808352628442e-05, "loss": 0.351, "step": 6648 }, { "epoch": 0.7884501363690264, "grad_norm": 1.3509071657805323, "learning_rate": 4.6426846974541696e-05, "loss": 0.438, "step": 6649 }, { "epoch": 0.7885687181311515, "grad_norm": 1.4352684943496476, "learning_rate": 4.642561022526972e-05, "loss": 0.4469, "step": 6650 }, { "epoch": 0.7886872998932765, "grad_norm": 1.2221690424330958, "learning_rate": 4.642437327847989e-05, "loss": 0.3184, "step": 6651 }, { "epoch": 0.7888058816554014, "grad_norm": 1.2661749258692732, "learning_rate": 4.642313613418361e-05, "loss": 0.3689, "step": 6652 }, { "epoch": 0.7889244634175264, "grad_norm": 1.5911924899119352, "learning_rate": 4.642189879239229e-05, "loss": 0.4863, "step": 6653 }, { "epoch": 0.7890430451796514, "grad_norm": 1.922344576928574, "learning_rate": 4.642066125311733e-05, "loss": 0.5796, "step": 6654 }, { "epoch": 0.7891616269417764, "grad_norm": 1.8658967166874785, "learning_rate": 4.6419423516370144e-05, "loss": 0.4865, "step": 6655 }, { "epoch": 0.7892802087039014, "grad_norm": 1.7106563560318973, "learning_rate": 4.641818558216214e-05, "loss": 0.5357, "step": 6656 }, { "epoch": 0.7893987904660263, "grad_norm": 1.417282263547433, "learning_rate": 4.641694745050473e-05, "loss": 0.4297, "step": 6657 }, { "epoch": 0.7895173722281513, "grad_norm": 1.7981016934455447, "learning_rate": 4.641570912140932e-05, "loss": 0.4232, "step": 6658 }, { "epoch": 0.7896359539902763, "grad_norm": 1.4458587949690043, "learning_rate": 4.641447059488734e-05, "loss": 0.4114, "step": 6659 }, { "epoch": 0.7897545357524013, "grad_norm": 0.9917431294155186, "learning_rate": 4.64132318709502e-05, "loss": 0.253, "step": 6660 }, { "epoch": 0.7898731175145263, "grad_norm": 1.2872793736742065, "learning_rate": 4.6411992949609315e-05, "loss": 0.4621, "step": 6661 }, { "epoch": 0.7899916992766512, "grad_norm": 1.2676016765212923, "learning_rate": 4.6410753830876123e-05, "loss": 0.328, "step": 6662 }, { "epoch": 0.7901102810387762, "grad_norm": 1.5118165672069042, "learning_rate": 4.640951451476203e-05, "loss": 0.5385, "step": 6663 }, { "epoch": 0.7902288628009012, "grad_norm": 1.3920626060264858, "learning_rate": 4.640827500127847e-05, "loss": 0.5197, "step": 6664 }, { "epoch": 0.7903474445630262, "grad_norm": 1.4117466536387884, "learning_rate": 4.6407035290436864e-05, "loss": 0.4164, "step": 6665 }, { "epoch": 0.7904660263251512, "grad_norm": 1.5723947483681118, "learning_rate": 4.640579538224865e-05, "loss": 0.5232, "step": 6666 }, { "epoch": 0.7905846080872762, "grad_norm": 1.2088978653510445, "learning_rate": 4.640455527672525e-05, "loss": 0.3552, "step": 6667 }, { "epoch": 0.7907031898494011, "grad_norm": 1.2753403078876697, "learning_rate": 4.6403314973878084e-05, "loss": 0.4394, "step": 6668 }, { "epoch": 0.7908217716115261, "grad_norm": 1.666791683833038, "learning_rate": 4.6402074473718615e-05, "loss": 0.6381, "step": 6669 }, { "epoch": 0.7909403533736511, "grad_norm": 1.0753106163391204, "learning_rate": 4.6400833776258264e-05, "loss": 0.3624, "step": 6670 }, { "epoch": 0.7910589351357761, "grad_norm": 1.345230400217538, "learning_rate": 4.639959288150847e-05, "loss": 0.482, "step": 6671 }, { "epoch": 0.7911775168979011, "grad_norm": 1.251326785063224, "learning_rate": 4.639835178948066e-05, "loss": 0.3132, "step": 6672 }, { "epoch": 0.791296098660026, "grad_norm": 1.3020222012003455, "learning_rate": 4.63971105001863e-05, "loss": 0.4366, "step": 6673 }, { "epoch": 0.791414680422151, "grad_norm": 1.586571164742949, "learning_rate": 4.639586901363682e-05, "loss": 0.4924, "step": 6674 }, { "epoch": 0.7915332621842761, "grad_norm": 1.431575939082728, "learning_rate": 4.6394627329843654e-05, "loss": 0.4961, "step": 6675 }, { "epoch": 0.7916518439464011, "grad_norm": 1.220244542300123, "learning_rate": 4.6393385448818264e-05, "loss": 0.4741, "step": 6676 }, { "epoch": 0.7917704257085261, "grad_norm": 1.6706780264398904, "learning_rate": 4.63921433705721e-05, "loss": 0.5741, "step": 6677 }, { "epoch": 0.791889007470651, "grad_norm": 1.1431285251770469, "learning_rate": 4.63909010951166e-05, "loss": 0.3353, "step": 6678 }, { "epoch": 0.792007589232776, "grad_norm": 1.1266289472718634, "learning_rate": 4.638965862246323e-05, "loss": 0.3645, "step": 6679 }, { "epoch": 0.792126170994901, "grad_norm": 1.1443844818166071, "learning_rate": 4.638841595262343e-05, "loss": 0.3593, "step": 6680 }, { "epoch": 0.792244752757026, "grad_norm": 1.746237188602518, "learning_rate": 4.638717308560867e-05, "loss": 0.4405, "step": 6681 }, { "epoch": 0.792363334519151, "grad_norm": 1.8852425773643846, "learning_rate": 4.6385930021430396e-05, "loss": 0.4881, "step": 6682 }, { "epoch": 0.792481916281276, "grad_norm": 1.3265351915529169, "learning_rate": 4.638468676010007e-05, "loss": 0.3897, "step": 6683 }, { "epoch": 0.7926004980434009, "grad_norm": 1.3100356335585341, "learning_rate": 4.638344330162916e-05, "loss": 0.4241, "step": 6684 }, { "epoch": 0.7927190798055259, "grad_norm": 1.7136016588992626, "learning_rate": 4.6382199646029124e-05, "loss": 0.5109, "step": 6685 }, { "epoch": 0.7928376615676509, "grad_norm": 1.5351855879811311, "learning_rate": 4.638095579331143e-05, "loss": 0.4514, "step": 6686 }, { "epoch": 0.7929562433297759, "grad_norm": 1.4090800811536346, "learning_rate": 4.637971174348754e-05, "loss": 0.4267, "step": 6687 }, { "epoch": 0.7930748250919009, "grad_norm": 1.2238600826879646, "learning_rate": 4.637846749656893e-05, "loss": 0.3288, "step": 6688 }, { "epoch": 0.7931934068540258, "grad_norm": 1.6666269896008972, "learning_rate": 4.637722305256706e-05, "loss": 0.6522, "step": 6689 }, { "epoch": 0.7933119886161508, "grad_norm": 1.336914280757268, "learning_rate": 4.637597841149341e-05, "loss": 0.3779, "step": 6690 }, { "epoch": 0.7934305703782758, "grad_norm": 1.3165339599593469, "learning_rate": 4.6374733573359453e-05, "loss": 0.3525, "step": 6691 }, { "epoch": 0.7935491521404008, "grad_norm": 1.4495846181705692, "learning_rate": 4.637348853817666e-05, "loss": 0.4622, "step": 6692 }, { "epoch": 0.7936677339025258, "grad_norm": 1.2397410901664694, "learning_rate": 4.637224330595652e-05, "loss": 0.4024, "step": 6693 }, { "epoch": 0.7937863156646507, "grad_norm": 2.178579944601011, "learning_rate": 4.63709978767105e-05, "loss": 0.6014, "step": 6694 }, { "epoch": 0.7939048974267757, "grad_norm": 1.6911426207568345, "learning_rate": 4.6369752250450085e-05, "loss": 0.6074, "step": 6695 }, { "epoch": 0.7940234791889007, "grad_norm": 1.2733973553725428, "learning_rate": 4.636850642718677e-05, "loss": 0.4048, "step": 6696 }, { "epoch": 0.7941420609510257, "grad_norm": 1.2705817896073603, "learning_rate": 4.636726040693201e-05, "loss": 0.4256, "step": 6697 }, { "epoch": 0.7942606427131507, "grad_norm": 1.26153417010475, "learning_rate": 4.6366014189697326e-05, "loss": 0.4381, "step": 6698 }, { "epoch": 0.7943792244752758, "grad_norm": 1.4458932724137932, "learning_rate": 4.636476777549418e-05, "loss": 0.4574, "step": 6699 }, { "epoch": 0.7944978062374007, "grad_norm": 1.3847718982744737, "learning_rate": 4.636352116433409e-05, "loss": 0.4626, "step": 6700 }, { "epoch": 0.7946163879995257, "grad_norm": 1.1733619221177414, "learning_rate": 4.636227435622852e-05, "loss": 0.3841, "step": 6701 }, { "epoch": 0.7947349697616507, "grad_norm": 1.4828340460944935, "learning_rate": 4.636102735118899e-05, "loss": 0.4184, "step": 6702 }, { "epoch": 0.7948535515237757, "grad_norm": 1.3719508096530924, "learning_rate": 4.6359780149226966e-05, "loss": 0.5399, "step": 6703 }, { "epoch": 0.7949721332859007, "grad_norm": 1.1755658468001415, "learning_rate": 4.635853275035397e-05, "loss": 0.3341, "step": 6704 }, { "epoch": 0.7950907150480256, "grad_norm": 1.3386974465661787, "learning_rate": 4.63572851545815e-05, "loss": 0.3725, "step": 6705 }, { "epoch": 0.7952092968101506, "grad_norm": 1.1763790099697307, "learning_rate": 4.635603736192104e-05, "loss": 0.3718, "step": 6706 }, { "epoch": 0.7953278785722756, "grad_norm": 1.263641923261262, "learning_rate": 4.635478937238411e-05, "loss": 0.3392, "step": 6707 }, { "epoch": 0.7954464603344006, "grad_norm": 1.4254586789644557, "learning_rate": 4.6353541185982206e-05, "loss": 0.4604, "step": 6708 }, { "epoch": 0.7955650420965256, "grad_norm": 1.365725702813229, "learning_rate": 4.635229280272684e-05, "loss": 0.3901, "step": 6709 }, { "epoch": 0.7956836238586505, "grad_norm": 1.6291768065748808, "learning_rate": 4.635104422262952e-05, "loss": 0.3718, "step": 6710 }, { "epoch": 0.7958022056207755, "grad_norm": 1.486035358264411, "learning_rate": 4.634979544570175e-05, "loss": 0.3896, "step": 6711 }, { "epoch": 0.7959207873829005, "grad_norm": 1.1973979446494234, "learning_rate": 4.6348546471955046e-05, "loss": 0.3376, "step": 6712 }, { "epoch": 0.7960393691450255, "grad_norm": 1.2559156132832991, "learning_rate": 4.634729730140093e-05, "loss": 0.3614, "step": 6713 }, { "epoch": 0.7961579509071505, "grad_norm": 1.2888361019704597, "learning_rate": 4.634604793405091e-05, "loss": 0.2916, "step": 6714 }, { "epoch": 0.7962765326692754, "grad_norm": 1.4107957533908084, "learning_rate": 4.63447983699165e-05, "loss": 0.4299, "step": 6715 }, { "epoch": 0.7963951144314004, "grad_norm": 2.2125795983768137, "learning_rate": 4.634354860900923e-05, "loss": 0.53, "step": 6716 }, { "epoch": 0.7965136961935254, "grad_norm": 1.5867592989727513, "learning_rate": 4.63422986513406e-05, "loss": 0.4669, "step": 6717 }, { "epoch": 0.7966322779556504, "grad_norm": 1.8583076776464382, "learning_rate": 4.634104849692216e-05, "loss": 0.734, "step": 6718 }, { "epoch": 0.7967508597177754, "grad_norm": 1.3744373514353871, "learning_rate": 4.633979814576542e-05, "loss": 0.3466, "step": 6719 }, { "epoch": 0.7968694414799004, "grad_norm": 1.64428214350906, "learning_rate": 4.633854759788191e-05, "loss": 0.5211, "step": 6720 }, { "epoch": 0.7969880232420253, "grad_norm": 1.343838823858824, "learning_rate": 4.633729685328316e-05, "loss": 0.33, "step": 6721 }, { "epoch": 0.7971066050041503, "grad_norm": 1.439220607769147, "learning_rate": 4.63360459119807e-05, "loss": 0.5244, "step": 6722 }, { "epoch": 0.7972251867662753, "grad_norm": 1.4946768571618467, "learning_rate": 4.633479477398606e-05, "loss": 0.5399, "step": 6723 }, { "epoch": 0.7973437685284004, "grad_norm": 1.3879956847294836, "learning_rate": 4.633354343931077e-05, "loss": 0.4467, "step": 6724 }, { "epoch": 0.7974623502905254, "grad_norm": 2.030469936187782, "learning_rate": 4.633229190796637e-05, "loss": 0.8327, "step": 6725 }, { "epoch": 0.7975809320526503, "grad_norm": 1.2631796234751382, "learning_rate": 4.6331040179964405e-05, "loss": 0.4938, "step": 6726 }, { "epoch": 0.7976995138147753, "grad_norm": 1.667410457936118, "learning_rate": 4.6329788255316397e-05, "loss": 0.6807, "step": 6727 }, { "epoch": 0.7978180955769003, "grad_norm": 1.2729248595532918, "learning_rate": 4.6328536134033905e-05, "loss": 0.3497, "step": 6728 }, { "epoch": 0.7979366773390253, "grad_norm": 1.0734125280368987, "learning_rate": 4.632728381612846e-05, "loss": 0.4101, "step": 6729 }, { "epoch": 0.7980552591011503, "grad_norm": 1.631379794346881, "learning_rate": 4.6326031301611615e-05, "loss": 0.6069, "step": 6730 }, { "epoch": 0.7981738408632753, "grad_norm": 1.1221636771970052, "learning_rate": 4.632477859049492e-05, "loss": 0.351, "step": 6731 }, { "epoch": 0.7982924226254002, "grad_norm": 1.1297512728667818, "learning_rate": 4.6323525682789904e-05, "loss": 0.3994, "step": 6732 }, { "epoch": 0.7984110043875252, "grad_norm": 1.0434640166474354, "learning_rate": 4.632227257850813e-05, "loss": 0.3483, "step": 6733 }, { "epoch": 0.7985295861496502, "grad_norm": 1.8741624457167243, "learning_rate": 4.632101927766116e-05, "loss": 0.7487, "step": 6734 }, { "epoch": 0.7986481679117752, "grad_norm": 1.5213760539938963, "learning_rate": 4.6319765780260535e-05, "loss": 0.4414, "step": 6735 }, { "epoch": 0.7987667496739002, "grad_norm": 1.3818962983760439, "learning_rate": 4.6318512086317814e-05, "loss": 0.4314, "step": 6736 }, { "epoch": 0.7988853314360251, "grad_norm": 1.112353064202862, "learning_rate": 4.631725819584455e-05, "loss": 0.4016, "step": 6737 }, { "epoch": 0.7990039131981501, "grad_norm": 1.6388235361765107, "learning_rate": 4.6316004108852305e-05, "loss": 0.4274, "step": 6738 }, { "epoch": 0.7991224949602751, "grad_norm": 1.0504024429323928, "learning_rate": 4.631474982535265e-05, "loss": 0.3547, "step": 6739 }, { "epoch": 0.7992410767224001, "grad_norm": 1.4512305876863028, "learning_rate": 4.631349534535713e-05, "loss": 0.5515, "step": 6740 }, { "epoch": 0.7993596584845251, "grad_norm": 1.324672131983842, "learning_rate": 4.6312240668877324e-05, "loss": 0.4533, "step": 6741 }, { "epoch": 0.79947824024665, "grad_norm": 1.1436706263509928, "learning_rate": 4.631098579592479e-05, "loss": 0.3875, "step": 6742 }, { "epoch": 0.799596822008775, "grad_norm": 1.6441173047516482, "learning_rate": 4.6309730726511106e-05, "loss": 0.536, "step": 6743 }, { "epoch": 0.7997154037709, "grad_norm": 1.091040571826638, "learning_rate": 4.630847546064784e-05, "loss": 0.2915, "step": 6744 }, { "epoch": 0.799833985533025, "grad_norm": 1.4804899540293577, "learning_rate": 4.630721999834655e-05, "loss": 0.411, "step": 6745 }, { "epoch": 0.79995256729515, "grad_norm": 1.2747105966455636, "learning_rate": 4.6305964339618824e-05, "loss": 0.3576, "step": 6746 }, { "epoch": 0.8000711490572749, "grad_norm": 1.425162608608509, "learning_rate": 4.6304708484476235e-05, "loss": 0.4836, "step": 6747 }, { "epoch": 0.8001897308193999, "grad_norm": 1.3921251442110798, "learning_rate": 4.630345243293036e-05, "loss": 0.478, "step": 6748 }, { "epoch": 0.800308312581525, "grad_norm": 1.4628555432334376, "learning_rate": 4.6302196184992776e-05, "loss": 0.4291, "step": 6749 }, { "epoch": 0.80042689434365, "grad_norm": 1.690504358428121, "learning_rate": 4.630093974067506e-05, "loss": 0.4614, "step": 6750 }, { "epoch": 0.800545476105775, "grad_norm": 1.3294904304974926, "learning_rate": 4.629968309998881e-05, "loss": 0.3572, "step": 6751 }, { "epoch": 0.8006640578679, "grad_norm": 1.696175808254425, "learning_rate": 4.62984262629456e-05, "loss": 0.4245, "step": 6752 }, { "epoch": 0.8007826396300249, "grad_norm": 1.2519467803042679, "learning_rate": 4.629716922955701e-05, "loss": 0.3736, "step": 6753 }, { "epoch": 0.8009012213921499, "grad_norm": 1.2912918378135467, "learning_rate": 4.6295911999834643e-05, "loss": 0.3877, "step": 6754 }, { "epoch": 0.8010198031542749, "grad_norm": 1.321579796961692, "learning_rate": 4.629465457379008e-05, "loss": 0.4068, "step": 6755 }, { "epoch": 0.8011383849163999, "grad_norm": 1.5981171486208698, "learning_rate": 4.629339695143492e-05, "loss": 0.4079, "step": 6756 }, { "epoch": 0.8012569666785249, "grad_norm": 1.1584180896054728, "learning_rate": 4.6292139132780746e-05, "loss": 0.389, "step": 6757 }, { "epoch": 0.8013755484406498, "grad_norm": 1.208777134320982, "learning_rate": 4.629088111783916e-05, "loss": 0.4297, "step": 6758 }, { "epoch": 0.8014941302027748, "grad_norm": 1.9780625139852064, "learning_rate": 4.628962290662177e-05, "loss": 0.501, "step": 6759 }, { "epoch": 0.8016127119648998, "grad_norm": 1.4860800296803933, "learning_rate": 4.628836449914015e-05, "loss": 0.4463, "step": 6760 }, { "epoch": 0.8017312937270248, "grad_norm": 1.8130228353690667, "learning_rate": 4.6287105895405914e-05, "loss": 0.6035, "step": 6761 }, { "epoch": 0.8018498754891498, "grad_norm": 1.2689596267660956, "learning_rate": 4.628584709543067e-05, "loss": 0.3198, "step": 6762 }, { "epoch": 0.8019684572512747, "grad_norm": 0.9467343406728173, "learning_rate": 4.6284588099226015e-05, "loss": 0.2962, "step": 6763 }, { "epoch": 0.8020870390133997, "grad_norm": 1.2152850459659028, "learning_rate": 4.6283328906803564e-05, "loss": 0.3632, "step": 6764 }, { "epoch": 0.8022056207755247, "grad_norm": 1.2096351643534689, "learning_rate": 4.6282069518174917e-05, "loss": 0.4263, "step": 6765 }, { "epoch": 0.8023242025376497, "grad_norm": 1.0272148183248624, "learning_rate": 4.628080993335169e-05, "loss": 0.2338, "step": 6766 }, { "epoch": 0.8024427842997747, "grad_norm": 1.3215016855140491, "learning_rate": 4.627955015234548e-05, "loss": 0.3884, "step": 6767 }, { "epoch": 0.8025613660618997, "grad_norm": 1.4910180206463592, "learning_rate": 4.6278290175167924e-05, "loss": 0.4355, "step": 6768 }, { "epoch": 0.8026799478240246, "grad_norm": 1.1596409284079148, "learning_rate": 4.627703000183062e-05, "loss": 0.367, "step": 6769 }, { "epoch": 0.8027985295861496, "grad_norm": 1.8307524181095074, "learning_rate": 4.62757696323452e-05, "loss": 0.6003, "step": 6770 }, { "epoch": 0.8029171113482746, "grad_norm": 1.049280438150683, "learning_rate": 4.627450906672327e-05, "loss": 0.3249, "step": 6771 }, { "epoch": 0.8030356931103996, "grad_norm": 1.4884200270717227, "learning_rate": 4.627324830497645e-05, "loss": 0.41, "step": 6772 }, { "epoch": 0.8031542748725246, "grad_norm": 1.182004658549509, "learning_rate": 4.6271987347116376e-05, "loss": 0.4377, "step": 6773 }, { "epoch": 0.8032728566346496, "grad_norm": 1.1774188919633044, "learning_rate": 4.627072619315466e-05, "loss": 0.3573, "step": 6774 }, { "epoch": 0.8033914383967746, "grad_norm": 1.3717639480403467, "learning_rate": 4.626946484310293e-05, "loss": 0.3323, "step": 6775 }, { "epoch": 0.8035100201588996, "grad_norm": 1.3581740389125025, "learning_rate": 4.6268203296972814e-05, "loss": 0.326, "step": 6776 }, { "epoch": 0.8036286019210246, "grad_norm": 1.1182919662356705, "learning_rate": 4.626694155477595e-05, "loss": 0.3199, "step": 6777 }, { "epoch": 0.8037471836831496, "grad_norm": 1.8229771363188922, "learning_rate": 4.626567961652396e-05, "loss": 0.5005, "step": 6778 }, { "epoch": 0.8038657654452745, "grad_norm": 1.0649165194462038, "learning_rate": 4.6264417482228485e-05, "loss": 0.2618, "step": 6779 }, { "epoch": 0.8039843472073995, "grad_norm": 1.390740927899871, "learning_rate": 4.6263155151901154e-05, "loss": 0.3322, "step": 6780 }, { "epoch": 0.8041029289695245, "grad_norm": 1.3957716576277635, "learning_rate": 4.6261892625553615e-05, "loss": 0.3581, "step": 6781 }, { "epoch": 0.8042215107316495, "grad_norm": 1.1368453091336201, "learning_rate": 4.626062990319749e-05, "loss": 0.3366, "step": 6782 }, { "epoch": 0.8043400924937745, "grad_norm": 1.8273001753181397, "learning_rate": 4.6259366984844435e-05, "loss": 0.6242, "step": 6783 }, { "epoch": 0.8044586742558995, "grad_norm": 1.2600447884060986, "learning_rate": 4.625810387050608e-05, "loss": 0.3155, "step": 6784 }, { "epoch": 0.8045772560180244, "grad_norm": 2.3890332584770415, "learning_rate": 4.625684056019407e-05, "loss": 0.6922, "step": 6785 }, { "epoch": 0.8046958377801494, "grad_norm": 1.5823722143213492, "learning_rate": 4.625557705392007e-05, "loss": 0.392, "step": 6786 }, { "epoch": 0.8048144195422744, "grad_norm": 1.7098485650327264, "learning_rate": 4.625431335169571e-05, "loss": 0.341, "step": 6787 }, { "epoch": 0.8049330013043994, "grad_norm": 1.6615347613720943, "learning_rate": 4.625304945353265e-05, "loss": 0.5365, "step": 6788 }, { "epoch": 0.8050515830665244, "grad_norm": 1.8418967655655545, "learning_rate": 4.6251785359442526e-05, "loss": 0.4528, "step": 6789 }, { "epoch": 0.8051701648286493, "grad_norm": 1.198112842999033, "learning_rate": 4.6250521069437014e-05, "loss": 0.3127, "step": 6790 }, { "epoch": 0.8052887465907743, "grad_norm": 1.4498693372491798, "learning_rate": 4.6249256583527744e-05, "loss": 0.4799, "step": 6791 }, { "epoch": 0.8054073283528993, "grad_norm": 1.4145148818523667, "learning_rate": 4.62479919017264e-05, "loss": 0.3954, "step": 6792 }, { "epoch": 0.8055259101150243, "grad_norm": 1.2566889507502206, "learning_rate": 4.624672702404461e-05, "loss": 0.366, "step": 6793 }, { "epoch": 0.8056444918771493, "grad_norm": 1.5341680553321753, "learning_rate": 4.624546195049406e-05, "loss": 0.5208, "step": 6794 }, { "epoch": 0.8057630736392742, "grad_norm": 1.5019981134785798, "learning_rate": 4.62441966810864e-05, "loss": 0.4101, "step": 6795 }, { "epoch": 0.8058816554013992, "grad_norm": 1.515981419582991, "learning_rate": 4.624293121583331e-05, "loss": 0.4551, "step": 6796 }, { "epoch": 0.8060002371635242, "grad_norm": 1.2863712799731994, "learning_rate": 4.624166555474643e-05, "loss": 0.4005, "step": 6797 }, { "epoch": 0.8061188189256492, "grad_norm": 1.8588366002040344, "learning_rate": 4.624039969783745e-05, "loss": 0.573, "step": 6798 }, { "epoch": 0.8062374006877743, "grad_norm": 1.3573818324239404, "learning_rate": 4.6239133645118026e-05, "loss": 0.3359, "step": 6799 }, { "epoch": 0.8063559824498993, "grad_norm": 1.218570330752488, "learning_rate": 4.623786739659984e-05, "loss": 0.3297, "step": 6800 }, { "epoch": 0.8064745642120242, "grad_norm": 2.0186060556263894, "learning_rate": 4.623660095229456e-05, "loss": 0.6217, "step": 6801 }, { "epoch": 0.8065931459741492, "grad_norm": 1.1611995997099565, "learning_rate": 4.6235334312213865e-05, "loss": 0.371, "step": 6802 }, { "epoch": 0.8067117277362742, "grad_norm": 1.646205038824545, "learning_rate": 4.623406747636941e-05, "loss": 0.5689, "step": 6803 }, { "epoch": 0.8068303094983992, "grad_norm": 1.6496057037328447, "learning_rate": 4.62328004447729e-05, "loss": 0.5761, "step": 6804 }, { "epoch": 0.8069488912605242, "grad_norm": 1.3385475317127633, "learning_rate": 4.623153321743602e-05, "loss": 0.4549, "step": 6805 }, { "epoch": 0.8070674730226491, "grad_norm": 1.50874515272626, "learning_rate": 4.623026579437043e-05, "loss": 0.449, "step": 6806 }, { "epoch": 0.8071860547847741, "grad_norm": 1.7168630795434103, "learning_rate": 4.6228998175587824e-05, "loss": 0.5428, "step": 6807 }, { "epoch": 0.8073046365468991, "grad_norm": 1.45642122619262, "learning_rate": 4.622773036109989e-05, "loss": 0.5034, "step": 6808 }, { "epoch": 0.8074232183090241, "grad_norm": 1.3175714980746869, "learning_rate": 4.622646235091831e-05, "loss": 0.4434, "step": 6809 }, { "epoch": 0.8075418000711491, "grad_norm": 1.0743635370924898, "learning_rate": 4.622519414505478e-05, "loss": 0.2778, "step": 6810 }, { "epoch": 0.807660381833274, "grad_norm": 2.029662397279706, "learning_rate": 4.6223925743520986e-05, "loss": 0.5856, "step": 6811 }, { "epoch": 0.807778963595399, "grad_norm": 0.9836644394188714, "learning_rate": 4.6222657146328624e-05, "loss": 0.3557, "step": 6812 }, { "epoch": 0.807897545357524, "grad_norm": 1.3522311663609816, "learning_rate": 4.6221388353489385e-05, "loss": 0.5474, "step": 6813 }, { "epoch": 0.808016127119649, "grad_norm": 1.4476947608117725, "learning_rate": 4.622011936501497e-05, "loss": 0.4398, "step": 6814 }, { "epoch": 0.808134708881774, "grad_norm": 1.3965037285273243, "learning_rate": 4.6218850180917085e-05, "loss": 0.4168, "step": 6815 }, { "epoch": 0.808253290643899, "grad_norm": 1.2598709160885075, "learning_rate": 4.62175808012074e-05, "loss": 0.3739, "step": 6816 }, { "epoch": 0.8083718724060239, "grad_norm": 1.3622694534504167, "learning_rate": 4.621631122589766e-05, "loss": 0.5131, "step": 6817 }, { "epoch": 0.8084904541681489, "grad_norm": 1.2664714790914144, "learning_rate": 4.621504145499954e-05, "loss": 0.3809, "step": 6818 }, { "epoch": 0.8086090359302739, "grad_norm": 1.2767506904838986, "learning_rate": 4.621377148852476e-05, "loss": 0.3427, "step": 6819 }, { "epoch": 0.8087276176923989, "grad_norm": 1.1508347904751135, "learning_rate": 4.6212501326485015e-05, "loss": 0.3688, "step": 6820 }, { "epoch": 0.8088461994545239, "grad_norm": 1.3192845946962655, "learning_rate": 4.621123096889202e-05, "loss": 0.4747, "step": 6821 }, { "epoch": 0.8089647812166488, "grad_norm": 1.1994216502204298, "learning_rate": 4.620996041575748e-05, "loss": 0.3925, "step": 6822 }, { "epoch": 0.8090833629787738, "grad_norm": 1.2703867564385012, "learning_rate": 4.620868966709312e-05, "loss": 0.3827, "step": 6823 }, { "epoch": 0.8092019447408989, "grad_norm": 1.584460457119976, "learning_rate": 4.6207418722910653e-05, "loss": 0.473, "step": 6824 }, { "epoch": 0.8093205265030239, "grad_norm": 1.247271986232191, "learning_rate": 4.6206147583221785e-05, "loss": 0.3332, "step": 6825 }, { "epoch": 0.8094391082651489, "grad_norm": 1.3532235124007201, "learning_rate": 4.620487624803824e-05, "loss": 0.4384, "step": 6826 }, { "epoch": 0.8095576900272738, "grad_norm": 1.495863954669008, "learning_rate": 4.620360471737175e-05, "loss": 0.4208, "step": 6827 }, { "epoch": 0.8096762717893988, "grad_norm": 1.1005020585795202, "learning_rate": 4.620233299123402e-05, "loss": 0.3214, "step": 6828 }, { "epoch": 0.8097948535515238, "grad_norm": 1.4401486466099782, "learning_rate": 4.6201061069636775e-05, "loss": 0.2959, "step": 6829 }, { "epoch": 0.8099134353136488, "grad_norm": 1.6438650175049159, "learning_rate": 4.619978895259176e-05, "loss": 0.4883, "step": 6830 }, { "epoch": 0.8100320170757738, "grad_norm": 1.4991136535956624, "learning_rate": 4.619851664011067e-05, "loss": 0.393, "step": 6831 }, { "epoch": 0.8101505988378987, "grad_norm": 1.3816487362715175, "learning_rate": 4.619724413220526e-05, "loss": 0.4107, "step": 6832 }, { "epoch": 0.8102691806000237, "grad_norm": 1.6535599943865003, "learning_rate": 4.6195971428887256e-05, "loss": 0.5028, "step": 6833 }, { "epoch": 0.8103877623621487, "grad_norm": 1.2260893731413467, "learning_rate": 4.619469853016838e-05, "loss": 0.3284, "step": 6834 }, { "epoch": 0.8105063441242737, "grad_norm": 2.5253034677050983, "learning_rate": 4.619342543606038e-05, "loss": 0.5981, "step": 6835 }, { "epoch": 0.8106249258863987, "grad_norm": 1.2429938988860127, "learning_rate": 4.619215214657498e-05, "loss": 0.3117, "step": 6836 }, { "epoch": 0.8107435076485237, "grad_norm": 1.1821435787255659, "learning_rate": 4.6190878661723936e-05, "loss": 0.3213, "step": 6837 }, { "epoch": 0.8108620894106486, "grad_norm": 1.6662543864195645, "learning_rate": 4.618960498151897e-05, "loss": 0.6369, "step": 6838 }, { "epoch": 0.8109806711727736, "grad_norm": 1.133639612588016, "learning_rate": 4.618833110597183e-05, "loss": 0.3696, "step": 6839 }, { "epoch": 0.8110992529348986, "grad_norm": 1.8596646892366844, "learning_rate": 4.618705703509426e-05, "loss": 0.4764, "step": 6840 }, { "epoch": 0.8112178346970236, "grad_norm": 1.2308297537799875, "learning_rate": 4.6185782768898005e-05, "loss": 0.4001, "step": 6841 }, { "epoch": 0.8113364164591486, "grad_norm": 1.2644091652304206, "learning_rate": 4.6184508307394816e-05, "loss": 0.3262, "step": 6842 }, { "epoch": 0.8114549982212735, "grad_norm": 1.2634073886030825, "learning_rate": 4.618323365059644e-05, "loss": 0.3653, "step": 6843 }, { "epoch": 0.8115735799833985, "grad_norm": 1.7115150399734311, "learning_rate": 4.618195879851462e-05, "loss": 0.5678, "step": 6844 }, { "epoch": 0.8116921617455235, "grad_norm": 1.4235026811763483, "learning_rate": 4.618068375116111e-05, "loss": 0.443, "step": 6845 }, { "epoch": 0.8118107435076485, "grad_norm": 1.3909175873789308, "learning_rate": 4.617940850854768e-05, "loss": 0.4015, "step": 6846 }, { "epoch": 0.8119293252697735, "grad_norm": 1.5217482587372124, "learning_rate": 4.617813307068607e-05, "loss": 0.4855, "step": 6847 }, { "epoch": 0.8120479070318984, "grad_norm": 1.220270104125729, "learning_rate": 4.617685743758805e-05, "loss": 0.3778, "step": 6848 }, { "epoch": 0.8121664887940235, "grad_norm": 1.1134563614733826, "learning_rate": 4.617558160926537e-05, "loss": 0.418, "step": 6849 }, { "epoch": 0.8122850705561485, "grad_norm": 0.9838504620732718, "learning_rate": 4.617430558572979e-05, "loss": 0.3121, "step": 6850 }, { "epoch": 0.8124036523182735, "grad_norm": 1.3759878540917931, "learning_rate": 4.617302936699309e-05, "loss": 0.4981, "step": 6851 }, { "epoch": 0.8125222340803985, "grad_norm": 1.4386836280513957, "learning_rate": 4.617175295306701e-05, "loss": 0.4608, "step": 6852 }, { "epoch": 0.8126408158425235, "grad_norm": 1.5804106025878415, "learning_rate": 4.617047634396334e-05, "loss": 0.7304, "step": 6853 }, { "epoch": 0.8127593976046484, "grad_norm": 0.9954793797329403, "learning_rate": 4.616919953969383e-05, "loss": 0.333, "step": 6854 }, { "epoch": 0.8128779793667734, "grad_norm": 1.4196540831214783, "learning_rate": 4.6167922540270264e-05, "loss": 0.5437, "step": 6855 }, { "epoch": 0.8129965611288984, "grad_norm": 1.3429692168458418, "learning_rate": 4.6166645345704404e-05, "loss": 0.3509, "step": 6856 }, { "epoch": 0.8131151428910234, "grad_norm": 1.168199214597201, "learning_rate": 4.6165367956008046e-05, "loss": 0.3219, "step": 6857 }, { "epoch": 0.8132337246531484, "grad_norm": 1.1000093113469422, "learning_rate": 4.616409037119294e-05, "loss": 0.3962, "step": 6858 }, { "epoch": 0.8133523064152733, "grad_norm": 2.065214904002503, "learning_rate": 4.616281259127087e-05, "loss": 0.4488, "step": 6859 }, { "epoch": 0.8134708881773983, "grad_norm": 1.0881223381663112, "learning_rate": 4.6161534616253625e-05, "loss": 0.3594, "step": 6860 }, { "epoch": 0.8135894699395233, "grad_norm": 1.221095541680054, "learning_rate": 4.616025644615298e-05, "loss": 0.3308, "step": 6861 }, { "epoch": 0.8137080517016483, "grad_norm": 2.051796715699612, "learning_rate": 4.6158978080980717e-05, "loss": 0.7428, "step": 6862 }, { "epoch": 0.8138266334637733, "grad_norm": 1.3447349237850492, "learning_rate": 4.615769952074862e-05, "loss": 0.3481, "step": 6863 }, { "epoch": 0.8139452152258982, "grad_norm": 1.1663130974185572, "learning_rate": 4.615642076546849e-05, "loss": 0.3472, "step": 6864 }, { "epoch": 0.8140637969880232, "grad_norm": 1.3202199780962067, "learning_rate": 4.61551418151521e-05, "loss": 0.4315, "step": 6865 }, { "epoch": 0.8141823787501482, "grad_norm": 1.5380359064610487, "learning_rate": 4.615386266981124e-05, "loss": 0.4525, "step": 6866 }, { "epoch": 0.8143009605122732, "grad_norm": 1.2127741088355166, "learning_rate": 4.615258332945771e-05, "loss": 0.28, "step": 6867 }, { "epoch": 0.8144195422743982, "grad_norm": 1.3933895424341884, "learning_rate": 4.6151303794103296e-05, "loss": 0.4104, "step": 6868 }, { "epoch": 0.8145381240365231, "grad_norm": 1.4148077766710137, "learning_rate": 4.61500240637598e-05, "loss": 0.4638, "step": 6869 }, { "epoch": 0.8146567057986481, "grad_norm": 1.0787234244221315, "learning_rate": 4.6148744138439025e-05, "loss": 0.2766, "step": 6870 }, { "epoch": 0.8147752875607731, "grad_norm": 1.2476899152375787, "learning_rate": 4.6147464018152756e-05, "loss": 0.2847, "step": 6871 }, { "epoch": 0.8148938693228981, "grad_norm": 1.1583569318640041, "learning_rate": 4.61461837029128e-05, "loss": 0.2996, "step": 6872 }, { "epoch": 0.8150124510850231, "grad_norm": 1.7182155188424093, "learning_rate": 4.614490319273097e-05, "loss": 0.5489, "step": 6873 }, { "epoch": 0.8151310328471482, "grad_norm": 2.2439345219015663, "learning_rate": 4.6143622487619056e-05, "loss": 0.8763, "step": 6874 }, { "epoch": 0.8152496146092731, "grad_norm": 1.2263807428693767, "learning_rate": 4.6142341587588876e-05, "loss": 0.3048, "step": 6875 }, { "epoch": 0.8153681963713981, "grad_norm": 1.3209078645695385, "learning_rate": 4.614106049265223e-05, "loss": 0.4705, "step": 6876 }, { "epoch": 0.8154867781335231, "grad_norm": 1.3214971876632662, "learning_rate": 4.613977920282093e-05, "loss": 0.4664, "step": 6877 }, { "epoch": 0.8156053598956481, "grad_norm": 1.671277770411789, "learning_rate": 4.6138497718106785e-05, "loss": 0.4233, "step": 6878 }, { "epoch": 0.8157239416577731, "grad_norm": 1.3204691268025084, "learning_rate": 4.613721603852162e-05, "loss": 0.4971, "step": 6879 }, { "epoch": 0.815842523419898, "grad_norm": 1.2626571910719147, "learning_rate": 4.613593416407724e-05, "loss": 0.3536, "step": 6880 }, { "epoch": 0.815961105182023, "grad_norm": 1.3128331705669765, "learning_rate": 4.6134652094785466e-05, "loss": 0.3741, "step": 6881 }, { "epoch": 0.816079686944148, "grad_norm": 1.363344905576394, "learning_rate": 4.6133369830658116e-05, "loss": 0.4291, "step": 6882 }, { "epoch": 0.816198268706273, "grad_norm": 1.5375017234643447, "learning_rate": 4.613208737170701e-05, "loss": 0.3893, "step": 6883 }, { "epoch": 0.816316850468398, "grad_norm": 1.1178229881091326, "learning_rate": 4.6130804717943974e-05, "loss": 0.3799, "step": 6884 }, { "epoch": 0.816435432230523, "grad_norm": 1.4015221514688507, "learning_rate": 4.612952186938084e-05, "loss": 0.5045, "step": 6885 }, { "epoch": 0.8165540139926479, "grad_norm": 1.1770781327348847, "learning_rate": 4.612823882602941e-05, "loss": 0.3156, "step": 6886 }, { "epoch": 0.8166725957547729, "grad_norm": 2.9339463454413317, "learning_rate": 4.612695558790154e-05, "loss": 0.4756, "step": 6887 }, { "epoch": 0.8167911775168979, "grad_norm": 1.583152740399592, "learning_rate": 4.6125672155009036e-05, "loss": 0.5406, "step": 6888 }, { "epoch": 0.8169097592790229, "grad_norm": 1.1108214359170132, "learning_rate": 4.612438852736375e-05, "loss": 0.3156, "step": 6889 }, { "epoch": 0.8170283410411479, "grad_norm": 1.6948750920137083, "learning_rate": 4.61231047049775e-05, "loss": 0.4081, "step": 6890 }, { "epoch": 0.8171469228032728, "grad_norm": 1.0349214086108491, "learning_rate": 4.6121820687862125e-05, "loss": 0.2773, "step": 6891 }, { "epoch": 0.8172655045653978, "grad_norm": 1.4674265718232409, "learning_rate": 4.612053647602947e-05, "loss": 0.4955, "step": 6892 }, { "epoch": 0.8173840863275228, "grad_norm": 1.0359480098102285, "learning_rate": 4.611925206949137e-05, "loss": 0.2466, "step": 6893 }, { "epoch": 0.8175026680896478, "grad_norm": 1.6240907284068187, "learning_rate": 4.611796746825966e-05, "loss": 0.7163, "step": 6894 }, { "epoch": 0.8176212498517728, "grad_norm": 1.3752290232796494, "learning_rate": 4.611668267234619e-05, "loss": 0.3387, "step": 6895 }, { "epoch": 0.8177398316138977, "grad_norm": 1.2286465627789163, "learning_rate": 4.611539768176279e-05, "loss": 0.3316, "step": 6896 }, { "epoch": 0.8178584133760227, "grad_norm": 1.6973273589311157, "learning_rate": 4.6114112496521334e-05, "loss": 0.6149, "step": 6897 }, { "epoch": 0.8179769951381477, "grad_norm": 1.4361424993983911, "learning_rate": 4.611282711663364e-05, "loss": 0.5676, "step": 6898 }, { "epoch": 0.8180955769002728, "grad_norm": 1.0995131663956217, "learning_rate": 4.611154154211158e-05, "loss": 0.4743, "step": 6899 }, { "epoch": 0.8182141586623978, "grad_norm": 1.499281265397698, "learning_rate": 4.6110255772966985e-05, "loss": 0.4602, "step": 6900 }, { "epoch": 0.8183327404245228, "grad_norm": 1.8426544611975424, "learning_rate": 4.610896980921173e-05, "loss": 0.7691, "step": 6901 }, { "epoch": 0.8184513221866477, "grad_norm": 1.384752758375079, "learning_rate": 4.610768365085765e-05, "loss": 0.4458, "step": 6902 }, { "epoch": 0.8185699039487727, "grad_norm": 1.3961589614193446, "learning_rate": 4.6106397297916614e-05, "loss": 0.4618, "step": 6903 }, { "epoch": 0.8186884857108977, "grad_norm": 1.1320302161193398, "learning_rate": 4.6105110750400475e-05, "loss": 0.2972, "step": 6904 }, { "epoch": 0.8188070674730227, "grad_norm": 1.5923364126934356, "learning_rate": 4.610382400832109e-05, "loss": 0.409, "step": 6905 }, { "epoch": 0.8189256492351477, "grad_norm": 1.0330369975817901, "learning_rate": 4.6102537071690344e-05, "loss": 0.3401, "step": 6906 }, { "epoch": 0.8190442309972726, "grad_norm": 0.9943889280669523, "learning_rate": 4.6101249940520074e-05, "loss": 0.2859, "step": 6907 }, { "epoch": 0.8191628127593976, "grad_norm": 1.5970227772598284, "learning_rate": 4.6099962614822156e-05, "loss": 0.6476, "step": 6908 }, { "epoch": 0.8192813945215226, "grad_norm": 1.5420964472368286, "learning_rate": 4.6098675094608455e-05, "loss": 0.4387, "step": 6909 }, { "epoch": 0.8193999762836476, "grad_norm": 1.4424727064761438, "learning_rate": 4.609738737989084e-05, "loss": 0.4405, "step": 6910 }, { "epoch": 0.8195185580457726, "grad_norm": 1.4532229649857227, "learning_rate": 4.609609947068119e-05, "loss": 0.4847, "step": 6911 }, { "epoch": 0.8196371398078975, "grad_norm": 1.2975224320987593, "learning_rate": 4.609481136699138e-05, "loss": 0.4096, "step": 6912 }, { "epoch": 0.8197557215700225, "grad_norm": 1.4117700992675135, "learning_rate": 4.609352306883326e-05, "loss": 0.4583, "step": 6913 }, { "epoch": 0.8198743033321475, "grad_norm": 1.2785442007117673, "learning_rate": 4.609223457621874e-05, "loss": 0.3698, "step": 6914 }, { "epoch": 0.8199928850942725, "grad_norm": 1.253609643605438, "learning_rate": 4.6090945889159676e-05, "loss": 0.4613, "step": 6915 }, { "epoch": 0.8201114668563975, "grad_norm": 1.0669771517665434, "learning_rate": 4.6089657007667955e-05, "loss": 0.2513, "step": 6916 }, { "epoch": 0.8202300486185224, "grad_norm": 1.2736998279597227, "learning_rate": 4.6088367931755446e-05, "loss": 0.4175, "step": 6917 }, { "epoch": 0.8203486303806474, "grad_norm": 1.5503459770239172, "learning_rate": 4.608707866143406e-05, "loss": 0.5814, "step": 6918 }, { "epoch": 0.8204672121427724, "grad_norm": 1.6334806158264925, "learning_rate": 4.6085789196715665e-05, "loss": 0.4759, "step": 6919 }, { "epoch": 0.8205857939048974, "grad_norm": 1.2758505874743802, "learning_rate": 4.608449953761215e-05, "loss": 0.3696, "step": 6920 }, { "epoch": 0.8207043756670224, "grad_norm": 1.6456903507379623, "learning_rate": 4.60832096841354e-05, "loss": 0.7055, "step": 6921 }, { "epoch": 0.8208229574291473, "grad_norm": 1.219467641823786, "learning_rate": 4.608191963629732e-05, "loss": 0.3305, "step": 6922 }, { "epoch": 0.8209415391912723, "grad_norm": 1.441447590162349, "learning_rate": 4.608062939410979e-05, "loss": 0.5057, "step": 6923 }, { "epoch": 0.8210601209533974, "grad_norm": 0.946588941949069, "learning_rate": 4.6079338957584694e-05, "loss": 0.298, "step": 6924 }, { "epoch": 0.8211787027155224, "grad_norm": 1.8476888465786403, "learning_rate": 4.607804832673396e-05, "loss": 0.6497, "step": 6925 }, { "epoch": 0.8212972844776474, "grad_norm": 1.1156148696486041, "learning_rate": 4.607675750156946e-05, "loss": 0.2807, "step": 6926 }, { "epoch": 0.8214158662397724, "grad_norm": 1.4831571916055704, "learning_rate": 4.60754664821031e-05, "loss": 0.6265, "step": 6927 }, { "epoch": 0.8215344480018973, "grad_norm": 1.2703220386945084, "learning_rate": 4.6074175268346785e-05, "loss": 0.473, "step": 6928 }, { "epoch": 0.8216530297640223, "grad_norm": 1.503550051002392, "learning_rate": 4.607288386031242e-05, "loss": 0.5291, "step": 6929 }, { "epoch": 0.8217716115261473, "grad_norm": 1.4119589172598495, "learning_rate": 4.607159225801191e-05, "loss": 0.4992, "step": 6930 }, { "epoch": 0.8218901932882723, "grad_norm": 1.6574402216183943, "learning_rate": 4.607030046145715e-05, "loss": 0.4943, "step": 6931 }, { "epoch": 0.8220087750503973, "grad_norm": 1.458623539129203, "learning_rate": 4.606900847066006e-05, "loss": 0.5854, "step": 6932 }, { "epoch": 0.8221273568125222, "grad_norm": 1.2781132734012055, "learning_rate": 4.606771628563256e-05, "loss": 0.3845, "step": 6933 }, { "epoch": 0.8222459385746472, "grad_norm": 1.1542569844297383, "learning_rate": 4.606642390638654e-05, "loss": 0.3649, "step": 6934 }, { "epoch": 0.8223645203367722, "grad_norm": 1.3554223559110044, "learning_rate": 4.606513133293392e-05, "loss": 0.3884, "step": 6935 }, { "epoch": 0.8224831020988972, "grad_norm": 1.1868936047195198, "learning_rate": 4.606383856528663e-05, "loss": 0.3245, "step": 6936 }, { "epoch": 0.8226016838610222, "grad_norm": 1.2623820950366946, "learning_rate": 4.606254560345657e-05, "loss": 0.4638, "step": 6937 }, { "epoch": 0.8227202656231472, "grad_norm": 1.3341412846569125, "learning_rate": 4.6061252447455686e-05, "loss": 0.3575, "step": 6938 }, { "epoch": 0.8228388473852721, "grad_norm": 1.974442373446165, "learning_rate": 4.6059959097295867e-05, "loss": 0.6477, "step": 6939 }, { "epoch": 0.8229574291473971, "grad_norm": 1.5354035250887335, "learning_rate": 4.6058665552989055e-05, "loss": 0.4042, "step": 6940 }, { "epoch": 0.8230760109095221, "grad_norm": 1.5673451621791408, "learning_rate": 4.6057371814547165e-05, "loss": 0.5021, "step": 6941 }, { "epoch": 0.8231945926716471, "grad_norm": 1.457820319200091, "learning_rate": 4.6056077881982135e-05, "loss": 0.3568, "step": 6942 }, { "epoch": 0.823313174433772, "grad_norm": 1.6666815646071398, "learning_rate": 4.6054783755305884e-05, "loss": 0.5243, "step": 6943 }, { "epoch": 0.823431756195897, "grad_norm": 1.136554379497956, "learning_rate": 4.6053489434530354e-05, "loss": 0.3402, "step": 6944 }, { "epoch": 0.823550337958022, "grad_norm": 1.3902832161326595, "learning_rate": 4.6052194919667455e-05, "loss": 0.3765, "step": 6945 }, { "epoch": 0.823668919720147, "grad_norm": 1.2423878576582859, "learning_rate": 4.605090021072914e-05, "loss": 0.3154, "step": 6946 }, { "epoch": 0.823787501482272, "grad_norm": 1.5028227362354765, "learning_rate": 4.604960530772734e-05, "loss": 0.5243, "step": 6947 }, { "epoch": 0.823906083244397, "grad_norm": 1.2211456071869247, "learning_rate": 4.604831021067399e-05, "loss": 0.3656, "step": 6948 }, { "epoch": 0.824024665006522, "grad_norm": 1.8252466684665545, "learning_rate": 4.604701491958104e-05, "loss": 0.6281, "step": 6949 }, { "epoch": 0.824143246768647, "grad_norm": 1.808838162721825, "learning_rate": 4.604571943446042e-05, "loss": 0.5226, "step": 6950 }, { "epoch": 0.824261828530772, "grad_norm": 1.3533664398911698, "learning_rate": 4.604442375532407e-05, "loss": 0.3456, "step": 6951 }, { "epoch": 0.824380410292897, "grad_norm": 1.231223686368119, "learning_rate": 4.604312788218394e-05, "loss": 0.4113, "step": 6952 }, { "epoch": 0.824498992055022, "grad_norm": 1.1756746085916812, "learning_rate": 4.604183181505198e-05, "loss": 0.319, "step": 6953 }, { "epoch": 0.824617573817147, "grad_norm": 1.423148296952245, "learning_rate": 4.604053555394012e-05, "loss": 0.4873, "step": 6954 }, { "epoch": 0.8247361555792719, "grad_norm": 1.3273088697151094, "learning_rate": 4.6039239098860344e-05, "loss": 0.4259, "step": 6955 }, { "epoch": 0.8248547373413969, "grad_norm": 1.5229004248873121, "learning_rate": 4.603794244982457e-05, "loss": 0.4482, "step": 6956 }, { "epoch": 0.8249733191035219, "grad_norm": 1.4933619675322012, "learning_rate": 4.603664560684476e-05, "loss": 0.4042, "step": 6957 }, { "epoch": 0.8250919008656469, "grad_norm": 1.750799471665653, "learning_rate": 4.603534856993289e-05, "loss": 0.5006, "step": 6958 }, { "epoch": 0.8252104826277719, "grad_norm": 1.397020327172941, "learning_rate": 4.603405133910089e-05, "loss": 0.3103, "step": 6959 }, { "epoch": 0.8253290643898968, "grad_norm": 1.795183617652027, "learning_rate": 4.603275391436074e-05, "loss": 0.5132, "step": 6960 }, { "epoch": 0.8254476461520218, "grad_norm": 1.6485748786931103, "learning_rate": 4.6031456295724383e-05, "loss": 0.5868, "step": 6961 }, { "epoch": 0.8255662279141468, "grad_norm": 1.5684953372904735, "learning_rate": 4.603015848320379e-05, "loss": 0.4464, "step": 6962 }, { "epoch": 0.8256848096762718, "grad_norm": 1.5632327602450327, "learning_rate": 4.602886047681093e-05, "loss": 0.4294, "step": 6963 }, { "epoch": 0.8258033914383968, "grad_norm": 1.5001463880236297, "learning_rate": 4.602756227655776e-05, "loss": 0.4626, "step": 6964 }, { "epoch": 0.8259219732005217, "grad_norm": 1.4080889700794406, "learning_rate": 4.6026263882456254e-05, "loss": 0.4097, "step": 6965 }, { "epoch": 0.8260405549626467, "grad_norm": 1.3794714294782777, "learning_rate": 4.6024965294518375e-05, "loss": 0.4318, "step": 6966 }, { "epoch": 0.8261591367247717, "grad_norm": 1.0716514553930818, "learning_rate": 4.60236665127561e-05, "loss": 0.2836, "step": 6967 }, { "epoch": 0.8262777184868967, "grad_norm": 1.5122739657269972, "learning_rate": 4.60223675371814e-05, "loss": 0.426, "step": 6968 }, { "epoch": 0.8263963002490217, "grad_norm": 1.7292348727545745, "learning_rate": 4.6021068367806254e-05, "loss": 0.3883, "step": 6969 }, { "epoch": 0.8265148820111466, "grad_norm": 1.301283454409529, "learning_rate": 4.601976900464263e-05, "loss": 0.3437, "step": 6970 }, { "epoch": 0.8266334637732716, "grad_norm": 1.2990687082770953, "learning_rate": 4.601846944770252e-05, "loss": 0.4559, "step": 6971 }, { "epoch": 0.8267520455353966, "grad_norm": 1.677805448024113, "learning_rate": 4.6017169696997885e-05, "loss": 0.5687, "step": 6972 }, { "epoch": 0.8268706272975216, "grad_norm": 1.275535243061787, "learning_rate": 4.6015869752540726e-05, "loss": 0.3545, "step": 6973 }, { "epoch": 0.8269892090596467, "grad_norm": 1.7512093991972848, "learning_rate": 4.6014569614343015e-05, "loss": 0.5906, "step": 6974 }, { "epoch": 0.8271077908217717, "grad_norm": 1.4128695816313643, "learning_rate": 4.601326928241675e-05, "loss": 0.6029, "step": 6975 }, { "epoch": 0.8272263725838966, "grad_norm": 1.2237777444155091, "learning_rate": 4.601196875677391e-05, "loss": 0.4132, "step": 6976 }, { "epoch": 0.8273449543460216, "grad_norm": 1.2238879378782095, "learning_rate": 4.601066803742647e-05, "loss": 0.3819, "step": 6977 }, { "epoch": 0.8274635361081466, "grad_norm": 1.192288417155017, "learning_rate": 4.6009367124386446e-05, "loss": 0.4162, "step": 6978 }, { "epoch": 0.8275821178702716, "grad_norm": 1.3825399164737215, "learning_rate": 4.600806601766582e-05, "loss": 0.4884, "step": 6979 }, { "epoch": 0.8277006996323966, "grad_norm": 1.2906187145592458, "learning_rate": 4.600676471727658e-05, "loss": 0.4675, "step": 6980 }, { "epoch": 0.8278192813945215, "grad_norm": 1.2957363112187992, "learning_rate": 4.6005463223230747e-05, "loss": 0.3702, "step": 6981 }, { "epoch": 0.8279378631566465, "grad_norm": 1.286769334666312, "learning_rate": 4.600416153554029e-05, "loss": 0.3803, "step": 6982 }, { "epoch": 0.8280564449187715, "grad_norm": 1.201440980691219, "learning_rate": 4.600285965421722e-05, "loss": 0.4141, "step": 6983 }, { "epoch": 0.8281750266808965, "grad_norm": 1.4629779993047347, "learning_rate": 4.600155757927354e-05, "loss": 0.5193, "step": 6984 }, { "epoch": 0.8282936084430215, "grad_norm": 1.3676391736639164, "learning_rate": 4.6000255310721254e-05, "loss": 0.3998, "step": 6985 }, { "epoch": 0.8284121902051464, "grad_norm": 1.4316776908638378, "learning_rate": 4.5998952848572365e-05, "loss": 0.4908, "step": 6986 }, { "epoch": 0.8285307719672714, "grad_norm": 1.4839328772079194, "learning_rate": 4.599765019283888e-05, "loss": 0.6005, "step": 6987 }, { "epoch": 0.8286493537293964, "grad_norm": 1.4377477131650604, "learning_rate": 4.599634734353282e-05, "loss": 0.4846, "step": 6988 }, { "epoch": 0.8287679354915214, "grad_norm": 1.056073186504529, "learning_rate": 4.599504430066617e-05, "loss": 0.2784, "step": 6989 }, { "epoch": 0.8288865172536464, "grad_norm": 1.3670670524725252, "learning_rate": 4.599374106425097e-05, "loss": 0.5444, "step": 6990 }, { "epoch": 0.8290050990157714, "grad_norm": 1.0912284053833328, "learning_rate": 4.599243763429921e-05, "loss": 0.2909, "step": 6991 }, { "epoch": 0.8291236807778963, "grad_norm": 1.0938204015012638, "learning_rate": 4.599113401082293e-05, "loss": 0.3233, "step": 6992 }, { "epoch": 0.8292422625400213, "grad_norm": 1.6186364371919248, "learning_rate": 4.5989830193834125e-05, "loss": 0.5083, "step": 6993 }, { "epoch": 0.8293608443021463, "grad_norm": 1.20463992571718, "learning_rate": 4.5988526183344835e-05, "loss": 0.2843, "step": 6994 }, { "epoch": 0.8294794260642713, "grad_norm": 1.297824341384146, "learning_rate": 4.598722197936706e-05, "loss": 0.3951, "step": 6995 }, { "epoch": 0.8295980078263963, "grad_norm": 1.3483414726032132, "learning_rate": 4.5985917581912844e-05, "loss": 0.3697, "step": 6996 }, { "epoch": 0.8297165895885212, "grad_norm": 1.9845987365090307, "learning_rate": 4.598461299099419e-05, "loss": 0.5435, "step": 6997 }, { "epoch": 0.8298351713506462, "grad_norm": 2.023208583641064, "learning_rate": 4.5983308206623156e-05, "loss": 0.6105, "step": 6998 }, { "epoch": 0.8299537531127713, "grad_norm": 1.3047258227526979, "learning_rate": 4.598200322881174e-05, "loss": 0.3732, "step": 6999 }, { "epoch": 0.8300723348748963, "grad_norm": 1.7415085323948678, "learning_rate": 4.598069805757198e-05, "loss": 0.4355, "step": 7000 }, { "epoch": 0.8301909166370213, "grad_norm": 1.323108007798805, "learning_rate": 4.5979392692915926e-05, "loss": 0.3705, "step": 7001 }, { "epoch": 0.8303094983991463, "grad_norm": 1.3177974363006077, "learning_rate": 4.5978087134855584e-05, "loss": 0.3832, "step": 7002 }, { "epoch": 0.8304280801612712, "grad_norm": 1.4414650383152594, "learning_rate": 4.597678138340301e-05, "loss": 0.4101, "step": 7003 }, { "epoch": 0.8305466619233962, "grad_norm": 1.2780971317848426, "learning_rate": 4.597547543857024e-05, "loss": 0.4068, "step": 7004 }, { "epoch": 0.8306652436855212, "grad_norm": 1.474912335855074, "learning_rate": 4.5974169300369294e-05, "loss": 0.4016, "step": 7005 }, { "epoch": 0.8307838254476462, "grad_norm": 1.4057706102959007, "learning_rate": 4.5972862968812234e-05, "loss": 0.4014, "step": 7006 }, { "epoch": 0.8309024072097712, "grad_norm": 1.2985387662916321, "learning_rate": 4.59715564439111e-05, "loss": 0.426, "step": 7007 }, { "epoch": 0.8310209889718961, "grad_norm": 1.217946199130656, "learning_rate": 4.597024972567794e-05, "loss": 0.3529, "step": 7008 }, { "epoch": 0.8311395707340211, "grad_norm": 1.222818215572617, "learning_rate": 4.596894281412478e-05, "loss": 0.489, "step": 7009 }, { "epoch": 0.8312581524961461, "grad_norm": 1.1529796810884687, "learning_rate": 4.596763570926368e-05, "loss": 0.4121, "step": 7010 }, { "epoch": 0.8313767342582711, "grad_norm": 1.177618172693117, "learning_rate": 4.596632841110669e-05, "loss": 0.2922, "step": 7011 }, { "epoch": 0.8314953160203961, "grad_norm": 1.9259664332612327, "learning_rate": 4.596502091966587e-05, "loss": 0.4881, "step": 7012 }, { "epoch": 0.831613897782521, "grad_norm": 1.4278897329416238, "learning_rate": 4.596371323495327e-05, "loss": 0.3484, "step": 7013 }, { "epoch": 0.831732479544646, "grad_norm": 1.4325292044115803, "learning_rate": 4.596240535698094e-05, "loss": 0.5325, "step": 7014 }, { "epoch": 0.831851061306771, "grad_norm": 1.6014831359545445, "learning_rate": 4.596109728576093e-05, "loss": 0.4207, "step": 7015 }, { "epoch": 0.831969643068896, "grad_norm": 1.3809892362724274, "learning_rate": 4.595978902130531e-05, "loss": 0.4005, "step": 7016 }, { "epoch": 0.832088224831021, "grad_norm": 1.3629826009209878, "learning_rate": 4.595848056362614e-05, "loss": 0.4076, "step": 7017 }, { "epoch": 0.8322068065931459, "grad_norm": 1.2771677390653018, "learning_rate": 4.595717191273548e-05, "loss": 0.3686, "step": 7018 }, { "epoch": 0.8323253883552709, "grad_norm": 1.4861027517996053, "learning_rate": 4.59558630686454e-05, "loss": 0.4515, "step": 7019 }, { "epoch": 0.8324439701173959, "grad_norm": 1.4179092921865928, "learning_rate": 4.5954554031367944e-05, "loss": 0.3706, "step": 7020 }, { "epoch": 0.8325625518795209, "grad_norm": 1.7431695250480128, "learning_rate": 4.595324480091521e-05, "loss": 0.5178, "step": 7021 }, { "epoch": 0.8326811336416459, "grad_norm": 1.4752113720152238, "learning_rate": 4.595193537729925e-05, "loss": 0.4909, "step": 7022 }, { "epoch": 0.8327997154037708, "grad_norm": 1.303208485134168, "learning_rate": 4.595062576053214e-05, "loss": 0.5388, "step": 7023 }, { "epoch": 0.8329182971658959, "grad_norm": 1.3472554104888557, "learning_rate": 4.594931595062595e-05, "loss": 0.3815, "step": 7024 }, { "epoch": 0.8330368789280209, "grad_norm": 1.5961029701650007, "learning_rate": 4.5948005947592755e-05, "loss": 0.5441, "step": 7025 }, { "epoch": 0.8331554606901459, "grad_norm": 1.2503076915700297, "learning_rate": 4.5946695751444633e-05, "loss": 0.4817, "step": 7026 }, { "epoch": 0.8332740424522709, "grad_norm": 1.2956803279040314, "learning_rate": 4.5945385362193675e-05, "loss": 0.343, "step": 7027 }, { "epoch": 0.8333926242143959, "grad_norm": 1.407970940727167, "learning_rate": 4.5944074779851933e-05, "loss": 0.4484, "step": 7028 }, { "epoch": 0.8335112059765208, "grad_norm": 1.2176798813934289, "learning_rate": 4.594276400443151e-05, "loss": 0.2747, "step": 7029 }, { "epoch": 0.8336297877386458, "grad_norm": 1.3433885806433268, "learning_rate": 4.5941453035944485e-05, "loss": 0.5087, "step": 7030 }, { "epoch": 0.8337483695007708, "grad_norm": 1.3279504014628545, "learning_rate": 4.594014187440294e-05, "loss": 0.4877, "step": 7031 }, { "epoch": 0.8338669512628958, "grad_norm": 1.1905338524480276, "learning_rate": 4.593883051981897e-05, "loss": 0.4173, "step": 7032 }, { "epoch": 0.8339855330250208, "grad_norm": 1.0905512495411593, "learning_rate": 4.5937518972204665e-05, "loss": 0.3036, "step": 7033 }, { "epoch": 0.8341041147871457, "grad_norm": 1.5315953398477353, "learning_rate": 4.59362072315721e-05, "loss": 0.552, "step": 7034 }, { "epoch": 0.8342226965492707, "grad_norm": 1.1345950037972643, "learning_rate": 4.593489529793338e-05, "loss": 0.3352, "step": 7035 }, { "epoch": 0.8343412783113957, "grad_norm": 1.3299967567027684, "learning_rate": 4.59335831713006e-05, "loss": 0.3623, "step": 7036 }, { "epoch": 0.8344598600735207, "grad_norm": 1.385481521275535, "learning_rate": 4.5932270851685854e-05, "loss": 0.3638, "step": 7037 }, { "epoch": 0.8345784418356457, "grad_norm": 1.152502041801949, "learning_rate": 4.593095833910124e-05, "loss": 0.4568, "step": 7038 }, { "epoch": 0.8346970235977706, "grad_norm": 1.1840830489668557, "learning_rate": 4.592964563355886e-05, "loss": 0.362, "step": 7039 }, { "epoch": 0.8348156053598956, "grad_norm": 1.1721577865440138, "learning_rate": 4.5928332735070804e-05, "loss": 0.3801, "step": 7040 }, { "epoch": 0.8349341871220206, "grad_norm": 1.6825170299424048, "learning_rate": 4.5927019643649184e-05, "loss": 0.4554, "step": 7041 }, { "epoch": 0.8350527688841456, "grad_norm": 1.277039033052309, "learning_rate": 4.592570635930611e-05, "loss": 0.4206, "step": 7042 }, { "epoch": 0.8351713506462706, "grad_norm": 1.5029019206662342, "learning_rate": 4.592439288205368e-05, "loss": 0.4647, "step": 7043 }, { "epoch": 0.8352899324083956, "grad_norm": 1.1695388257990311, "learning_rate": 4.592307921190402e-05, "loss": 0.4493, "step": 7044 }, { "epoch": 0.8354085141705205, "grad_norm": 1.121537457174954, "learning_rate": 4.592176534886922e-05, "loss": 0.3126, "step": 7045 }, { "epoch": 0.8355270959326455, "grad_norm": 1.3393426096482335, "learning_rate": 4.592045129296139e-05, "loss": 0.2942, "step": 7046 }, { "epoch": 0.8356456776947705, "grad_norm": 1.651560973230691, "learning_rate": 4.591913704419266e-05, "loss": 0.5033, "step": 7047 }, { "epoch": 0.8357642594568955, "grad_norm": 1.6291888578208766, "learning_rate": 4.591782260257514e-05, "loss": 0.5085, "step": 7048 }, { "epoch": 0.8358828412190206, "grad_norm": 1.5552880942083616, "learning_rate": 4.5916507968120955e-05, "loss": 0.5093, "step": 7049 }, { "epoch": 0.8360014229811455, "grad_norm": 1.3934271721112037, "learning_rate": 4.5915193140842204e-05, "loss": 0.3936, "step": 7050 }, { "epoch": 0.8361200047432705, "grad_norm": 1.4141378237785043, "learning_rate": 4.5913878120751026e-05, "loss": 0.4652, "step": 7051 }, { "epoch": 0.8362385865053955, "grad_norm": 1.465209834025231, "learning_rate": 4.5912562907859536e-05, "loss": 0.4979, "step": 7052 }, { "epoch": 0.8363571682675205, "grad_norm": 1.997946803572848, "learning_rate": 4.591124750217986e-05, "loss": 0.4689, "step": 7053 }, { "epoch": 0.8364757500296455, "grad_norm": 1.3791090166662403, "learning_rate": 4.590993190372412e-05, "loss": 0.4335, "step": 7054 }, { "epoch": 0.8365943317917705, "grad_norm": 1.3987936562100025, "learning_rate": 4.590861611250447e-05, "loss": 0.4838, "step": 7055 }, { "epoch": 0.8367129135538954, "grad_norm": 1.4040617553804757, "learning_rate": 4.5907300128532994e-05, "loss": 0.4218, "step": 7056 }, { "epoch": 0.8368314953160204, "grad_norm": 1.3195334841099744, "learning_rate": 4.5905983951821864e-05, "loss": 0.4051, "step": 7057 }, { "epoch": 0.8369500770781454, "grad_norm": 1.3992787459947145, "learning_rate": 4.590466758238319e-05, "loss": 0.5247, "step": 7058 }, { "epoch": 0.8370686588402704, "grad_norm": 1.3510459284521714, "learning_rate": 4.590335102022912e-05, "loss": 0.4949, "step": 7059 }, { "epoch": 0.8371872406023954, "grad_norm": 1.34493887941941, "learning_rate": 4.590203426537179e-05, "loss": 0.4047, "step": 7060 }, { "epoch": 0.8373058223645203, "grad_norm": 1.3277005269775548, "learning_rate": 4.5900717317823336e-05, "loss": 0.4176, "step": 7061 }, { "epoch": 0.8374244041266453, "grad_norm": 1.4865371761835748, "learning_rate": 4.589940017759589e-05, "loss": 0.5581, "step": 7062 }, { "epoch": 0.8375429858887703, "grad_norm": 1.3166918451989758, "learning_rate": 4.5898082844701605e-05, "loss": 0.4412, "step": 7063 }, { "epoch": 0.8376615676508953, "grad_norm": 1.3356943839391102, "learning_rate": 4.589676531915264e-05, "loss": 0.4443, "step": 7064 }, { "epoch": 0.8377801494130203, "grad_norm": 1.7792351803217843, "learning_rate": 4.5895447600961104e-05, "loss": 0.6642, "step": 7065 }, { "epoch": 0.8378987311751452, "grad_norm": 1.3061607388991763, "learning_rate": 4.589412969013917e-05, "loss": 0.4908, "step": 7066 }, { "epoch": 0.8380173129372702, "grad_norm": 1.7249280742012534, "learning_rate": 4.589281158669898e-05, "loss": 0.5537, "step": 7067 }, { "epoch": 0.8381358946993952, "grad_norm": 1.4440626827836363, "learning_rate": 4.58914932906527e-05, "loss": 0.4878, "step": 7068 }, { "epoch": 0.8382544764615202, "grad_norm": 1.2131991762409167, "learning_rate": 4.5890174802012454e-05, "loss": 0.416, "step": 7069 }, { "epoch": 0.8383730582236452, "grad_norm": 1.598470333856422, "learning_rate": 4.588885612079042e-05, "loss": 0.4335, "step": 7070 }, { "epoch": 0.8384916399857701, "grad_norm": 1.1999180085997176, "learning_rate": 4.588753724699875e-05, "loss": 0.4231, "step": 7071 }, { "epoch": 0.8386102217478951, "grad_norm": 1.3047713823503646, "learning_rate": 4.588621818064961e-05, "loss": 0.3889, "step": 7072 }, { "epoch": 0.8387288035100201, "grad_norm": 1.2617567814350175, "learning_rate": 4.588489892175514e-05, "loss": 0.3253, "step": 7073 }, { "epoch": 0.8388473852721452, "grad_norm": 1.2286731625684353, "learning_rate": 4.588357947032751e-05, "loss": 0.335, "step": 7074 }, { "epoch": 0.8389659670342702, "grad_norm": 1.5926813583968211, "learning_rate": 4.588225982637889e-05, "loss": 0.3221, "step": 7075 }, { "epoch": 0.8390845487963952, "grad_norm": 1.1186845032068036, "learning_rate": 4.588093998992144e-05, "loss": 0.2998, "step": 7076 }, { "epoch": 0.8392031305585201, "grad_norm": 2.095914160803281, "learning_rate": 4.5879619960967334e-05, "loss": 0.5813, "step": 7077 }, { "epoch": 0.8393217123206451, "grad_norm": 0.9974205208866932, "learning_rate": 4.5878299739528726e-05, "loss": 0.3211, "step": 7078 }, { "epoch": 0.8394402940827701, "grad_norm": 1.5250642261056486, "learning_rate": 4.5876979325617805e-05, "loss": 0.4861, "step": 7079 }, { "epoch": 0.8395588758448951, "grad_norm": 2.074707334032716, "learning_rate": 4.587565871924674e-05, "loss": 0.6672, "step": 7080 }, { "epoch": 0.8396774576070201, "grad_norm": 1.3666359990800323, "learning_rate": 4.58743379204277e-05, "loss": 0.3735, "step": 7081 }, { "epoch": 0.839796039369145, "grad_norm": 1.6357284388119466, "learning_rate": 4.587301692917285e-05, "loss": 0.4192, "step": 7082 }, { "epoch": 0.83991462113127, "grad_norm": 1.04344638958246, "learning_rate": 4.5871695745494395e-05, "loss": 0.4071, "step": 7083 }, { "epoch": 0.840033202893395, "grad_norm": 1.2036471971444174, "learning_rate": 4.587037436940449e-05, "loss": 0.304, "step": 7084 }, { "epoch": 0.84015178465552, "grad_norm": 1.813439379748862, "learning_rate": 4.586905280091533e-05, "loss": 0.4434, "step": 7085 }, { "epoch": 0.840270366417645, "grad_norm": 1.1928467386921653, "learning_rate": 4.58677310400391e-05, "loss": 0.3803, "step": 7086 }, { "epoch": 0.84038894817977, "grad_norm": 1.0191750949701213, "learning_rate": 4.586640908678797e-05, "loss": 0.3215, "step": 7087 }, { "epoch": 0.8405075299418949, "grad_norm": 1.2531707690622746, "learning_rate": 4.586508694117414e-05, "loss": 0.3416, "step": 7088 }, { "epoch": 0.8406261117040199, "grad_norm": 1.1534343618513794, "learning_rate": 4.586376460320979e-05, "loss": 0.3042, "step": 7089 }, { "epoch": 0.8407446934661449, "grad_norm": 1.7457155837884886, "learning_rate": 4.586244207290712e-05, "loss": 0.4733, "step": 7090 }, { "epoch": 0.8408632752282699, "grad_norm": 1.4711053836806198, "learning_rate": 4.586111935027832e-05, "loss": 0.471, "step": 7091 }, { "epoch": 0.8409818569903948, "grad_norm": 0.9898910560389165, "learning_rate": 4.585979643533558e-05, "loss": 0.2836, "step": 7092 }, { "epoch": 0.8411004387525198, "grad_norm": 1.5649234658071023, "learning_rate": 4.58584733280911e-05, "loss": 0.4703, "step": 7093 }, { "epoch": 0.8412190205146448, "grad_norm": 1.2220656056139696, "learning_rate": 4.585715002855706e-05, "loss": 0.4042, "step": 7094 }, { "epoch": 0.8413376022767698, "grad_norm": 1.4315612745030768, "learning_rate": 4.585582653674568e-05, "loss": 0.4769, "step": 7095 }, { "epoch": 0.8414561840388948, "grad_norm": 1.4934335763558044, "learning_rate": 4.5854502852669164e-05, "loss": 0.5221, "step": 7096 }, { "epoch": 0.8415747658010198, "grad_norm": 1.626095139825517, "learning_rate": 4.585317897633969e-05, "loss": 0.5574, "step": 7097 }, { "epoch": 0.8416933475631447, "grad_norm": 1.74507221386313, "learning_rate": 4.5851854907769494e-05, "loss": 0.5843, "step": 7098 }, { "epoch": 0.8418119293252698, "grad_norm": 1.3088064323612574, "learning_rate": 4.585053064697076e-05, "loss": 0.3782, "step": 7099 }, { "epoch": 0.8419305110873948, "grad_norm": 1.129915061734744, "learning_rate": 4.5849206193955696e-05, "loss": 0.421, "step": 7100 }, { "epoch": 0.8420490928495198, "grad_norm": 1.1488437001107814, "learning_rate": 4.5847881548736525e-05, "loss": 0.3437, "step": 7101 }, { "epoch": 0.8421676746116448, "grad_norm": 1.4538757355004113, "learning_rate": 4.584655671132545e-05, "loss": 0.6788, "step": 7102 }, { "epoch": 0.8422862563737697, "grad_norm": 1.051608260963614, "learning_rate": 4.584523168173468e-05, "loss": 0.3167, "step": 7103 }, { "epoch": 0.8424048381358947, "grad_norm": 1.0065907038271593, "learning_rate": 4.5843906459976444e-05, "loss": 0.2773, "step": 7104 }, { "epoch": 0.8425234198980197, "grad_norm": 1.2168388787979423, "learning_rate": 4.584258104606295e-05, "loss": 0.3864, "step": 7105 }, { "epoch": 0.8426420016601447, "grad_norm": 1.511921838918595, "learning_rate": 4.584125544000642e-05, "loss": 0.4618, "step": 7106 }, { "epoch": 0.8427605834222697, "grad_norm": 1.1175289116855611, "learning_rate": 4.583992964181906e-05, "loss": 0.3626, "step": 7107 }, { "epoch": 0.8428791651843947, "grad_norm": 1.9887317852487079, "learning_rate": 4.583860365151311e-05, "loss": 0.7368, "step": 7108 }, { "epoch": 0.8429977469465196, "grad_norm": 1.3783653745217441, "learning_rate": 4.583727746910079e-05, "loss": 0.4926, "step": 7109 }, { "epoch": 0.8431163287086446, "grad_norm": 1.5291942744241511, "learning_rate": 4.583595109459432e-05, "loss": 0.4481, "step": 7110 }, { "epoch": 0.8432349104707696, "grad_norm": 1.4389913859236114, "learning_rate": 4.5834624528005945e-05, "loss": 0.4191, "step": 7111 }, { "epoch": 0.8433534922328946, "grad_norm": 1.310822690491155, "learning_rate": 4.583329776934787e-05, "loss": 0.3674, "step": 7112 }, { "epoch": 0.8434720739950196, "grad_norm": 1.424451152306023, "learning_rate": 4.583197081863233e-05, "loss": 0.513, "step": 7113 }, { "epoch": 0.8435906557571445, "grad_norm": 1.6624625637064836, "learning_rate": 4.5830643675871575e-05, "loss": 0.5131, "step": 7114 }, { "epoch": 0.8437092375192695, "grad_norm": 1.196579809051718, "learning_rate": 4.5829316341077836e-05, "loss": 0.3587, "step": 7115 }, { "epoch": 0.8438278192813945, "grad_norm": 1.4197360568337678, "learning_rate": 4.5827988814263326e-05, "loss": 0.4254, "step": 7116 }, { "epoch": 0.8439464010435195, "grad_norm": 1.152824042999966, "learning_rate": 4.582666109544032e-05, "loss": 0.3434, "step": 7117 }, { "epoch": 0.8440649828056445, "grad_norm": 1.3464443804008592, "learning_rate": 4.582533318462102e-05, "loss": 0.4305, "step": 7118 }, { "epoch": 0.8441835645677694, "grad_norm": 1.3909288006190614, "learning_rate": 4.582400508181769e-05, "loss": 0.4006, "step": 7119 }, { "epoch": 0.8443021463298944, "grad_norm": 1.218464298333134, "learning_rate": 4.5822676787042576e-05, "loss": 0.3378, "step": 7120 }, { "epoch": 0.8444207280920194, "grad_norm": 1.6334404048672002, "learning_rate": 4.582134830030791e-05, "loss": 0.5409, "step": 7121 }, { "epoch": 0.8445393098541444, "grad_norm": 1.4340212694181875, "learning_rate": 4.582001962162595e-05, "loss": 0.3957, "step": 7122 }, { "epoch": 0.8446578916162694, "grad_norm": 1.2939794677996002, "learning_rate": 4.5818690751008944e-05, "loss": 0.2975, "step": 7123 }, { "epoch": 0.8447764733783945, "grad_norm": 1.6975760181967092, "learning_rate": 4.581736168846913e-05, "loss": 0.6343, "step": 7124 }, { "epoch": 0.8448950551405194, "grad_norm": 1.2706705289630686, "learning_rate": 4.581603243401877e-05, "loss": 0.3457, "step": 7125 }, { "epoch": 0.8450136369026444, "grad_norm": 1.4775136783990745, "learning_rate": 4.5814702987670116e-05, "loss": 0.4266, "step": 7126 }, { "epoch": 0.8451322186647694, "grad_norm": 1.2670524345010614, "learning_rate": 4.581337334943543e-05, "loss": 0.358, "step": 7127 }, { "epoch": 0.8452508004268944, "grad_norm": 1.3161225225340951, "learning_rate": 4.5812043519326964e-05, "loss": 0.4068, "step": 7128 }, { "epoch": 0.8453693821890194, "grad_norm": 1.3684417052894882, "learning_rate": 4.581071349735698e-05, "loss": 0.4186, "step": 7129 }, { "epoch": 0.8454879639511443, "grad_norm": 1.1282050336378227, "learning_rate": 4.5809383283537736e-05, "loss": 0.3188, "step": 7130 }, { "epoch": 0.8456065457132693, "grad_norm": 1.4499393382749237, "learning_rate": 4.580805287788149e-05, "loss": 0.3888, "step": 7131 }, { "epoch": 0.8457251274753943, "grad_norm": 1.181365451756441, "learning_rate": 4.580672228040052e-05, "loss": 0.3403, "step": 7132 }, { "epoch": 0.8458437092375193, "grad_norm": 1.2331775536551288, "learning_rate": 4.580539149110708e-05, "loss": 0.404, "step": 7133 }, { "epoch": 0.8459622909996443, "grad_norm": 1.0477784082142796, "learning_rate": 4.5804060510013453e-05, "loss": 0.3819, "step": 7134 }, { "epoch": 0.8460808727617692, "grad_norm": 1.0686391539434563, "learning_rate": 4.580272933713189e-05, "loss": 0.3071, "step": 7135 }, { "epoch": 0.8461994545238942, "grad_norm": 0.8335998591155342, "learning_rate": 4.580139797247468e-05, "loss": 0.2631, "step": 7136 }, { "epoch": 0.8463180362860192, "grad_norm": 1.5728601191348452, "learning_rate": 4.580006641605409e-05, "loss": 0.5338, "step": 7137 }, { "epoch": 0.8464366180481442, "grad_norm": 1.7777007394438038, "learning_rate": 4.579873466788238e-05, "loss": 0.5172, "step": 7138 }, { "epoch": 0.8465551998102692, "grad_norm": 1.3866859914169023, "learning_rate": 4.5797402727971863e-05, "loss": 0.483, "step": 7139 }, { "epoch": 0.8466737815723941, "grad_norm": 1.2534540039187196, "learning_rate": 4.579607059633478e-05, "loss": 0.406, "step": 7140 }, { "epoch": 0.8467923633345191, "grad_norm": 1.7880250666648718, "learning_rate": 4.5794738272983426e-05, "loss": 0.3913, "step": 7141 }, { "epoch": 0.8469109450966441, "grad_norm": 1.4751278428566217, "learning_rate": 4.57934057579301e-05, "loss": 0.4833, "step": 7142 }, { "epoch": 0.8470295268587691, "grad_norm": 1.0430644173990251, "learning_rate": 4.579207305118706e-05, "loss": 0.2729, "step": 7143 }, { "epoch": 0.8471481086208941, "grad_norm": 1.1248108711877975, "learning_rate": 4.579074015276661e-05, "loss": 0.2185, "step": 7144 }, { "epoch": 0.847266690383019, "grad_norm": 1.1208936854298543, "learning_rate": 4.578940706268102e-05, "loss": 0.3103, "step": 7145 }, { "epoch": 0.847385272145144, "grad_norm": 1.1388180427140813, "learning_rate": 4.57880737809426e-05, "loss": 0.2301, "step": 7146 }, { "epoch": 0.847503853907269, "grad_norm": 1.5256019270633212, "learning_rate": 4.5786740307563636e-05, "loss": 0.3796, "step": 7147 }, { "epoch": 0.847622435669394, "grad_norm": 1.0476899328724834, "learning_rate": 4.57854066425564e-05, "loss": 0.2943, "step": 7148 }, { "epoch": 0.8477410174315191, "grad_norm": 1.300570746925377, "learning_rate": 4.5784072785933226e-05, "loss": 0.3186, "step": 7149 }, { "epoch": 0.8478595991936441, "grad_norm": 1.5456489869381351, "learning_rate": 4.578273873770638e-05, "loss": 0.4647, "step": 7150 }, { "epoch": 0.847978180955769, "grad_norm": 1.429610395998424, "learning_rate": 4.578140449788816e-05, "loss": 0.4229, "step": 7151 }, { "epoch": 0.848096762717894, "grad_norm": 1.6774194032141285, "learning_rate": 4.578007006649088e-05, "loss": 0.4414, "step": 7152 }, { "epoch": 0.848215344480019, "grad_norm": 1.3601667902058143, "learning_rate": 4.577873544352683e-05, "loss": 0.3169, "step": 7153 }, { "epoch": 0.848333926242144, "grad_norm": 1.6154494276466942, "learning_rate": 4.577740062900833e-05, "loss": 0.4521, "step": 7154 }, { "epoch": 0.848452508004269, "grad_norm": 1.2351056370607754, "learning_rate": 4.5776065622947665e-05, "loss": 0.2803, "step": 7155 }, { "epoch": 0.848571089766394, "grad_norm": 1.9120271250490748, "learning_rate": 4.577473042535716e-05, "loss": 0.5532, "step": 7156 }, { "epoch": 0.8486896715285189, "grad_norm": 1.6792942219496667, "learning_rate": 4.577339503624911e-05, "loss": 0.4541, "step": 7157 }, { "epoch": 0.8488082532906439, "grad_norm": 1.2644364383548468, "learning_rate": 4.5772059455635844e-05, "loss": 0.415, "step": 7158 }, { "epoch": 0.8489268350527689, "grad_norm": 1.3922781116616507, "learning_rate": 4.577072368352965e-05, "loss": 0.3586, "step": 7159 }, { "epoch": 0.8490454168148939, "grad_norm": 0.935445422358795, "learning_rate": 4.576938771994286e-05, "loss": 0.26, "step": 7160 }, { "epoch": 0.8491639985770189, "grad_norm": 1.5077433486512617, "learning_rate": 4.576805156488778e-05, "loss": 0.4953, "step": 7161 }, { "epoch": 0.8492825803391438, "grad_norm": 1.3186123811017356, "learning_rate": 4.576671521837673e-05, "loss": 0.3984, "step": 7162 }, { "epoch": 0.8494011621012688, "grad_norm": 1.330872120112983, "learning_rate": 4.576537868042204e-05, "loss": 0.3489, "step": 7163 }, { "epoch": 0.8495197438633938, "grad_norm": 1.0891832917653554, "learning_rate": 4.576404195103602e-05, "loss": 0.3281, "step": 7164 }, { "epoch": 0.8496383256255188, "grad_norm": 1.515086410381966, "learning_rate": 4.576270503023099e-05, "loss": 0.4242, "step": 7165 }, { "epoch": 0.8497569073876438, "grad_norm": 1.7235485331824882, "learning_rate": 4.576136791801929e-05, "loss": 0.5893, "step": 7166 }, { "epoch": 0.8498754891497687, "grad_norm": 1.3256536449194125, "learning_rate": 4.576003061441323e-05, "loss": 0.3107, "step": 7167 }, { "epoch": 0.8499940709118937, "grad_norm": 1.3174273370121818, "learning_rate": 4.575869311942515e-05, "loss": 0.4094, "step": 7168 }, { "epoch": 0.8501126526740187, "grad_norm": 1.2243347140538134, "learning_rate": 4.5757355433067373e-05, "loss": 0.3756, "step": 7169 }, { "epoch": 0.8502312344361437, "grad_norm": 1.3082500947105287, "learning_rate": 4.5756017555352226e-05, "loss": 0.4354, "step": 7170 }, { "epoch": 0.8503498161982687, "grad_norm": 1.0801769316093435, "learning_rate": 4.5754679486292064e-05, "loss": 0.2775, "step": 7171 }, { "epoch": 0.8504683979603936, "grad_norm": 1.0242673999179286, "learning_rate": 4.57533412258992e-05, "loss": 0.294, "step": 7172 }, { "epoch": 0.8505869797225186, "grad_norm": 1.1738601544046217, "learning_rate": 4.575200277418598e-05, "loss": 0.3292, "step": 7173 }, { "epoch": 0.8507055614846437, "grad_norm": 1.4139951983912715, "learning_rate": 4.575066413116474e-05, "loss": 0.4115, "step": 7174 }, { "epoch": 0.8508241432467687, "grad_norm": 1.2164110478035621, "learning_rate": 4.574932529684783e-05, "loss": 0.3473, "step": 7175 }, { "epoch": 0.8509427250088937, "grad_norm": 1.2618028074409744, "learning_rate": 4.574798627124758e-05, "loss": 0.3162, "step": 7176 }, { "epoch": 0.8510613067710187, "grad_norm": 1.428061747159574, "learning_rate": 4.574664705437634e-05, "loss": 0.3791, "step": 7177 }, { "epoch": 0.8511798885331436, "grad_norm": 1.3850605702384007, "learning_rate": 4.574530764624645e-05, "loss": 0.4022, "step": 7178 }, { "epoch": 0.8512984702952686, "grad_norm": 1.4093269129504427, "learning_rate": 4.574396804687028e-05, "loss": 0.4825, "step": 7179 }, { "epoch": 0.8514170520573936, "grad_norm": 1.5341413164778281, "learning_rate": 4.574262825626014e-05, "loss": 0.3963, "step": 7180 }, { "epoch": 0.8515356338195186, "grad_norm": 1.3567623961945527, "learning_rate": 4.574128827442842e-05, "loss": 0.4112, "step": 7181 }, { "epoch": 0.8516542155816436, "grad_norm": 2.446230034976584, "learning_rate": 4.5739948101387455e-05, "loss": 0.7262, "step": 7182 }, { "epoch": 0.8517727973437685, "grad_norm": 1.2086296192747115, "learning_rate": 4.5738607737149594e-05, "loss": 0.342, "step": 7183 }, { "epoch": 0.8518913791058935, "grad_norm": 1.321827540265069, "learning_rate": 4.573726718172721e-05, "loss": 0.3345, "step": 7184 }, { "epoch": 0.8520099608680185, "grad_norm": 1.2018009716091063, "learning_rate": 4.5735926435132644e-05, "loss": 0.3284, "step": 7185 }, { "epoch": 0.8521285426301435, "grad_norm": 0.8700022424153238, "learning_rate": 4.573458549737827e-05, "loss": 0.2763, "step": 7186 }, { "epoch": 0.8522471243922685, "grad_norm": 1.3451856798328825, "learning_rate": 4.573324436847644e-05, "loss": 0.4308, "step": 7187 }, { "epoch": 0.8523657061543934, "grad_norm": 0.9938305128542443, "learning_rate": 4.573190304843953e-05, "loss": 0.2891, "step": 7188 }, { "epoch": 0.8524842879165184, "grad_norm": 1.450430468433095, "learning_rate": 4.573056153727989e-05, "loss": 0.4329, "step": 7189 }, { "epoch": 0.8526028696786434, "grad_norm": 1.151552524427243, "learning_rate": 4.572921983500989e-05, "loss": 0.3145, "step": 7190 }, { "epoch": 0.8527214514407684, "grad_norm": 1.07585458432924, "learning_rate": 4.57278779416419e-05, "loss": 0.3334, "step": 7191 }, { "epoch": 0.8528400332028934, "grad_norm": 1.9184042944349622, "learning_rate": 4.5726535857188316e-05, "loss": 0.7702, "step": 7192 }, { "epoch": 0.8529586149650183, "grad_norm": 1.2997294969661808, "learning_rate": 4.572519358166147e-05, "loss": 0.3647, "step": 7193 }, { "epoch": 0.8530771967271433, "grad_norm": 1.2397749364991366, "learning_rate": 4.572385111507376e-05, "loss": 0.3666, "step": 7194 }, { "epoch": 0.8531957784892683, "grad_norm": 1.3291678853883875, "learning_rate": 4.572250845743755e-05, "loss": 0.4318, "step": 7195 }, { "epoch": 0.8533143602513933, "grad_norm": 1.184146312080771, "learning_rate": 4.5721165608765225e-05, "loss": 0.385, "step": 7196 }, { "epoch": 0.8534329420135183, "grad_norm": 1.081240635788778, "learning_rate": 4.571982256906917e-05, "loss": 0.406, "step": 7197 }, { "epoch": 0.8535515237756434, "grad_norm": 1.2394027547660684, "learning_rate": 4.571847933836175e-05, "loss": 0.42, "step": 7198 }, { "epoch": 0.8536701055377683, "grad_norm": 1.212363036208342, "learning_rate": 4.571713591665536e-05, "loss": 0.3068, "step": 7199 }, { "epoch": 0.8537886872998933, "grad_norm": 1.6620619292836225, "learning_rate": 4.5715792303962386e-05, "loss": 0.5966, "step": 7200 }, { "epoch": 0.8539072690620183, "grad_norm": 1.4126581643668192, "learning_rate": 4.57144485002952e-05, "loss": 0.3321, "step": 7201 }, { "epoch": 0.8540258508241433, "grad_norm": 1.3820042969067414, "learning_rate": 4.5713104505666213e-05, "loss": 0.3713, "step": 7202 }, { "epoch": 0.8541444325862683, "grad_norm": 1.2879440354263358, "learning_rate": 4.5711760320087803e-05, "loss": 0.4177, "step": 7203 }, { "epoch": 0.8542630143483932, "grad_norm": 1.4790864268609756, "learning_rate": 4.571041594357235e-05, "loss": 0.4029, "step": 7204 }, { "epoch": 0.8543815961105182, "grad_norm": 1.5732935327504733, "learning_rate": 4.570907137613227e-05, "loss": 0.5195, "step": 7205 }, { "epoch": 0.8545001778726432, "grad_norm": 1.1219793877650712, "learning_rate": 4.5707726617779943e-05, "loss": 0.2925, "step": 7206 }, { "epoch": 0.8546187596347682, "grad_norm": 1.244067696569319, "learning_rate": 4.570638166852776e-05, "loss": 0.3946, "step": 7207 }, { "epoch": 0.8547373413968932, "grad_norm": 1.7764259090649517, "learning_rate": 4.570503652838815e-05, "loss": 0.4663, "step": 7208 }, { "epoch": 0.8548559231590181, "grad_norm": 1.2322336770989168, "learning_rate": 4.570369119737348e-05, "loss": 0.3218, "step": 7209 }, { "epoch": 0.8549745049211431, "grad_norm": 1.573261261542684, "learning_rate": 4.5702345675496166e-05, "loss": 0.4099, "step": 7210 }, { "epoch": 0.8550930866832681, "grad_norm": 1.2343368110775814, "learning_rate": 4.5700999962768614e-05, "loss": 0.2962, "step": 7211 }, { "epoch": 0.8552116684453931, "grad_norm": 1.6675441839032255, "learning_rate": 4.5699654059203225e-05, "loss": 0.474, "step": 7212 }, { "epoch": 0.8553302502075181, "grad_norm": 1.1715109092534155, "learning_rate": 4.569830796481241e-05, "loss": 0.3609, "step": 7213 }, { "epoch": 0.855448831969643, "grad_norm": 1.6089259214319938, "learning_rate": 4.5696961679608584e-05, "loss": 0.4562, "step": 7214 }, { "epoch": 0.855567413731768, "grad_norm": 1.4412469978839653, "learning_rate": 4.5695615203604146e-05, "loss": 0.3955, "step": 7215 }, { "epoch": 0.855685995493893, "grad_norm": 1.3827848078066278, "learning_rate": 4.5694268536811514e-05, "loss": 0.4146, "step": 7216 }, { "epoch": 0.855804577256018, "grad_norm": 1.2689538946340666, "learning_rate": 4.569292167924311e-05, "loss": 0.4168, "step": 7217 }, { "epoch": 0.855923159018143, "grad_norm": 1.6551243620809284, "learning_rate": 4.5691574630911336e-05, "loss": 0.4076, "step": 7218 }, { "epoch": 0.856041740780268, "grad_norm": 1.4175770036879314, "learning_rate": 4.5690227391828624e-05, "loss": 0.566, "step": 7219 }, { "epoch": 0.8561603225423929, "grad_norm": 1.6677587536128793, "learning_rate": 4.568887996200738e-05, "loss": 0.5586, "step": 7220 }, { "epoch": 0.8562789043045179, "grad_norm": 1.5621912674001708, "learning_rate": 4.5687532341460035e-05, "loss": 0.4438, "step": 7221 }, { "epoch": 0.8563974860666429, "grad_norm": 1.5197221377336454, "learning_rate": 4.568618453019901e-05, "loss": 0.4657, "step": 7222 }, { "epoch": 0.856516067828768, "grad_norm": 1.4254461731026589, "learning_rate": 4.568483652823673e-05, "loss": 0.48, "step": 7223 }, { "epoch": 0.856634649590893, "grad_norm": 1.4387945232129289, "learning_rate": 4.568348833558563e-05, "loss": 0.4482, "step": 7224 }, { "epoch": 0.856753231353018, "grad_norm": 1.1836569369377596, "learning_rate": 4.568213995225812e-05, "loss": 0.3391, "step": 7225 }, { "epoch": 0.8568718131151429, "grad_norm": 1.2195785612446952, "learning_rate": 4.568079137826665e-05, "loss": 0.4256, "step": 7226 }, { "epoch": 0.8569903948772679, "grad_norm": 1.4204226500928934, "learning_rate": 4.5679442613623644e-05, "loss": 0.3891, "step": 7227 }, { "epoch": 0.8571089766393929, "grad_norm": 1.3435542071392623, "learning_rate": 4.567809365834154e-05, "loss": 0.3556, "step": 7228 }, { "epoch": 0.8572275584015179, "grad_norm": 1.4999118970009129, "learning_rate": 4.567674451243276e-05, "loss": 0.4395, "step": 7229 }, { "epoch": 0.8573461401636429, "grad_norm": 1.1289444365423957, "learning_rate": 4.567539517590975e-05, "loss": 0.3238, "step": 7230 }, { "epoch": 0.8574647219257678, "grad_norm": 1.264474620617719, "learning_rate": 4.567404564878496e-05, "loss": 0.3813, "step": 7231 }, { "epoch": 0.8575833036878928, "grad_norm": 1.4625307855594791, "learning_rate": 4.5672695931070806e-05, "loss": 0.5091, "step": 7232 }, { "epoch": 0.8577018854500178, "grad_norm": 1.1142959631792297, "learning_rate": 4.567134602277976e-05, "loss": 0.3361, "step": 7233 }, { "epoch": 0.8578204672121428, "grad_norm": 1.4860897652517404, "learning_rate": 4.566999592392425e-05, "loss": 0.535, "step": 7234 }, { "epoch": 0.8579390489742678, "grad_norm": 1.5479028396742451, "learning_rate": 4.5668645634516726e-05, "loss": 0.4568, "step": 7235 }, { "epoch": 0.8580576307363927, "grad_norm": 1.2961927964718378, "learning_rate": 4.566729515456963e-05, "loss": 0.4268, "step": 7236 }, { "epoch": 0.8581762124985177, "grad_norm": 1.5087685748886726, "learning_rate": 4.566594448409542e-05, "loss": 0.3923, "step": 7237 }, { "epoch": 0.8582947942606427, "grad_norm": 1.0165692840669505, "learning_rate": 4.5664593623106544e-05, "loss": 0.2594, "step": 7238 }, { "epoch": 0.8584133760227677, "grad_norm": 2.277122188758163, "learning_rate": 4.566324257161545e-05, "loss": 0.6586, "step": 7239 }, { "epoch": 0.8585319577848927, "grad_norm": 0.9789638963805193, "learning_rate": 4.566189132963461e-05, "loss": 0.2685, "step": 7240 }, { "epoch": 0.8586505395470176, "grad_norm": 1.2959229486519026, "learning_rate": 4.566053989717646e-05, "loss": 0.3687, "step": 7241 }, { "epoch": 0.8587691213091426, "grad_norm": 1.90405346566684, "learning_rate": 4.5659188274253465e-05, "loss": 0.5954, "step": 7242 }, { "epoch": 0.8588877030712676, "grad_norm": 1.407913105975911, "learning_rate": 4.56578364608781e-05, "loss": 0.5968, "step": 7243 }, { "epoch": 0.8590062848333926, "grad_norm": 1.4751038551192563, "learning_rate": 4.565648445706281e-05, "loss": 0.3709, "step": 7244 }, { "epoch": 0.8591248665955176, "grad_norm": 1.4039496073803597, "learning_rate": 4.5655132262820056e-05, "loss": 0.4476, "step": 7245 }, { "epoch": 0.8592434483576425, "grad_norm": 1.1966749253824351, "learning_rate": 4.565377987816232e-05, "loss": 0.3857, "step": 7246 }, { "epoch": 0.8593620301197675, "grad_norm": 1.535926851951442, "learning_rate": 4.565242730310206e-05, "loss": 0.4, "step": 7247 }, { "epoch": 0.8594806118818926, "grad_norm": 1.3177854834484768, "learning_rate": 4.565107453765174e-05, "loss": 0.4102, "step": 7248 }, { "epoch": 0.8595991936440176, "grad_norm": 1.1153218508366356, "learning_rate": 4.564972158182384e-05, "loss": 0.3483, "step": 7249 }, { "epoch": 0.8597177754061426, "grad_norm": 0.9991740844194126, "learning_rate": 4.5648368435630834e-05, "loss": 0.3274, "step": 7250 }, { "epoch": 0.8598363571682676, "grad_norm": 1.1991559897951332, "learning_rate": 4.564701509908519e-05, "loss": 0.3228, "step": 7251 }, { "epoch": 0.8599549389303925, "grad_norm": 1.580928061408572, "learning_rate": 4.564566157219938e-05, "loss": 0.6491, "step": 7252 }, { "epoch": 0.8600735206925175, "grad_norm": 1.3620669990585974, "learning_rate": 4.5644307854985894e-05, "loss": 0.4298, "step": 7253 }, { "epoch": 0.8601921024546425, "grad_norm": 1.2506920734641562, "learning_rate": 4.56429539474572e-05, "loss": 0.3907, "step": 7254 }, { "epoch": 0.8603106842167675, "grad_norm": 1.81220072272726, "learning_rate": 4.5641599849625794e-05, "loss": 0.549, "step": 7255 }, { "epoch": 0.8604292659788925, "grad_norm": 1.6901194818035248, "learning_rate": 4.564024556150414e-05, "loss": 0.5167, "step": 7256 }, { "epoch": 0.8605478477410174, "grad_norm": 1.6550745626631662, "learning_rate": 4.5638891083104743e-05, "loss": 0.5885, "step": 7257 }, { "epoch": 0.8606664295031424, "grad_norm": 1.196782985394039, "learning_rate": 4.5637536414440065e-05, "loss": 0.3136, "step": 7258 }, { "epoch": 0.8607850112652674, "grad_norm": 1.247181198969873, "learning_rate": 4.563618155552262e-05, "loss": 0.4084, "step": 7259 }, { "epoch": 0.8609035930273924, "grad_norm": 1.2218942297562996, "learning_rate": 4.563482650636488e-05, "loss": 0.2182, "step": 7260 }, { "epoch": 0.8610221747895174, "grad_norm": 1.2992411118129639, "learning_rate": 4.563347126697935e-05, "loss": 0.4852, "step": 7261 }, { "epoch": 0.8611407565516424, "grad_norm": 1.351514278724903, "learning_rate": 4.563211583737851e-05, "loss": 0.4644, "step": 7262 }, { "epoch": 0.8612593383137673, "grad_norm": 1.4680034859532027, "learning_rate": 4.563076021757487e-05, "loss": 0.4816, "step": 7263 }, { "epoch": 0.8613779200758923, "grad_norm": 1.3502992356354593, "learning_rate": 4.562940440758091e-05, "loss": 0.3825, "step": 7264 }, { "epoch": 0.8614965018380173, "grad_norm": 1.0757651705675577, "learning_rate": 4.5628048407409155e-05, "loss": 0.3005, "step": 7265 }, { "epoch": 0.8616150836001423, "grad_norm": 1.6180130033516864, "learning_rate": 4.5626692217072074e-05, "loss": 0.5993, "step": 7266 }, { "epoch": 0.8617336653622673, "grad_norm": 1.5881343174749234, "learning_rate": 4.5625335836582195e-05, "loss": 0.533, "step": 7267 }, { "epoch": 0.8618522471243922, "grad_norm": 1.3225607468519462, "learning_rate": 4.562397926595201e-05, "loss": 0.506, "step": 7268 }, { "epoch": 0.8619708288865172, "grad_norm": 1.2355348588850255, "learning_rate": 4.5622622505194024e-05, "loss": 0.3525, "step": 7269 }, { "epoch": 0.8620894106486422, "grad_norm": 1.5086741503387628, "learning_rate": 4.562126555432075e-05, "loss": 0.5134, "step": 7270 }, { "epoch": 0.8622079924107672, "grad_norm": 1.6567325945214533, "learning_rate": 4.5619908413344694e-05, "loss": 0.5708, "step": 7271 }, { "epoch": 0.8623265741728922, "grad_norm": 1.337268046844791, "learning_rate": 4.561855108227836e-05, "loss": 0.299, "step": 7272 }, { "epoch": 0.8624451559350172, "grad_norm": 1.8350238533769732, "learning_rate": 4.561719356113428e-05, "loss": 0.4984, "step": 7273 }, { "epoch": 0.8625637376971422, "grad_norm": 1.4626656977901709, "learning_rate": 4.5615835849924955e-05, "loss": 0.4396, "step": 7274 }, { "epoch": 0.8626823194592672, "grad_norm": 1.3794101013005622, "learning_rate": 4.561447794866291e-05, "loss": 0.4039, "step": 7275 }, { "epoch": 0.8628009012213922, "grad_norm": 1.4743154596282135, "learning_rate": 4.561311985736064e-05, "loss": 0.4753, "step": 7276 }, { "epoch": 0.8629194829835172, "grad_norm": 1.3044749067279588, "learning_rate": 4.56117615760307e-05, "loss": 0.448, "step": 7277 }, { "epoch": 0.8630380647456422, "grad_norm": 1.2313529451815786, "learning_rate": 4.561040310468558e-05, "loss": 0.3353, "step": 7278 }, { "epoch": 0.8631566465077671, "grad_norm": 0.9761117459170572, "learning_rate": 4.5609044443337834e-05, "loss": 0.2572, "step": 7279 }, { "epoch": 0.8632752282698921, "grad_norm": 1.4772815452986243, "learning_rate": 4.560768559199996e-05, "loss": 0.4457, "step": 7280 }, { "epoch": 0.8633938100320171, "grad_norm": 1.5556919850799436, "learning_rate": 4.56063265506845e-05, "loss": 0.5264, "step": 7281 }, { "epoch": 0.8635123917941421, "grad_norm": 1.3856253910314773, "learning_rate": 4.560496731940398e-05, "loss": 0.3668, "step": 7282 }, { "epoch": 0.8636309735562671, "grad_norm": 1.327469926517054, "learning_rate": 4.560360789817092e-05, "loss": 0.4575, "step": 7283 }, { "epoch": 0.863749555318392, "grad_norm": 1.2585881078908525, "learning_rate": 4.560224828699786e-05, "loss": 0.4443, "step": 7284 }, { "epoch": 0.863868137080517, "grad_norm": 1.4485166280687547, "learning_rate": 4.5600888485897346e-05, "loss": 0.3869, "step": 7285 }, { "epoch": 0.863986718842642, "grad_norm": 1.117627504897691, "learning_rate": 4.55995284948819e-05, "loss": 0.333, "step": 7286 }, { "epoch": 0.864105300604767, "grad_norm": 1.612365582833256, "learning_rate": 4.5598168313964064e-05, "loss": 0.4684, "step": 7287 }, { "epoch": 0.864223882366892, "grad_norm": 1.3844785365958128, "learning_rate": 4.559680794315637e-05, "loss": 0.4613, "step": 7288 }, { "epoch": 0.8643424641290169, "grad_norm": 1.4126870795048978, "learning_rate": 4.559544738247136e-05, "loss": 0.4052, "step": 7289 }, { "epoch": 0.8644610458911419, "grad_norm": 1.998573003667266, "learning_rate": 4.5594086631921584e-05, "loss": 0.5056, "step": 7290 }, { "epoch": 0.8645796276532669, "grad_norm": 1.2040630968484565, "learning_rate": 4.559272569151959e-05, "loss": 0.35, "step": 7291 }, { "epoch": 0.8646982094153919, "grad_norm": 1.65640061563667, "learning_rate": 4.559136456127792e-05, "loss": 0.4595, "step": 7292 }, { "epoch": 0.8648167911775169, "grad_norm": 1.4814822336600904, "learning_rate": 4.559000324120911e-05, "loss": 0.367, "step": 7293 }, { "epoch": 0.8649353729396418, "grad_norm": 1.5132246235247184, "learning_rate": 4.558864173132572e-05, "loss": 0.5336, "step": 7294 }, { "epoch": 0.8650539547017668, "grad_norm": 1.1176134558115531, "learning_rate": 4.55872800316403e-05, "loss": 0.3577, "step": 7295 }, { "epoch": 0.8651725364638918, "grad_norm": 1.090306694268348, "learning_rate": 4.558591814216541e-05, "loss": 0.2857, "step": 7296 }, { "epoch": 0.8652911182260168, "grad_norm": 1.433719317049325, "learning_rate": 4.558455606291359e-05, "loss": 0.4027, "step": 7297 }, { "epoch": 0.8654096999881419, "grad_norm": 1.6721417577004416, "learning_rate": 4.5583193793897414e-05, "loss": 0.5055, "step": 7298 }, { "epoch": 0.8655282817502669, "grad_norm": 1.2089968469512535, "learning_rate": 4.558183133512943e-05, "loss": 0.4274, "step": 7299 }, { "epoch": 0.8656468635123918, "grad_norm": 1.6055891498798285, "learning_rate": 4.5580468686622194e-05, "loss": 0.5293, "step": 7300 }, { "epoch": 0.8657654452745168, "grad_norm": 1.4425252929514543, "learning_rate": 4.557910584838828e-05, "loss": 0.3672, "step": 7301 }, { "epoch": 0.8658840270366418, "grad_norm": 1.2747186941396376, "learning_rate": 4.557774282044025e-05, "loss": 0.4354, "step": 7302 }, { "epoch": 0.8660026087987668, "grad_norm": 1.4102791704856672, "learning_rate": 4.557637960279066e-05, "loss": 0.4173, "step": 7303 }, { "epoch": 0.8661211905608918, "grad_norm": 1.2143207345699873, "learning_rate": 4.557501619545208e-05, "loss": 0.3154, "step": 7304 }, { "epoch": 0.8662397723230167, "grad_norm": 1.176887771433976, "learning_rate": 4.557365259843708e-05, "loss": 0.3401, "step": 7305 }, { "epoch": 0.8663583540851417, "grad_norm": 1.6748457475953071, "learning_rate": 4.557228881175824e-05, "loss": 0.3923, "step": 7306 }, { "epoch": 0.8664769358472667, "grad_norm": 1.8718797406931287, "learning_rate": 4.557092483542812e-05, "loss": 0.648, "step": 7307 }, { "epoch": 0.8665955176093917, "grad_norm": 1.3440814574674707, "learning_rate": 4.55695606694593e-05, "loss": 0.398, "step": 7308 }, { "epoch": 0.8667140993715167, "grad_norm": 1.2981662704299994, "learning_rate": 4.5568196313864356e-05, "loss": 0.3446, "step": 7309 }, { "epoch": 0.8668326811336416, "grad_norm": 1.1927849497728173, "learning_rate": 4.556683176865586e-05, "loss": 0.4024, "step": 7310 }, { "epoch": 0.8669512628957666, "grad_norm": 1.257503295530811, "learning_rate": 4.5565467033846406e-05, "loss": 0.3471, "step": 7311 }, { "epoch": 0.8670698446578916, "grad_norm": 1.41468994836031, "learning_rate": 4.5564102109448554e-05, "loss": 0.4202, "step": 7312 }, { "epoch": 0.8671884264200166, "grad_norm": 1.4653973129115616, "learning_rate": 4.55627369954749e-05, "loss": 0.5187, "step": 7313 }, { "epoch": 0.8673070081821416, "grad_norm": 1.2161406509889499, "learning_rate": 4.5561371691938024e-05, "loss": 0.2886, "step": 7314 }, { "epoch": 0.8674255899442666, "grad_norm": 1.2474042306496682, "learning_rate": 4.5560006198850526e-05, "loss": 0.3675, "step": 7315 }, { "epoch": 0.8675441717063915, "grad_norm": 2.017584314892035, "learning_rate": 4.555864051622497e-05, "loss": 0.6731, "step": 7316 }, { "epoch": 0.8676627534685165, "grad_norm": 1.5942216169585766, "learning_rate": 4.5557274644073974e-05, "loss": 0.4822, "step": 7317 }, { "epoch": 0.8677813352306415, "grad_norm": 1.3491880081944014, "learning_rate": 4.55559085824101e-05, "loss": 0.3458, "step": 7318 }, { "epoch": 0.8678999169927665, "grad_norm": 1.1501681512053088, "learning_rate": 4.555454233124596e-05, "loss": 0.3004, "step": 7319 }, { "epoch": 0.8680184987548915, "grad_norm": 1.4401299590622108, "learning_rate": 4.555317589059415e-05, "loss": 0.3717, "step": 7320 }, { "epoch": 0.8681370805170164, "grad_norm": 1.1386699355063572, "learning_rate": 4.555180926046726e-05, "loss": 0.3464, "step": 7321 }, { "epoch": 0.8682556622791414, "grad_norm": 1.4254965165847429, "learning_rate": 4.5550442440877896e-05, "loss": 0.5318, "step": 7322 }, { "epoch": 0.8683742440412665, "grad_norm": 1.2053035241929624, "learning_rate": 4.554907543183865e-05, "loss": 0.3676, "step": 7323 }, { "epoch": 0.8684928258033915, "grad_norm": 1.8512066705449168, "learning_rate": 4.5547708233362117e-05, "loss": 0.7014, "step": 7324 }, { "epoch": 0.8686114075655165, "grad_norm": 1.117403592933197, "learning_rate": 4.554634084546092e-05, "loss": 0.3245, "step": 7325 }, { "epoch": 0.8687299893276414, "grad_norm": 1.5440509552945505, "learning_rate": 4.5544973268147664e-05, "loss": 0.4129, "step": 7326 }, { "epoch": 0.8688485710897664, "grad_norm": 1.2824227864574294, "learning_rate": 4.5543605501434935e-05, "loss": 0.387, "step": 7327 }, { "epoch": 0.8689671528518914, "grad_norm": 1.0650820880751042, "learning_rate": 4.554223754533536e-05, "loss": 0.2984, "step": 7328 }, { "epoch": 0.8690857346140164, "grad_norm": 1.0576952981903065, "learning_rate": 4.554086939986155e-05, "loss": 0.3245, "step": 7329 }, { "epoch": 0.8692043163761414, "grad_norm": 1.4285320037603566, "learning_rate": 4.553950106502611e-05, "loss": 0.4047, "step": 7330 }, { "epoch": 0.8693228981382664, "grad_norm": 1.329206109413159, "learning_rate": 4.553813254084166e-05, "loss": 0.3306, "step": 7331 }, { "epoch": 0.8694414799003913, "grad_norm": 1.8243529509906597, "learning_rate": 4.55367638273208e-05, "loss": 0.4869, "step": 7332 }, { "epoch": 0.8695600616625163, "grad_norm": 1.2139700087511072, "learning_rate": 4.553539492447618e-05, "loss": 0.3355, "step": 7333 }, { "epoch": 0.8696786434246413, "grad_norm": 1.3971359873504205, "learning_rate": 4.5534025832320395e-05, "loss": 0.4779, "step": 7334 }, { "epoch": 0.8697972251867663, "grad_norm": 1.2427392172016494, "learning_rate": 4.5532656550866066e-05, "loss": 0.3066, "step": 7335 }, { "epoch": 0.8699158069488913, "grad_norm": 1.71943801000163, "learning_rate": 4.553128708012583e-05, "loss": 0.4433, "step": 7336 }, { "epoch": 0.8700343887110162, "grad_norm": 1.3957801775648397, "learning_rate": 4.55299174201123e-05, "loss": 0.3785, "step": 7337 }, { "epoch": 0.8701529704731412, "grad_norm": 1.7273384349037717, "learning_rate": 4.552854757083811e-05, "loss": 0.5541, "step": 7338 }, { "epoch": 0.8702715522352662, "grad_norm": 1.88961035694688, "learning_rate": 4.552717753231588e-05, "loss": 0.5279, "step": 7339 }, { "epoch": 0.8703901339973912, "grad_norm": 1.172766369151261, "learning_rate": 4.552580730455824e-05, "loss": 0.3119, "step": 7340 }, { "epoch": 0.8705087157595162, "grad_norm": 1.1879080005085287, "learning_rate": 4.552443688757784e-05, "loss": 0.3303, "step": 7341 }, { "epoch": 0.8706272975216411, "grad_norm": 1.5657196552764294, "learning_rate": 4.552306628138729e-05, "loss": 0.4455, "step": 7342 }, { "epoch": 0.8707458792837661, "grad_norm": 1.51141049630003, "learning_rate": 4.552169548599925e-05, "loss": 0.4988, "step": 7343 }, { "epoch": 0.8708644610458911, "grad_norm": 1.4465151909557281, "learning_rate": 4.552032450142633e-05, "loss": 0.4373, "step": 7344 }, { "epoch": 0.8709830428080161, "grad_norm": 1.304997528595201, "learning_rate": 4.551895332768119e-05, "loss": 0.3168, "step": 7345 }, { "epoch": 0.8711016245701411, "grad_norm": 1.4006279009759413, "learning_rate": 4.551758196477646e-05, "loss": 0.4118, "step": 7346 }, { "epoch": 0.871220206332266, "grad_norm": 1.6058380543940962, "learning_rate": 4.551621041272478e-05, "loss": 0.3664, "step": 7347 }, { "epoch": 0.8713387880943911, "grad_norm": 1.282195515787323, "learning_rate": 4.5514838671538804e-05, "loss": 0.2976, "step": 7348 }, { "epoch": 0.8714573698565161, "grad_norm": 1.139299830685338, "learning_rate": 4.551346674123117e-05, "loss": 0.2998, "step": 7349 }, { "epoch": 0.8715759516186411, "grad_norm": 1.6773530100454919, "learning_rate": 4.551209462181453e-05, "loss": 0.4718, "step": 7350 }, { "epoch": 0.8716945333807661, "grad_norm": 1.4767097813520451, "learning_rate": 4.551072231330153e-05, "loss": 0.5514, "step": 7351 }, { "epoch": 0.8718131151428911, "grad_norm": 1.3425300748359141, "learning_rate": 4.550934981570482e-05, "loss": 0.497, "step": 7352 }, { "epoch": 0.871931696905016, "grad_norm": 1.1034425783569044, "learning_rate": 4.550797712903705e-05, "loss": 0.2837, "step": 7353 }, { "epoch": 0.872050278667141, "grad_norm": 1.576009466773589, "learning_rate": 4.5506604253310895e-05, "loss": 0.5901, "step": 7354 }, { "epoch": 0.872168860429266, "grad_norm": 1.4206196372364686, "learning_rate": 4.5505231188538974e-05, "loss": 0.3551, "step": 7355 }, { "epoch": 0.872287442191391, "grad_norm": 1.4692112578420655, "learning_rate": 4.5503857934733985e-05, "loss": 0.4228, "step": 7356 }, { "epoch": 0.872406023953516, "grad_norm": 1.359715528153113, "learning_rate": 4.550248449190856e-05, "loss": 0.4521, "step": 7357 }, { "epoch": 0.8725246057156409, "grad_norm": 1.1886360175929769, "learning_rate": 4.550111086007538e-05, "loss": 0.2916, "step": 7358 }, { "epoch": 0.8726431874777659, "grad_norm": 1.4211127106401604, "learning_rate": 4.5499737039247084e-05, "loss": 0.4596, "step": 7359 }, { "epoch": 0.8727617692398909, "grad_norm": 1.14806114369829, "learning_rate": 4.5498363029436364e-05, "loss": 0.3095, "step": 7360 }, { "epoch": 0.8728803510020159, "grad_norm": 1.3734226065174444, "learning_rate": 4.5496988830655866e-05, "loss": 0.4486, "step": 7361 }, { "epoch": 0.8729989327641409, "grad_norm": 1.1246872719593164, "learning_rate": 4.549561444291827e-05, "loss": 0.3625, "step": 7362 }, { "epoch": 0.8731175145262658, "grad_norm": 1.216917599264302, "learning_rate": 4.549423986623623e-05, "loss": 0.3113, "step": 7363 }, { "epoch": 0.8732360962883908, "grad_norm": 1.3633339304097434, "learning_rate": 4.549286510062245e-05, "loss": 0.3906, "step": 7364 }, { "epoch": 0.8733546780505158, "grad_norm": 1.1557294005356344, "learning_rate": 4.549149014608957e-05, "loss": 0.3898, "step": 7365 }, { "epoch": 0.8734732598126408, "grad_norm": 1.4791551645135457, "learning_rate": 4.5490115002650286e-05, "loss": 0.4225, "step": 7366 }, { "epoch": 0.8735918415747658, "grad_norm": 1.2526673774739785, "learning_rate": 4.548873967031727e-05, "loss": 0.3602, "step": 7367 }, { "epoch": 0.8737104233368908, "grad_norm": 1.283930356058675, "learning_rate": 4.5487364149103185e-05, "loss": 0.335, "step": 7368 }, { "epoch": 0.8738290050990157, "grad_norm": 1.2593852666426293, "learning_rate": 4.548598843902074e-05, "loss": 0.3793, "step": 7369 }, { "epoch": 0.8739475868611407, "grad_norm": 1.2698143995947708, "learning_rate": 4.5484612540082594e-05, "loss": 0.4456, "step": 7370 }, { "epoch": 0.8740661686232657, "grad_norm": 1.309544748815254, "learning_rate": 4.548323645230145e-05, "loss": 0.4765, "step": 7371 }, { "epoch": 0.8741847503853907, "grad_norm": 1.246448442342234, "learning_rate": 4.5481860175689974e-05, "loss": 0.4613, "step": 7372 }, { "epoch": 0.8743033321475158, "grad_norm": 1.133107490221832, "learning_rate": 4.548048371026087e-05, "loss": 0.2896, "step": 7373 }, { "epoch": 0.8744219139096407, "grad_norm": 1.5834921412981975, "learning_rate": 4.547910705602682e-05, "loss": 0.5313, "step": 7374 }, { "epoch": 0.8745404956717657, "grad_norm": 1.6771592893318699, "learning_rate": 4.547773021300051e-05, "loss": 0.5145, "step": 7375 }, { "epoch": 0.8746590774338907, "grad_norm": 1.5251563510450794, "learning_rate": 4.5476353181194654e-05, "loss": 0.4674, "step": 7376 }, { "epoch": 0.8747776591960157, "grad_norm": 1.4855851966590392, "learning_rate": 4.547497596062192e-05, "loss": 0.4321, "step": 7377 }, { "epoch": 0.8748962409581407, "grad_norm": 1.1008718533772521, "learning_rate": 4.547359855129502e-05, "loss": 0.3546, "step": 7378 }, { "epoch": 0.8750148227202657, "grad_norm": 1.1344392645136292, "learning_rate": 4.547222095322664e-05, "loss": 0.2922, "step": 7379 }, { "epoch": 0.8751334044823906, "grad_norm": 1.1223864985053509, "learning_rate": 4.54708431664295e-05, "loss": 0.3644, "step": 7380 }, { "epoch": 0.8752519862445156, "grad_norm": 1.140189346351853, "learning_rate": 4.546946519091628e-05, "loss": 0.3066, "step": 7381 }, { "epoch": 0.8753705680066406, "grad_norm": 1.6984563160798498, "learning_rate": 4.546808702669969e-05, "loss": 0.5904, "step": 7382 }, { "epoch": 0.8754891497687656, "grad_norm": 0.8285924862770526, "learning_rate": 4.546670867379245e-05, "loss": 0.2548, "step": 7383 }, { "epoch": 0.8756077315308906, "grad_norm": 1.771462391051809, "learning_rate": 4.546533013220724e-05, "loss": 0.5366, "step": 7384 }, { "epoch": 0.8757263132930155, "grad_norm": 1.2733237047674875, "learning_rate": 4.5463951401956784e-05, "loss": 0.4741, "step": 7385 }, { "epoch": 0.8758448950551405, "grad_norm": 1.3081674955061557, "learning_rate": 4.54625724830538e-05, "loss": 0.4689, "step": 7386 }, { "epoch": 0.8759634768172655, "grad_norm": 1.2106821970544333, "learning_rate": 4.546119337551098e-05, "loss": 0.4048, "step": 7387 }, { "epoch": 0.8760820585793905, "grad_norm": 1.497032896249036, "learning_rate": 4.5459814079341054e-05, "loss": 0.4431, "step": 7388 }, { "epoch": 0.8762006403415155, "grad_norm": 1.3555800419645154, "learning_rate": 4.545843459455673e-05, "loss": 0.4848, "step": 7389 }, { "epoch": 0.8763192221036404, "grad_norm": 1.3329015842868543, "learning_rate": 4.545705492117073e-05, "loss": 0.3176, "step": 7390 }, { "epoch": 0.8764378038657654, "grad_norm": 1.2645174177689273, "learning_rate": 4.545567505919577e-05, "loss": 0.4389, "step": 7391 }, { "epoch": 0.8765563856278904, "grad_norm": 1.623111428326522, "learning_rate": 4.545429500864456e-05, "loss": 0.5109, "step": 7392 }, { "epoch": 0.8766749673900154, "grad_norm": 1.1740662203383168, "learning_rate": 4.545291476952985e-05, "loss": 0.3536, "step": 7393 }, { "epoch": 0.8767935491521404, "grad_norm": 1.6322385074365713, "learning_rate": 4.545153434186433e-05, "loss": 0.6483, "step": 7394 }, { "epoch": 0.8769121309142653, "grad_norm": 1.6570657756963805, "learning_rate": 4.5450153725660755e-05, "loss": 0.5423, "step": 7395 }, { "epoch": 0.8770307126763903, "grad_norm": 1.238899546264121, "learning_rate": 4.5448772920931836e-05, "loss": 0.335, "step": 7396 }, { "epoch": 0.8771492944385153, "grad_norm": 1.2575776314704383, "learning_rate": 4.544739192769031e-05, "loss": 0.4462, "step": 7397 }, { "epoch": 0.8772678762006404, "grad_norm": 1.2907989550238699, "learning_rate": 4.5446010745948895e-05, "loss": 0.3954, "step": 7398 }, { "epoch": 0.8773864579627654, "grad_norm": 1.2403822736466519, "learning_rate": 4.544462937572034e-05, "loss": 0.3314, "step": 7399 }, { "epoch": 0.8775050397248904, "grad_norm": 0.8921602166907387, "learning_rate": 4.544324781701737e-05, "loss": 0.2817, "step": 7400 }, { "epoch": 0.8776236214870153, "grad_norm": 1.1047767010083496, "learning_rate": 4.544186606985273e-05, "loss": 0.3303, "step": 7401 }, { "epoch": 0.8777422032491403, "grad_norm": 1.3439043672133621, "learning_rate": 4.5440484134239146e-05, "loss": 0.3863, "step": 7402 }, { "epoch": 0.8778607850112653, "grad_norm": 1.2147159944492925, "learning_rate": 4.5439102010189375e-05, "loss": 0.4477, "step": 7403 }, { "epoch": 0.8779793667733903, "grad_norm": 1.455164523621153, "learning_rate": 4.543771969771613e-05, "loss": 0.541, "step": 7404 }, { "epoch": 0.8780979485355153, "grad_norm": 1.2539517111536944, "learning_rate": 4.543633719683219e-05, "loss": 0.3689, "step": 7405 }, { "epoch": 0.8782165302976402, "grad_norm": 1.3011222947766705, "learning_rate": 4.543495450755027e-05, "loss": 0.4651, "step": 7406 }, { "epoch": 0.8783351120597652, "grad_norm": 1.485531209048176, "learning_rate": 4.5433571629883134e-05, "loss": 0.5985, "step": 7407 }, { "epoch": 0.8784536938218902, "grad_norm": 1.6911435362466862, "learning_rate": 4.5432188563843516e-05, "loss": 0.4357, "step": 7408 }, { "epoch": 0.8785722755840152, "grad_norm": 1.8140269950860681, "learning_rate": 4.543080530944418e-05, "loss": 0.6002, "step": 7409 }, { "epoch": 0.8786908573461402, "grad_norm": 1.3130561502283085, "learning_rate": 4.542942186669788e-05, "loss": 0.4256, "step": 7410 }, { "epoch": 0.8788094391082651, "grad_norm": 1.4585855044897411, "learning_rate": 4.5428038235617356e-05, "loss": 0.4432, "step": 7411 }, { "epoch": 0.8789280208703901, "grad_norm": 1.2304939384987652, "learning_rate": 4.542665441621537e-05, "loss": 0.3274, "step": 7412 }, { "epoch": 0.8790466026325151, "grad_norm": 1.1922538363357853, "learning_rate": 4.5425270408504676e-05, "loss": 0.3028, "step": 7413 }, { "epoch": 0.8791651843946401, "grad_norm": 1.0594932421220316, "learning_rate": 4.542388621249805e-05, "loss": 0.3196, "step": 7414 }, { "epoch": 0.8792837661567651, "grad_norm": 1.3611899596243262, "learning_rate": 4.542250182820822e-05, "loss": 0.3945, "step": 7415 }, { "epoch": 0.87940234791889, "grad_norm": 1.1249516245148061, "learning_rate": 4.542111725564798e-05, "loss": 0.3685, "step": 7416 }, { "epoch": 0.879520929681015, "grad_norm": 1.3807266475930489, "learning_rate": 4.541973249483008e-05, "loss": 0.3935, "step": 7417 }, { "epoch": 0.87963951144314, "grad_norm": 1.6217665528333411, "learning_rate": 4.5418347545767276e-05, "loss": 0.3805, "step": 7418 }, { "epoch": 0.879758093205265, "grad_norm": 1.4248023288678837, "learning_rate": 4.5416962408472354e-05, "loss": 0.5099, "step": 7419 }, { "epoch": 0.87987667496739, "grad_norm": 1.4665969005309927, "learning_rate": 4.541557708295807e-05, "loss": 0.3685, "step": 7420 }, { "epoch": 0.879995256729515, "grad_norm": 1.2759976079611317, "learning_rate": 4.541419156923721e-05, "loss": 0.3833, "step": 7421 }, { "epoch": 0.8801138384916399, "grad_norm": 1.364078508758613, "learning_rate": 4.541280586732253e-05, "loss": 0.5891, "step": 7422 }, { "epoch": 0.880232420253765, "grad_norm": 1.1495915234589507, "learning_rate": 4.541141997722682e-05, "loss": 0.3645, "step": 7423 }, { "epoch": 0.88035100201589, "grad_norm": 1.1085110813404089, "learning_rate": 4.541003389896284e-05, "loss": 0.2659, "step": 7424 }, { "epoch": 0.880469583778015, "grad_norm": 0.927660882508683, "learning_rate": 4.5408647632543374e-05, "loss": 0.2395, "step": 7425 }, { "epoch": 0.88058816554014, "grad_norm": 1.7633260255846206, "learning_rate": 4.540726117798121e-05, "loss": 0.53, "step": 7426 }, { "epoch": 0.880706747302265, "grad_norm": 1.5225850659388012, "learning_rate": 4.540587453528912e-05, "loss": 0.4563, "step": 7427 }, { "epoch": 0.8808253290643899, "grad_norm": 1.1524167988124348, "learning_rate": 4.540448770447989e-05, "loss": 0.3048, "step": 7428 }, { "epoch": 0.8809439108265149, "grad_norm": 1.458953566367985, "learning_rate": 4.540310068556631e-05, "loss": 0.438, "step": 7429 }, { "epoch": 0.8810624925886399, "grad_norm": 1.5565289837742644, "learning_rate": 4.540171347856116e-05, "loss": 0.3996, "step": 7430 }, { "epoch": 0.8811810743507649, "grad_norm": 1.1035806845238953, "learning_rate": 4.540032608347722e-05, "loss": 0.2983, "step": 7431 }, { "epoch": 0.8812996561128899, "grad_norm": 1.3940222241105604, "learning_rate": 4.5398938500327306e-05, "loss": 0.2827, "step": 7432 }, { "epoch": 0.8814182378750148, "grad_norm": 1.4378767981369645, "learning_rate": 4.539755072912418e-05, "loss": 0.41, "step": 7433 }, { "epoch": 0.8815368196371398, "grad_norm": 1.2877424340437988, "learning_rate": 4.539616276988066e-05, "loss": 0.3699, "step": 7434 }, { "epoch": 0.8816554013992648, "grad_norm": 1.228235176875652, "learning_rate": 4.5394774622609525e-05, "loss": 0.365, "step": 7435 }, { "epoch": 0.8817739831613898, "grad_norm": 1.3388305434404646, "learning_rate": 4.539338628732358e-05, "loss": 0.4483, "step": 7436 }, { "epoch": 0.8818925649235148, "grad_norm": 1.800362397471492, "learning_rate": 4.539199776403562e-05, "loss": 0.5145, "step": 7437 }, { "epoch": 0.8820111466856397, "grad_norm": 1.253933885820247, "learning_rate": 4.539060905275844e-05, "loss": 0.3825, "step": 7438 }, { "epoch": 0.8821297284477647, "grad_norm": 1.1439189396406135, "learning_rate": 4.538922015350486e-05, "loss": 0.2829, "step": 7439 }, { "epoch": 0.8822483102098897, "grad_norm": 1.4009906300650106, "learning_rate": 4.5387831066287664e-05, "loss": 0.3966, "step": 7440 }, { "epoch": 0.8823668919720147, "grad_norm": 1.4328750243049735, "learning_rate": 4.5386441791119674e-05, "loss": 0.46, "step": 7441 }, { "epoch": 0.8824854737341397, "grad_norm": 1.068162529221504, "learning_rate": 4.538505232801369e-05, "loss": 0.3141, "step": 7442 }, { "epoch": 0.8826040554962646, "grad_norm": 1.7132475595448937, "learning_rate": 4.538366267698252e-05, "loss": 0.5287, "step": 7443 }, { "epoch": 0.8827226372583896, "grad_norm": 1.2951134115714935, "learning_rate": 4.538227283803897e-05, "loss": 0.4536, "step": 7444 }, { "epoch": 0.8828412190205146, "grad_norm": 1.315576384671872, "learning_rate": 4.5380882811195866e-05, "loss": 0.4417, "step": 7445 }, { "epoch": 0.8829598007826396, "grad_norm": 1.2220443778786267, "learning_rate": 4.537949259646601e-05, "loss": 0.4138, "step": 7446 }, { "epoch": 0.8830783825447646, "grad_norm": 1.1616525945594198, "learning_rate": 4.5378102193862224e-05, "loss": 0.3506, "step": 7447 }, { "epoch": 0.8831969643068897, "grad_norm": 1.1348782513685463, "learning_rate": 4.537671160339733e-05, "loss": 0.3406, "step": 7448 }, { "epoch": 0.8833155460690146, "grad_norm": 1.2295321090690572, "learning_rate": 4.537532082508414e-05, "loss": 0.4341, "step": 7449 }, { "epoch": 0.8834341278311396, "grad_norm": 1.0330260534403048, "learning_rate": 4.5373929858935483e-05, "loss": 0.2963, "step": 7450 }, { "epoch": 0.8835527095932646, "grad_norm": 1.225152965075056, "learning_rate": 4.5372538704964174e-05, "loss": 0.3985, "step": 7451 }, { "epoch": 0.8836712913553896, "grad_norm": 1.1716191465770958, "learning_rate": 4.5371147363183034e-05, "loss": 0.3311, "step": 7452 }, { "epoch": 0.8837898731175146, "grad_norm": 1.105796903943377, "learning_rate": 4.53697558336049e-05, "loss": 0.3375, "step": 7453 }, { "epoch": 0.8839084548796395, "grad_norm": 1.2820648972825353, "learning_rate": 4.53683641162426e-05, "loss": 0.3775, "step": 7454 }, { "epoch": 0.8840270366417645, "grad_norm": 1.2631314141152539, "learning_rate": 4.5366972211108953e-05, "loss": 0.4757, "step": 7455 }, { "epoch": 0.8841456184038895, "grad_norm": 1.5617999303736578, "learning_rate": 4.536558011821681e-05, "loss": 0.3675, "step": 7456 }, { "epoch": 0.8842642001660145, "grad_norm": 1.379780220704358, "learning_rate": 4.5364187837578975e-05, "loss": 0.4975, "step": 7457 }, { "epoch": 0.8843827819281395, "grad_norm": 1.983911440215965, "learning_rate": 4.536279536920831e-05, "loss": 0.6987, "step": 7458 }, { "epoch": 0.8845013636902644, "grad_norm": 1.3495770625559849, "learning_rate": 4.536140271311764e-05, "loss": 0.3996, "step": 7459 }, { "epoch": 0.8846199454523894, "grad_norm": 1.4744562866523487, "learning_rate": 4.536000986931981e-05, "loss": 0.4256, "step": 7460 }, { "epoch": 0.8847385272145144, "grad_norm": 0.9318111110010412, "learning_rate": 4.535861683782766e-05, "loss": 0.3048, "step": 7461 }, { "epoch": 0.8848571089766394, "grad_norm": 1.4473418359441452, "learning_rate": 4.535722361865402e-05, "loss": 0.3552, "step": 7462 }, { "epoch": 0.8849756907387644, "grad_norm": 1.5702515037311215, "learning_rate": 4.535583021181174e-05, "loss": 0.5568, "step": 7463 }, { "epoch": 0.8850942725008893, "grad_norm": 1.514145693167217, "learning_rate": 4.5354436617313674e-05, "loss": 0.4409, "step": 7464 }, { "epoch": 0.8852128542630143, "grad_norm": 1.046737380531244, "learning_rate": 4.5353042835172656e-05, "loss": 0.267, "step": 7465 }, { "epoch": 0.8853314360251393, "grad_norm": 1.2783467075664185, "learning_rate": 4.535164886540155e-05, "loss": 0.316, "step": 7466 }, { "epoch": 0.8854500177872643, "grad_norm": 1.5101740311875667, "learning_rate": 4.535025470801319e-05, "loss": 0.5353, "step": 7467 }, { "epoch": 0.8855685995493893, "grad_norm": 1.293522863787606, "learning_rate": 4.534886036302044e-05, "loss": 0.3852, "step": 7468 }, { "epoch": 0.8856871813115142, "grad_norm": 1.4374488666137482, "learning_rate": 4.534746583043615e-05, "loss": 0.4818, "step": 7469 }, { "epoch": 0.8858057630736392, "grad_norm": 1.3772340669162901, "learning_rate": 4.534607111027318e-05, "loss": 0.3932, "step": 7470 }, { "epoch": 0.8859243448357642, "grad_norm": 1.1397633828786091, "learning_rate": 4.5344676202544376e-05, "loss": 0.2866, "step": 7471 }, { "epoch": 0.8860429265978892, "grad_norm": 1.5595051087781746, "learning_rate": 4.534328110726262e-05, "loss": 0.5676, "step": 7472 }, { "epoch": 0.8861615083600143, "grad_norm": 1.2735092668574903, "learning_rate": 4.534188582444075e-05, "loss": 0.3575, "step": 7473 }, { "epoch": 0.8862800901221393, "grad_norm": 1.3024464942242433, "learning_rate": 4.534049035409164e-05, "loss": 0.4319, "step": 7474 }, { "epoch": 0.8863986718842642, "grad_norm": 1.635675784725513, "learning_rate": 4.533909469622815e-05, "loss": 0.458, "step": 7475 }, { "epoch": 0.8865172536463892, "grad_norm": 1.2411939669213774, "learning_rate": 4.533769885086315e-05, "loss": 0.3037, "step": 7476 }, { "epoch": 0.8866358354085142, "grad_norm": 1.161875858115989, "learning_rate": 4.53363028180095e-05, "loss": 0.3842, "step": 7477 }, { "epoch": 0.8867544171706392, "grad_norm": 1.626727765953996, "learning_rate": 4.533490659768008e-05, "loss": 0.6438, "step": 7478 }, { "epoch": 0.8868729989327642, "grad_norm": 1.237703173633034, "learning_rate": 4.533351018988776e-05, "loss": 0.3479, "step": 7479 }, { "epoch": 0.8869915806948891, "grad_norm": 1.4961251329445258, "learning_rate": 4.533211359464541e-05, "loss": 0.352, "step": 7480 }, { "epoch": 0.8871101624570141, "grad_norm": 1.5143411420458186, "learning_rate": 4.5330716811965905e-05, "loss": 0.4711, "step": 7481 }, { "epoch": 0.8872287442191391, "grad_norm": 1.4223487328311617, "learning_rate": 4.532931984186212e-05, "loss": 0.421, "step": 7482 }, { "epoch": 0.8873473259812641, "grad_norm": 1.574922957186531, "learning_rate": 4.532792268434694e-05, "loss": 0.4992, "step": 7483 }, { "epoch": 0.8874659077433891, "grad_norm": 1.8334756847513334, "learning_rate": 4.5326525339433236e-05, "loss": 0.5734, "step": 7484 }, { "epoch": 0.887584489505514, "grad_norm": 1.5073881985684319, "learning_rate": 4.53251278071339e-05, "loss": 0.3589, "step": 7485 }, { "epoch": 0.887703071267639, "grad_norm": 1.2309674932839478, "learning_rate": 4.53237300874618e-05, "loss": 0.4563, "step": 7486 }, { "epoch": 0.887821653029764, "grad_norm": 1.3431965679090963, "learning_rate": 4.532233218042984e-05, "loss": 0.2716, "step": 7487 }, { "epoch": 0.887940234791889, "grad_norm": 1.658740503806538, "learning_rate": 4.53209340860509e-05, "loss": 0.4523, "step": 7488 }, { "epoch": 0.888058816554014, "grad_norm": 1.5982759811054892, "learning_rate": 4.5319535804337866e-05, "loss": 0.5507, "step": 7489 }, { "epoch": 0.888177398316139, "grad_norm": 1.1605884425655701, "learning_rate": 4.531813733530362e-05, "loss": 0.377, "step": 7490 }, { "epoch": 0.8882959800782639, "grad_norm": 1.4024739922754021, "learning_rate": 4.531673867896107e-05, "loss": 0.4278, "step": 7491 }, { "epoch": 0.8884145618403889, "grad_norm": 1.3233299472995177, "learning_rate": 4.53153398353231e-05, "loss": 0.3907, "step": 7492 }, { "epoch": 0.8885331436025139, "grad_norm": 1.3560703556260767, "learning_rate": 4.5313940804402615e-05, "loss": 0.2931, "step": 7493 }, { "epoch": 0.8886517253646389, "grad_norm": 1.1687818495918931, "learning_rate": 4.5312541586212505e-05, "loss": 0.3532, "step": 7494 }, { "epoch": 0.8887703071267639, "grad_norm": 1.116227499762771, "learning_rate": 4.531114218076567e-05, "loss": 0.2988, "step": 7495 }, { "epoch": 0.8888888888888888, "grad_norm": 1.6347523654235325, "learning_rate": 4.530974258807501e-05, "loss": 0.4236, "step": 7496 }, { "epoch": 0.8890074706510138, "grad_norm": 1.6800674514267606, "learning_rate": 4.5308342808153426e-05, "loss": 0.5833, "step": 7497 }, { "epoch": 0.8891260524131389, "grad_norm": 1.1475240099820048, "learning_rate": 4.530694284101383e-05, "loss": 0.291, "step": 7498 }, { "epoch": 0.8892446341752639, "grad_norm": 1.319536894339529, "learning_rate": 4.530554268666912e-05, "loss": 0.3983, "step": 7499 }, { "epoch": 0.8893632159373889, "grad_norm": 1.467827202603896, "learning_rate": 4.530414234513221e-05, "loss": 0.4666, "step": 7500 }, { "epoch": 0.8894817976995139, "grad_norm": 1.2655876163217412, "learning_rate": 4.530274181641601e-05, "loss": 0.2855, "step": 7501 }, { "epoch": 0.8896003794616388, "grad_norm": 1.2835862033966863, "learning_rate": 4.530134110053341e-05, "loss": 0.3567, "step": 7502 }, { "epoch": 0.8897189612237638, "grad_norm": 1.654656670846327, "learning_rate": 4.529994019749735e-05, "loss": 0.5484, "step": 7503 }, { "epoch": 0.8898375429858888, "grad_norm": 1.0611448647848152, "learning_rate": 4.529853910732074e-05, "loss": 0.3054, "step": 7504 }, { "epoch": 0.8899561247480138, "grad_norm": 1.6389657966975257, "learning_rate": 4.529713783001649e-05, "loss": 0.5649, "step": 7505 }, { "epoch": 0.8900747065101388, "grad_norm": 1.271627396448637, "learning_rate": 4.529573636559752e-05, "loss": 0.3167, "step": 7506 }, { "epoch": 0.8901932882722637, "grad_norm": 1.340538347093671, "learning_rate": 4.529433471407674e-05, "loss": 0.345, "step": 7507 }, { "epoch": 0.8903118700343887, "grad_norm": 1.4105896354868457, "learning_rate": 4.5292932875467087e-05, "loss": 0.4468, "step": 7508 }, { "epoch": 0.8904304517965137, "grad_norm": 1.4869243922911697, "learning_rate": 4.5291530849781475e-05, "loss": 0.4622, "step": 7509 }, { "epoch": 0.8905490335586387, "grad_norm": 1.6814625459797656, "learning_rate": 4.529012863703284e-05, "loss": 0.6405, "step": 7510 }, { "epoch": 0.8906676153207637, "grad_norm": 1.2751004639507215, "learning_rate": 4.528872623723409e-05, "loss": 0.4186, "step": 7511 }, { "epoch": 0.8907861970828886, "grad_norm": 1.233910030843671, "learning_rate": 4.5287323650398164e-05, "loss": 0.3098, "step": 7512 }, { "epoch": 0.8909047788450136, "grad_norm": 1.2005533935268675, "learning_rate": 4.5285920876538e-05, "loss": 0.37, "step": 7513 }, { "epoch": 0.8910233606071386, "grad_norm": 1.575228780921675, "learning_rate": 4.528451791566651e-05, "loss": 0.3426, "step": 7514 }, { "epoch": 0.8911419423692636, "grad_norm": 1.023563386327721, "learning_rate": 4.528311476779665e-05, "loss": 0.3365, "step": 7515 }, { "epoch": 0.8912605241313886, "grad_norm": 1.0517907720678317, "learning_rate": 4.528171143294134e-05, "loss": 0.3054, "step": 7516 }, { "epoch": 0.8913791058935135, "grad_norm": 1.1236773799483701, "learning_rate": 4.528030791111353e-05, "loss": 0.3884, "step": 7517 }, { "epoch": 0.8914976876556385, "grad_norm": 1.146417296845729, "learning_rate": 4.527890420232614e-05, "loss": 0.3212, "step": 7518 }, { "epoch": 0.8916162694177635, "grad_norm": 1.6294921046640385, "learning_rate": 4.527750030659212e-05, "loss": 0.5284, "step": 7519 }, { "epoch": 0.8917348511798885, "grad_norm": 1.4007261924705083, "learning_rate": 4.527609622392442e-05, "loss": 0.3842, "step": 7520 }, { "epoch": 0.8918534329420135, "grad_norm": 1.4826411130064627, "learning_rate": 4.527469195433598e-05, "loss": 0.4624, "step": 7521 }, { "epoch": 0.8919720147041384, "grad_norm": 1.453207265717605, "learning_rate": 4.5273287497839735e-05, "loss": 0.4269, "step": 7522 }, { "epoch": 0.8920905964662635, "grad_norm": 1.1749695273630563, "learning_rate": 4.527188285444864e-05, "loss": 0.3403, "step": 7523 }, { "epoch": 0.8922091782283885, "grad_norm": 1.207116221028276, "learning_rate": 4.527047802417564e-05, "loss": 0.2872, "step": 7524 }, { "epoch": 0.8923277599905135, "grad_norm": 1.1727978791380111, "learning_rate": 4.5269073007033703e-05, "loss": 0.3454, "step": 7525 }, { "epoch": 0.8924463417526385, "grad_norm": 1.2077134239271858, "learning_rate": 4.526766780303576e-05, "loss": 0.3354, "step": 7526 }, { "epoch": 0.8925649235147635, "grad_norm": 1.574055736474613, "learning_rate": 4.5266262412194774e-05, "loss": 0.2895, "step": 7527 }, { "epoch": 0.8926835052768884, "grad_norm": 1.2618052450169956, "learning_rate": 4.52648568345237e-05, "loss": 0.3141, "step": 7528 }, { "epoch": 0.8928020870390134, "grad_norm": 1.1666752797445716, "learning_rate": 4.52634510700355e-05, "loss": 0.3105, "step": 7529 }, { "epoch": 0.8929206688011384, "grad_norm": 1.480077403906003, "learning_rate": 4.526204511874313e-05, "loss": 0.4035, "step": 7530 }, { "epoch": 0.8930392505632634, "grad_norm": 1.2362963662189608, "learning_rate": 4.5260638980659546e-05, "loss": 0.3356, "step": 7531 }, { "epoch": 0.8931578323253884, "grad_norm": 1.129740865355936, "learning_rate": 4.5259232655797725e-05, "loss": 0.3093, "step": 7532 }, { "epoch": 0.8932764140875133, "grad_norm": 1.3157891275921105, "learning_rate": 4.5257826144170615e-05, "loss": 0.2934, "step": 7533 }, { "epoch": 0.8933949958496383, "grad_norm": 1.6843390716777602, "learning_rate": 4.5256419445791185e-05, "loss": 0.457, "step": 7534 }, { "epoch": 0.8935135776117633, "grad_norm": 1.1981148226494402, "learning_rate": 4.525501256067242e-05, "loss": 0.3601, "step": 7535 }, { "epoch": 0.8936321593738883, "grad_norm": 1.9627346791174598, "learning_rate": 4.5253605488827264e-05, "loss": 0.5465, "step": 7536 }, { "epoch": 0.8937507411360133, "grad_norm": 1.3178186334718536, "learning_rate": 4.5252198230268713e-05, "loss": 0.3977, "step": 7537 }, { "epoch": 0.8938693228981383, "grad_norm": 1.481868833752932, "learning_rate": 4.525079078500972e-05, "loss": 0.4507, "step": 7538 }, { "epoch": 0.8939879046602632, "grad_norm": 0.9290292170322662, "learning_rate": 4.5249383153063286e-05, "loss": 0.2478, "step": 7539 }, { "epoch": 0.8941064864223882, "grad_norm": 1.413300412541907, "learning_rate": 4.5247975334442356e-05, "loss": 0.3135, "step": 7540 }, { "epoch": 0.8942250681845132, "grad_norm": 1.4785063813656771, "learning_rate": 4.524656732915993e-05, "loss": 0.3627, "step": 7541 }, { "epoch": 0.8943436499466382, "grad_norm": 1.3926038791625226, "learning_rate": 4.524515913722897e-05, "loss": 0.3796, "step": 7542 }, { "epoch": 0.8944622317087632, "grad_norm": 1.5291764411367228, "learning_rate": 4.524375075866248e-05, "loss": 0.4498, "step": 7543 }, { "epoch": 0.8945808134708881, "grad_norm": 1.3630393563145187, "learning_rate": 4.524234219347343e-05, "loss": 0.3841, "step": 7544 }, { "epoch": 0.8946993952330131, "grad_norm": 1.3361139917799805, "learning_rate": 4.5240933441674806e-05, "loss": 0.3533, "step": 7545 }, { "epoch": 0.8948179769951381, "grad_norm": 1.3442169705210165, "learning_rate": 4.523952450327959e-05, "loss": 0.4053, "step": 7546 }, { "epoch": 0.8949365587572631, "grad_norm": 1.367150186718433, "learning_rate": 4.5238115378300795e-05, "loss": 0.3624, "step": 7547 }, { "epoch": 0.8950551405193882, "grad_norm": 1.4583397445951978, "learning_rate": 4.5236706066751377e-05, "loss": 0.5091, "step": 7548 }, { "epoch": 0.8951737222815132, "grad_norm": 1.4740976024399954, "learning_rate": 4.523529656864436e-05, "loss": 0.3879, "step": 7549 }, { "epoch": 0.8952923040436381, "grad_norm": 1.0557585225640393, "learning_rate": 4.523388688399272e-05, "loss": 0.2845, "step": 7550 }, { "epoch": 0.8954108858057631, "grad_norm": 1.2039208162487118, "learning_rate": 4.5232477012809444e-05, "loss": 0.3087, "step": 7551 }, { "epoch": 0.8955294675678881, "grad_norm": 1.1957511329251593, "learning_rate": 4.523106695510755e-05, "loss": 0.3128, "step": 7552 }, { "epoch": 0.8956480493300131, "grad_norm": 1.197687010172547, "learning_rate": 4.522965671090002e-05, "loss": 0.4216, "step": 7553 }, { "epoch": 0.895766631092138, "grad_norm": 1.029743689777649, "learning_rate": 4.522824628019986e-05, "loss": 0.3026, "step": 7554 }, { "epoch": 0.895885212854263, "grad_norm": 1.364639951730422, "learning_rate": 4.522683566302008e-05, "loss": 0.3192, "step": 7555 }, { "epoch": 0.896003794616388, "grad_norm": 1.3058909251919892, "learning_rate": 4.522542485937369e-05, "loss": 0.3117, "step": 7556 }, { "epoch": 0.896122376378513, "grad_norm": 1.598257496011694, "learning_rate": 4.5224013869273676e-05, "loss": 0.4408, "step": 7557 }, { "epoch": 0.896240958140638, "grad_norm": 1.6348324884773038, "learning_rate": 4.522260269273305e-05, "loss": 0.3923, "step": 7558 }, { "epoch": 0.896359539902763, "grad_norm": 1.401434591464561, "learning_rate": 4.5221191329764836e-05, "loss": 0.4256, "step": 7559 }, { "epoch": 0.8964781216648879, "grad_norm": 1.4165761747850558, "learning_rate": 4.521977978038202e-05, "loss": 0.358, "step": 7560 }, { "epoch": 0.8965967034270129, "grad_norm": 1.5190554216741796, "learning_rate": 4.521836804459764e-05, "loss": 0.4619, "step": 7561 }, { "epoch": 0.8967152851891379, "grad_norm": 1.368581521688346, "learning_rate": 4.5216956122424705e-05, "loss": 0.4075, "step": 7562 }, { "epoch": 0.8968338669512629, "grad_norm": 1.1085603365538104, "learning_rate": 4.521554401387621e-05, "loss": 0.3227, "step": 7563 }, { "epoch": 0.8969524487133879, "grad_norm": 1.2513947550875535, "learning_rate": 4.52141317189652e-05, "loss": 0.244, "step": 7564 }, { "epoch": 0.8970710304755128, "grad_norm": 1.6458696727327704, "learning_rate": 4.521271923770468e-05, "loss": 0.4539, "step": 7565 }, { "epoch": 0.8971896122376378, "grad_norm": 1.4217868752516982, "learning_rate": 4.5211306570107683e-05, "loss": 0.4374, "step": 7566 }, { "epoch": 0.8973081939997628, "grad_norm": 1.311860388229482, "learning_rate": 4.520989371618722e-05, "loss": 0.3176, "step": 7567 }, { "epoch": 0.8974267757618878, "grad_norm": 1.4163368181148714, "learning_rate": 4.520848067595632e-05, "loss": 0.5609, "step": 7568 }, { "epoch": 0.8975453575240128, "grad_norm": 1.0460203776654151, "learning_rate": 4.5207067449428004e-05, "loss": 0.2779, "step": 7569 }, { "epoch": 0.8976639392861377, "grad_norm": 2.1344436901762385, "learning_rate": 4.520565403661531e-05, "loss": 0.6181, "step": 7570 }, { "epoch": 0.8977825210482627, "grad_norm": 1.2701913697384388, "learning_rate": 4.520424043753126e-05, "loss": 0.3862, "step": 7571 }, { "epoch": 0.8979011028103877, "grad_norm": 1.4259866855576457, "learning_rate": 4.520282665218889e-05, "loss": 0.3782, "step": 7572 }, { "epoch": 0.8980196845725128, "grad_norm": 1.7200997754684433, "learning_rate": 4.520141268060124e-05, "loss": 0.608, "step": 7573 }, { "epoch": 0.8981382663346378, "grad_norm": 1.1349932982948479, "learning_rate": 4.519999852278133e-05, "loss": 0.3569, "step": 7574 }, { "epoch": 0.8982568480967628, "grad_norm": 1.4156876254977044, "learning_rate": 4.519858417874221e-05, "loss": 0.4542, "step": 7575 }, { "epoch": 0.8983754298588877, "grad_norm": 1.0241545326244827, "learning_rate": 4.5197169648496905e-05, "loss": 0.256, "step": 7576 }, { "epoch": 0.8984940116210127, "grad_norm": 1.2722574831569817, "learning_rate": 4.519575493205847e-05, "loss": 0.4176, "step": 7577 }, { "epoch": 0.8986125933831377, "grad_norm": 1.2569880195476344, "learning_rate": 4.519434002943994e-05, "loss": 0.347, "step": 7578 }, { "epoch": 0.8987311751452627, "grad_norm": 1.3754892970874861, "learning_rate": 4.519292494065436e-05, "loss": 0.4595, "step": 7579 }, { "epoch": 0.8988497569073877, "grad_norm": 1.2149123111509081, "learning_rate": 4.519150966571477e-05, "loss": 0.3042, "step": 7580 }, { "epoch": 0.8989683386695126, "grad_norm": 1.2666562658212768, "learning_rate": 4.5190094204634225e-05, "loss": 0.3809, "step": 7581 }, { "epoch": 0.8990869204316376, "grad_norm": 1.314368838388571, "learning_rate": 4.5188678557425765e-05, "loss": 0.3581, "step": 7582 }, { "epoch": 0.8992055021937626, "grad_norm": 1.123515797024075, "learning_rate": 4.5187262724102455e-05, "loss": 0.2839, "step": 7583 }, { "epoch": 0.8993240839558876, "grad_norm": 1.2574596153794537, "learning_rate": 4.518584670467733e-05, "loss": 0.4264, "step": 7584 }, { "epoch": 0.8994426657180126, "grad_norm": 1.4240014958594773, "learning_rate": 4.518443049916346e-05, "loss": 0.4715, "step": 7585 }, { "epoch": 0.8995612474801375, "grad_norm": 1.3102892472566667, "learning_rate": 4.518301410757388e-05, "loss": 0.3989, "step": 7586 }, { "epoch": 0.8996798292422625, "grad_norm": 1.655841783384617, "learning_rate": 4.518159752992166e-05, "loss": 0.5198, "step": 7587 }, { "epoch": 0.8997984110043875, "grad_norm": 1.5988325287391265, "learning_rate": 4.518018076621987e-05, "loss": 0.4547, "step": 7588 }, { "epoch": 0.8999169927665125, "grad_norm": 1.441588761446772, "learning_rate": 4.5178763816481554e-05, "loss": 0.3601, "step": 7589 }, { "epoch": 0.9000355745286375, "grad_norm": 1.162418093704123, "learning_rate": 4.517734668071979e-05, "loss": 0.3459, "step": 7590 }, { "epoch": 0.9001541562907625, "grad_norm": 1.526947218950448, "learning_rate": 4.517592935894762e-05, "loss": 0.4758, "step": 7591 }, { "epoch": 0.9002727380528874, "grad_norm": 1.4384887434471367, "learning_rate": 4.5174511851178126e-05, "loss": 0.3656, "step": 7592 }, { "epoch": 0.9003913198150124, "grad_norm": 1.4003422015973448, "learning_rate": 4.517309415742437e-05, "loss": 0.3354, "step": 7593 }, { "epoch": 0.9005099015771374, "grad_norm": 1.6139938693952318, "learning_rate": 4.517167627769943e-05, "loss": 0.4804, "step": 7594 }, { "epoch": 0.9006284833392624, "grad_norm": 1.833876686615319, "learning_rate": 4.517025821201637e-05, "loss": 0.5674, "step": 7595 }, { "epoch": 0.9007470651013874, "grad_norm": 1.1505440490053005, "learning_rate": 4.5168839960388257e-05, "loss": 0.2948, "step": 7596 }, { "epoch": 0.9008656468635123, "grad_norm": 1.2744372883229853, "learning_rate": 4.516742152282818e-05, "loss": 0.3201, "step": 7597 }, { "epoch": 0.9009842286256374, "grad_norm": 1.3577729157388, "learning_rate": 4.5166002899349205e-05, "loss": 0.4334, "step": 7598 }, { "epoch": 0.9011028103877624, "grad_norm": 1.1139368066947266, "learning_rate": 4.5164584089964414e-05, "loss": 0.3076, "step": 7599 }, { "epoch": 0.9012213921498874, "grad_norm": 0.9938339029374205, "learning_rate": 4.516316509468688e-05, "loss": 0.2953, "step": 7600 }, { "epoch": 0.9013399739120124, "grad_norm": 1.7276343683208693, "learning_rate": 4.51617459135297e-05, "loss": 0.5405, "step": 7601 }, { "epoch": 0.9014585556741374, "grad_norm": 1.362089511712901, "learning_rate": 4.516032654650593e-05, "loss": 0.3423, "step": 7602 }, { "epoch": 0.9015771374362623, "grad_norm": 1.1769861374575195, "learning_rate": 4.515890699362868e-05, "loss": 0.3706, "step": 7603 }, { "epoch": 0.9016957191983873, "grad_norm": 1.8069746495561436, "learning_rate": 4.515748725491103e-05, "loss": 0.746, "step": 7604 }, { "epoch": 0.9018143009605123, "grad_norm": 1.5217458235243717, "learning_rate": 4.515606733036607e-05, "loss": 0.4631, "step": 7605 }, { "epoch": 0.9019328827226373, "grad_norm": 1.361591502477298, "learning_rate": 4.515464722000687e-05, "loss": 0.3687, "step": 7606 }, { "epoch": 0.9020514644847623, "grad_norm": 1.6113232327837583, "learning_rate": 4.515322692384656e-05, "loss": 0.4565, "step": 7607 }, { "epoch": 0.9021700462468872, "grad_norm": 1.4923675269265537, "learning_rate": 4.51518064418982e-05, "loss": 0.4211, "step": 7608 }, { "epoch": 0.9022886280090122, "grad_norm": 1.371461780432518, "learning_rate": 4.51503857741749e-05, "loss": 0.3482, "step": 7609 }, { "epoch": 0.9024072097711372, "grad_norm": 1.5277033450761366, "learning_rate": 4.514896492068974e-05, "loss": 0.4395, "step": 7610 }, { "epoch": 0.9025257915332622, "grad_norm": 1.6352280182067422, "learning_rate": 4.514754388145584e-05, "loss": 0.4457, "step": 7611 }, { "epoch": 0.9026443732953872, "grad_norm": 1.0569352654481983, "learning_rate": 4.51461226564863e-05, "loss": 0.3433, "step": 7612 }, { "epoch": 0.9027629550575121, "grad_norm": 1.240984361371545, "learning_rate": 4.514470124579421e-05, "loss": 0.3419, "step": 7613 }, { "epoch": 0.9028815368196371, "grad_norm": 1.156852058944384, "learning_rate": 4.5143279649392664e-05, "loss": 0.2841, "step": 7614 }, { "epoch": 0.9030001185817621, "grad_norm": 1.4283757661143701, "learning_rate": 4.51418578672948e-05, "loss": 0.4653, "step": 7615 }, { "epoch": 0.9031187003438871, "grad_norm": 1.2757610090235179, "learning_rate": 4.51404358995137e-05, "loss": 0.3994, "step": 7616 }, { "epoch": 0.9032372821060121, "grad_norm": 1.0841713201786027, "learning_rate": 4.513901374606248e-05, "loss": 0.3764, "step": 7617 }, { "epoch": 0.903355863868137, "grad_norm": 1.9116776328309448, "learning_rate": 4.513759140695424e-05, "loss": 0.5992, "step": 7618 }, { "epoch": 0.903474445630262, "grad_norm": 1.0820298334938925, "learning_rate": 4.513616888220211e-05, "loss": 0.2809, "step": 7619 }, { "epoch": 0.903593027392387, "grad_norm": 1.3262955824543454, "learning_rate": 4.513474617181919e-05, "loss": 0.4512, "step": 7620 }, { "epoch": 0.903711609154512, "grad_norm": 1.7892672199925554, "learning_rate": 4.513332327581861e-05, "loss": 0.583, "step": 7621 }, { "epoch": 0.903830190916637, "grad_norm": 1.6235759935535257, "learning_rate": 4.5131900194213475e-05, "loss": 0.4629, "step": 7622 }, { "epoch": 0.9039487726787621, "grad_norm": 1.5072165077648865, "learning_rate": 4.51304769270169e-05, "loss": 0.4914, "step": 7623 }, { "epoch": 0.904067354440887, "grad_norm": 1.0347153279879222, "learning_rate": 4.5129053474242024e-05, "loss": 0.2785, "step": 7624 }, { "epoch": 0.904185936203012, "grad_norm": 1.5113352185737285, "learning_rate": 4.512762983590195e-05, "loss": 0.3312, "step": 7625 }, { "epoch": 0.904304517965137, "grad_norm": 1.1976578552888544, "learning_rate": 4.5126206012009814e-05, "loss": 0.3467, "step": 7626 }, { "epoch": 0.904423099727262, "grad_norm": 1.0677171228276967, "learning_rate": 4.512478200257874e-05, "loss": 0.2718, "step": 7627 }, { "epoch": 0.904541681489387, "grad_norm": 1.3467072116902425, "learning_rate": 4.512335780762186e-05, "loss": 0.3378, "step": 7628 }, { "epoch": 0.9046602632515119, "grad_norm": 1.4685889967389898, "learning_rate": 4.512193342715229e-05, "loss": 0.3656, "step": 7629 }, { "epoch": 0.9047788450136369, "grad_norm": 1.7047580839614918, "learning_rate": 4.512050886118317e-05, "loss": 0.385, "step": 7630 }, { "epoch": 0.9048974267757619, "grad_norm": 1.1407480669824916, "learning_rate": 4.5119084109727625e-05, "loss": 0.3554, "step": 7631 }, { "epoch": 0.9050160085378869, "grad_norm": 1.2689037438633692, "learning_rate": 4.511765917279881e-05, "loss": 0.3311, "step": 7632 }, { "epoch": 0.9051345903000119, "grad_norm": 1.1727043063497256, "learning_rate": 4.5116234050409844e-05, "loss": 0.3193, "step": 7633 }, { "epoch": 0.9052531720621368, "grad_norm": 1.0472693290287487, "learning_rate": 4.511480874257387e-05, "loss": 0.3126, "step": 7634 }, { "epoch": 0.9053717538242618, "grad_norm": 1.189323386561484, "learning_rate": 4.511338324930402e-05, "loss": 0.2846, "step": 7635 }, { "epoch": 0.9054903355863868, "grad_norm": 1.3210375123031637, "learning_rate": 4.511195757061344e-05, "loss": 0.3441, "step": 7636 }, { "epoch": 0.9056089173485118, "grad_norm": 1.3587720228212135, "learning_rate": 4.5110531706515276e-05, "loss": 0.3931, "step": 7637 }, { "epoch": 0.9057274991106368, "grad_norm": 1.436033234600989, "learning_rate": 4.510910565702267e-05, "loss": 0.4052, "step": 7638 }, { "epoch": 0.9058460808727617, "grad_norm": 0.9418791559367704, "learning_rate": 4.510767942214878e-05, "loss": 0.2322, "step": 7639 }, { "epoch": 0.9059646626348867, "grad_norm": 1.972050158895936, "learning_rate": 4.510625300190673e-05, "loss": 0.5868, "step": 7640 }, { "epoch": 0.9060832443970117, "grad_norm": 1.1638788297177294, "learning_rate": 4.510482639630969e-05, "loss": 0.2705, "step": 7641 }, { "epoch": 0.9062018261591367, "grad_norm": 1.0694999404555772, "learning_rate": 4.510339960537079e-05, "loss": 0.2388, "step": 7642 }, { "epoch": 0.9063204079212617, "grad_norm": 1.615111097332677, "learning_rate": 4.5101972629103214e-05, "loss": 0.3293, "step": 7643 }, { "epoch": 0.9064389896833867, "grad_norm": 1.2812729394065092, "learning_rate": 4.5100545467520095e-05, "loss": 0.3697, "step": 7644 }, { "epoch": 0.9065575714455116, "grad_norm": 1.4810629737856582, "learning_rate": 4.50991181206346e-05, "loss": 0.422, "step": 7645 }, { "epoch": 0.9066761532076366, "grad_norm": 1.380906431966548, "learning_rate": 4.509769058845988e-05, "loss": 0.3633, "step": 7646 }, { "epoch": 0.9067947349697616, "grad_norm": 1.0861355708242368, "learning_rate": 4.5096262871009096e-05, "loss": 0.2496, "step": 7647 }, { "epoch": 0.9069133167318867, "grad_norm": 1.0844665747632678, "learning_rate": 4.509483496829542e-05, "loss": 0.2956, "step": 7648 }, { "epoch": 0.9070318984940117, "grad_norm": 1.304548659951187, "learning_rate": 4.5093406880332e-05, "loss": 0.3878, "step": 7649 }, { "epoch": 0.9071504802561366, "grad_norm": 1.4042248641449977, "learning_rate": 4.5091978607132015e-05, "loss": 0.354, "step": 7650 }, { "epoch": 0.9072690620182616, "grad_norm": 1.2627285659242993, "learning_rate": 4.509055014870862e-05, "loss": 0.3794, "step": 7651 }, { "epoch": 0.9073876437803866, "grad_norm": 1.1345841769064382, "learning_rate": 4.508912150507499e-05, "loss": 0.3156, "step": 7652 }, { "epoch": 0.9075062255425116, "grad_norm": 1.3792118275635625, "learning_rate": 4.5087692676244295e-05, "loss": 0.3413, "step": 7653 }, { "epoch": 0.9076248073046366, "grad_norm": 1.1826370230088494, "learning_rate": 4.508626366222971e-05, "loss": 0.3494, "step": 7654 }, { "epoch": 0.9077433890667616, "grad_norm": 1.018137837716447, "learning_rate": 4.50848344630444e-05, "loss": 0.3236, "step": 7655 }, { "epoch": 0.9078619708288865, "grad_norm": 2.1759517445493075, "learning_rate": 4.5083405078701554e-05, "loss": 0.5963, "step": 7656 }, { "epoch": 0.9079805525910115, "grad_norm": 1.6062260201587681, "learning_rate": 4.508197550921434e-05, "loss": 0.5152, "step": 7657 }, { "epoch": 0.9080991343531365, "grad_norm": 0.8331092582519394, "learning_rate": 4.508054575459593e-05, "loss": 0.2494, "step": 7658 }, { "epoch": 0.9082177161152615, "grad_norm": 1.1139760386927537, "learning_rate": 4.507911581485952e-05, "loss": 0.331, "step": 7659 }, { "epoch": 0.9083362978773865, "grad_norm": 1.4758245264035799, "learning_rate": 4.5077685690018276e-05, "loss": 0.4066, "step": 7660 }, { "epoch": 0.9084548796395114, "grad_norm": 1.4839626983062053, "learning_rate": 4.50762553800854e-05, "loss": 0.4746, "step": 7661 }, { "epoch": 0.9085734614016364, "grad_norm": 1.517629519944083, "learning_rate": 4.507482488507406e-05, "loss": 0.4439, "step": 7662 }, { "epoch": 0.9086920431637614, "grad_norm": 1.2109338771160942, "learning_rate": 4.507339420499746e-05, "loss": 0.4219, "step": 7663 }, { "epoch": 0.9088106249258864, "grad_norm": 1.4511374615506445, "learning_rate": 4.5071963339868775e-05, "loss": 0.4183, "step": 7664 }, { "epoch": 0.9089292066880114, "grad_norm": 1.8907267873200995, "learning_rate": 4.507053228970121e-05, "loss": 0.5963, "step": 7665 }, { "epoch": 0.9090477884501363, "grad_norm": 1.063339428668752, "learning_rate": 4.506910105450795e-05, "loss": 0.2566, "step": 7666 }, { "epoch": 0.9091663702122613, "grad_norm": 1.635825715213325, "learning_rate": 4.506766963430218e-05, "loss": 0.5889, "step": 7667 }, { "epoch": 0.9092849519743863, "grad_norm": 1.3002983081858677, "learning_rate": 4.50662380290971e-05, "loss": 0.2589, "step": 7668 }, { "epoch": 0.9094035337365113, "grad_norm": 1.6255231142940179, "learning_rate": 4.506480623890592e-05, "loss": 0.4548, "step": 7669 }, { "epoch": 0.9095221154986363, "grad_norm": 1.171122896763931, "learning_rate": 4.506337426374183e-05, "loss": 0.2956, "step": 7670 }, { "epoch": 0.9096406972607612, "grad_norm": 1.0574921610417867, "learning_rate": 4.506194210361804e-05, "loss": 0.2886, "step": 7671 }, { "epoch": 0.9097592790228862, "grad_norm": 0.8213130926893109, "learning_rate": 4.506050975854773e-05, "loss": 0.2092, "step": 7672 }, { "epoch": 0.9098778607850113, "grad_norm": 1.1599921557383326, "learning_rate": 4.505907722854413e-05, "loss": 0.3549, "step": 7673 }, { "epoch": 0.9099964425471363, "grad_norm": 1.3208738309051264, "learning_rate": 4.505764451362044e-05, "loss": 0.412, "step": 7674 }, { "epoch": 0.9101150243092613, "grad_norm": 1.511083454075665, "learning_rate": 4.5056211613789856e-05, "loss": 0.5389, "step": 7675 }, { "epoch": 0.9102336060713863, "grad_norm": 1.2081578218188465, "learning_rate": 4.50547785290656e-05, "loss": 0.3078, "step": 7676 }, { "epoch": 0.9103521878335112, "grad_norm": 1.4144430412015212, "learning_rate": 4.505334525946088e-05, "loss": 0.3656, "step": 7677 }, { "epoch": 0.9104707695956362, "grad_norm": 1.1437440407991664, "learning_rate": 4.50519118049889e-05, "loss": 0.3226, "step": 7678 }, { "epoch": 0.9105893513577612, "grad_norm": 1.1660838375465763, "learning_rate": 4.505047816566289e-05, "loss": 0.3526, "step": 7679 }, { "epoch": 0.9107079331198862, "grad_norm": 1.477119765731285, "learning_rate": 4.504904434149606e-05, "loss": 0.3818, "step": 7680 }, { "epoch": 0.9108265148820112, "grad_norm": 1.2036628431667507, "learning_rate": 4.5047610332501624e-05, "loss": 0.2814, "step": 7681 }, { "epoch": 0.9109450966441361, "grad_norm": 1.7013263901607723, "learning_rate": 4.50461761386928e-05, "loss": 0.4727, "step": 7682 }, { "epoch": 0.9110636784062611, "grad_norm": 1.675130444889837, "learning_rate": 4.504474176008282e-05, "loss": 0.5428, "step": 7683 }, { "epoch": 0.9111822601683861, "grad_norm": 1.4137985124790995, "learning_rate": 4.5043307196684895e-05, "loss": 0.4122, "step": 7684 }, { "epoch": 0.9113008419305111, "grad_norm": 1.273262266576916, "learning_rate": 4.504187244851226e-05, "loss": 0.3498, "step": 7685 }, { "epoch": 0.9114194236926361, "grad_norm": 1.5569199956430209, "learning_rate": 4.504043751557814e-05, "loss": 0.4072, "step": 7686 }, { "epoch": 0.911538005454761, "grad_norm": 1.4150380293196185, "learning_rate": 4.503900239789576e-05, "loss": 0.4747, "step": 7687 }, { "epoch": 0.911656587216886, "grad_norm": 1.38414872306566, "learning_rate": 4.503756709547835e-05, "loss": 0.3839, "step": 7688 }, { "epoch": 0.911775168979011, "grad_norm": 1.3807314393300076, "learning_rate": 4.503613160833915e-05, "loss": 0.3979, "step": 7689 }, { "epoch": 0.911893750741136, "grad_norm": 1.2061788482990152, "learning_rate": 4.503469593649138e-05, "loss": 0.3431, "step": 7690 }, { "epoch": 0.912012332503261, "grad_norm": 0.9712296714000724, "learning_rate": 4.503326007994828e-05, "loss": 0.309, "step": 7691 }, { "epoch": 0.912130914265386, "grad_norm": 1.3255974120177378, "learning_rate": 4.503182403872309e-05, "loss": 0.2816, "step": 7692 }, { "epoch": 0.9122494960275109, "grad_norm": 1.7071403192633634, "learning_rate": 4.503038781282905e-05, "loss": 0.4749, "step": 7693 }, { "epoch": 0.9123680777896359, "grad_norm": 1.3729855806599593, "learning_rate": 4.50289514022794e-05, "loss": 0.3956, "step": 7694 }, { "epoch": 0.9124866595517609, "grad_norm": 1.2462613834121212, "learning_rate": 4.502751480708737e-05, "loss": 0.3871, "step": 7695 }, { "epoch": 0.9126052413138859, "grad_norm": 1.138251638426353, "learning_rate": 4.502607802726621e-05, "loss": 0.3142, "step": 7696 }, { "epoch": 0.912723823076011, "grad_norm": 1.2921820241406097, "learning_rate": 4.5024641062829185e-05, "loss": 0.3981, "step": 7697 }, { "epoch": 0.912842404838136, "grad_norm": 0.9615263250883, "learning_rate": 4.502320391378951e-05, "loss": 0.2454, "step": 7698 }, { "epoch": 0.9129609866002609, "grad_norm": 1.4042691672533647, "learning_rate": 4.502176658016046e-05, "loss": 0.5004, "step": 7699 }, { "epoch": 0.9130795683623859, "grad_norm": 1.1200615216348662, "learning_rate": 4.502032906195527e-05, "loss": 0.337, "step": 7700 }, { "epoch": 0.9131981501245109, "grad_norm": 1.4519937717845874, "learning_rate": 4.501889135918719e-05, "loss": 0.5086, "step": 7701 }, { "epoch": 0.9133167318866359, "grad_norm": 1.6130620612462245, "learning_rate": 4.5017453471869484e-05, "loss": 0.4938, "step": 7702 }, { "epoch": 0.9134353136487608, "grad_norm": 1.1965081450360677, "learning_rate": 4.501601540001541e-05, "loss": 0.3261, "step": 7703 }, { "epoch": 0.9135538954108858, "grad_norm": 1.2206727168967226, "learning_rate": 4.501457714363821e-05, "loss": 0.3007, "step": 7704 }, { "epoch": 0.9136724771730108, "grad_norm": 1.6039047338414572, "learning_rate": 4.5013138702751166e-05, "loss": 0.4393, "step": 7705 }, { "epoch": 0.9137910589351358, "grad_norm": 2.2491317851873167, "learning_rate": 4.5011700077367516e-05, "loss": 0.7463, "step": 7706 }, { "epoch": 0.9139096406972608, "grad_norm": 1.1874133827961217, "learning_rate": 4.501026126750053e-05, "loss": 0.3606, "step": 7707 }, { "epoch": 0.9140282224593858, "grad_norm": 0.9512269570210055, "learning_rate": 4.500882227316348e-05, "loss": 0.2632, "step": 7708 }, { "epoch": 0.9141468042215107, "grad_norm": 0.8199160742025952, "learning_rate": 4.500738309436962e-05, "loss": 0.2457, "step": 7709 }, { "epoch": 0.9142653859836357, "grad_norm": 1.2683709105002199, "learning_rate": 4.500594373113222e-05, "loss": 0.3402, "step": 7710 }, { "epoch": 0.9143839677457607, "grad_norm": 1.4816787440459755, "learning_rate": 4.5004504183464555e-05, "loss": 0.496, "step": 7711 }, { "epoch": 0.9145025495078857, "grad_norm": 1.634744056547108, "learning_rate": 4.50030644513799e-05, "loss": 0.4356, "step": 7712 }, { "epoch": 0.9146211312700107, "grad_norm": 1.174486586674016, "learning_rate": 4.5001624534891506e-05, "loss": 0.3455, "step": 7713 }, { "epoch": 0.9147397130321356, "grad_norm": 1.213793091609422, "learning_rate": 4.500018443401267e-05, "loss": 0.3747, "step": 7714 }, { "epoch": 0.9148582947942606, "grad_norm": 1.1225562729783105, "learning_rate": 4.499874414875666e-05, "loss": 0.3452, "step": 7715 }, { "epoch": 0.9149768765563856, "grad_norm": 1.8597111459046882, "learning_rate": 4.4997303679136745e-05, "loss": 0.4706, "step": 7716 }, { "epoch": 0.9150954583185106, "grad_norm": 1.985115860260531, "learning_rate": 4.4995863025166216e-05, "loss": 0.6212, "step": 7717 }, { "epoch": 0.9152140400806356, "grad_norm": 1.2896826761593936, "learning_rate": 4.499442218685835e-05, "loss": 0.348, "step": 7718 }, { "epoch": 0.9153326218427605, "grad_norm": 1.3813634717277044, "learning_rate": 4.4992981164226437e-05, "loss": 0.3686, "step": 7719 }, { "epoch": 0.9154512036048855, "grad_norm": 1.4682222762545816, "learning_rate": 4.499153995728374e-05, "loss": 0.4991, "step": 7720 }, { "epoch": 0.9155697853670105, "grad_norm": 1.0376009256948489, "learning_rate": 4.499009856604358e-05, "loss": 0.2197, "step": 7721 }, { "epoch": 0.9156883671291356, "grad_norm": 1.3851420555517167, "learning_rate": 4.4988656990519215e-05, "loss": 0.506, "step": 7722 }, { "epoch": 0.9158069488912606, "grad_norm": 1.1444727163929822, "learning_rate": 4.4987215230723944e-05, "loss": 0.3404, "step": 7723 }, { "epoch": 0.9159255306533856, "grad_norm": 1.3359278138288624, "learning_rate": 4.4985773286671055e-05, "loss": 0.4172, "step": 7724 }, { "epoch": 0.9160441124155105, "grad_norm": 1.274091100791459, "learning_rate": 4.498433115837385e-05, "loss": 0.2933, "step": 7725 }, { "epoch": 0.9161626941776355, "grad_norm": 1.223343759689751, "learning_rate": 4.498288884584562e-05, "loss": 0.2683, "step": 7726 }, { "epoch": 0.9162812759397605, "grad_norm": 1.106464979742115, "learning_rate": 4.498144634909965e-05, "loss": 0.3483, "step": 7727 }, { "epoch": 0.9163998577018855, "grad_norm": 1.4926558516131125, "learning_rate": 4.498000366814925e-05, "loss": 0.3605, "step": 7728 }, { "epoch": 0.9165184394640105, "grad_norm": 1.2760492343248497, "learning_rate": 4.4978560803007726e-05, "loss": 0.3403, "step": 7729 }, { "epoch": 0.9166370212261354, "grad_norm": 1.6229488094191824, "learning_rate": 4.4977117753688365e-05, "loss": 0.5266, "step": 7730 }, { "epoch": 0.9167556029882604, "grad_norm": 1.4070076177336808, "learning_rate": 4.497567452020447e-05, "loss": 0.4731, "step": 7731 }, { "epoch": 0.9168741847503854, "grad_norm": 1.199092379035364, "learning_rate": 4.497423110256936e-05, "loss": 0.3266, "step": 7732 }, { "epoch": 0.9169927665125104, "grad_norm": 1.7048799958656824, "learning_rate": 4.497278750079633e-05, "loss": 0.5039, "step": 7733 }, { "epoch": 0.9171113482746354, "grad_norm": 1.6579895101685898, "learning_rate": 4.497134371489869e-05, "loss": 0.509, "step": 7734 }, { "epoch": 0.9172299300367603, "grad_norm": 1.4785513169855502, "learning_rate": 4.4969899744889754e-05, "loss": 0.3045, "step": 7735 }, { "epoch": 0.9173485117988853, "grad_norm": 1.2631043574427585, "learning_rate": 4.496845559078283e-05, "loss": 0.3695, "step": 7736 }, { "epoch": 0.9174670935610103, "grad_norm": 0.9002564889910122, "learning_rate": 4.496701125259124e-05, "loss": 0.2759, "step": 7737 }, { "epoch": 0.9175856753231353, "grad_norm": 1.1540190694457242, "learning_rate": 4.4965566730328276e-05, "loss": 0.3049, "step": 7738 }, { "epoch": 0.9177042570852603, "grad_norm": 1.7374797453709618, "learning_rate": 4.4964122024007284e-05, "loss": 0.5661, "step": 7739 }, { "epoch": 0.9178228388473852, "grad_norm": 1.8064114537865115, "learning_rate": 4.4962677133641565e-05, "loss": 0.3956, "step": 7740 }, { "epoch": 0.9179414206095102, "grad_norm": 1.2339673062852203, "learning_rate": 4.496123205924444e-05, "loss": 0.3407, "step": 7741 }, { "epoch": 0.9180600023716352, "grad_norm": 1.5520125529019726, "learning_rate": 4.495978680082923e-05, "loss": 0.4136, "step": 7742 }, { "epoch": 0.9181785841337602, "grad_norm": 1.342748952675133, "learning_rate": 4.495834135840927e-05, "loss": 0.3739, "step": 7743 }, { "epoch": 0.9182971658958852, "grad_norm": 1.1636095490783083, "learning_rate": 4.495689573199787e-05, "loss": 0.2398, "step": 7744 }, { "epoch": 0.9184157476580102, "grad_norm": 0.9907607123120113, "learning_rate": 4.495544992160837e-05, "loss": 0.2759, "step": 7745 }, { "epoch": 0.9185343294201351, "grad_norm": 1.1290160577879085, "learning_rate": 4.495400392725409e-05, "loss": 0.4066, "step": 7746 }, { "epoch": 0.9186529111822602, "grad_norm": 1.453740632832844, "learning_rate": 4.4952557748948365e-05, "loss": 0.4522, "step": 7747 }, { "epoch": 0.9187714929443852, "grad_norm": 1.2145791747992798, "learning_rate": 4.4951111386704524e-05, "loss": 0.2882, "step": 7748 }, { "epoch": 0.9188900747065102, "grad_norm": 1.0987434552313782, "learning_rate": 4.49496648405359e-05, "loss": 0.3841, "step": 7749 }, { "epoch": 0.9190086564686352, "grad_norm": 1.3533597149787853, "learning_rate": 4.4948218110455834e-05, "loss": 0.3534, "step": 7750 }, { "epoch": 0.9191272382307601, "grad_norm": 1.3079569691993735, "learning_rate": 4.4946771196477656e-05, "loss": 0.413, "step": 7751 }, { "epoch": 0.9192458199928851, "grad_norm": 1.0710746039232166, "learning_rate": 4.4945324098614703e-05, "loss": 0.3431, "step": 7752 }, { "epoch": 0.9193644017550101, "grad_norm": 1.5123226067311801, "learning_rate": 4.4943876816880325e-05, "loss": 0.4487, "step": 7753 }, { "epoch": 0.9194829835171351, "grad_norm": 1.3054719213535602, "learning_rate": 4.4942429351287865e-05, "loss": 0.422, "step": 7754 }, { "epoch": 0.9196015652792601, "grad_norm": 1.2754969400118263, "learning_rate": 4.494098170185066e-05, "loss": 0.4157, "step": 7755 }, { "epoch": 0.919720147041385, "grad_norm": 1.0353761916412845, "learning_rate": 4.493953386858205e-05, "loss": 0.2645, "step": 7756 }, { "epoch": 0.91983872880351, "grad_norm": 0.960425663683585, "learning_rate": 4.4938085851495396e-05, "loss": 0.2899, "step": 7757 }, { "epoch": 0.919957310565635, "grad_norm": 1.4405704187411301, "learning_rate": 4.493663765060403e-05, "loss": 0.374, "step": 7758 }, { "epoch": 0.92007589232776, "grad_norm": 1.406107698127145, "learning_rate": 4.493518926592132e-05, "loss": 0.3383, "step": 7759 }, { "epoch": 0.920194474089885, "grad_norm": 1.392004196012602, "learning_rate": 4.493374069746061e-05, "loss": 0.261, "step": 7760 }, { "epoch": 0.92031305585201, "grad_norm": 1.9732687724175109, "learning_rate": 4.493229194523526e-05, "loss": 0.5543, "step": 7761 }, { "epoch": 0.9204316376141349, "grad_norm": 1.3085620373539597, "learning_rate": 4.493084300925862e-05, "loss": 0.3644, "step": 7762 }, { "epoch": 0.9205502193762599, "grad_norm": 1.0632606161570382, "learning_rate": 4.492939388954404e-05, "loss": 0.2715, "step": 7763 }, { "epoch": 0.9206688011383849, "grad_norm": 1.5046128090348205, "learning_rate": 4.492794458610489e-05, "loss": 0.4582, "step": 7764 }, { "epoch": 0.9207873829005099, "grad_norm": 1.3508783636181265, "learning_rate": 4.492649509895453e-05, "loss": 0.4085, "step": 7765 }, { "epoch": 0.9209059646626349, "grad_norm": 1.2293576191152629, "learning_rate": 4.4925045428106326e-05, "loss": 0.2648, "step": 7766 }, { "epoch": 0.9210245464247598, "grad_norm": 1.1868116106358735, "learning_rate": 4.492359557357363e-05, "loss": 0.3057, "step": 7767 }, { "epoch": 0.9211431281868848, "grad_norm": 1.2450503505631003, "learning_rate": 4.492214553536982e-05, "loss": 0.3368, "step": 7768 }, { "epoch": 0.9212617099490098, "grad_norm": 1.1714029604154623, "learning_rate": 4.492069531350826e-05, "loss": 0.2999, "step": 7769 }, { "epoch": 0.9213802917111348, "grad_norm": 1.1053039104032298, "learning_rate": 4.4919244908002306e-05, "loss": 0.2773, "step": 7770 }, { "epoch": 0.9214988734732598, "grad_norm": 1.7467015601541371, "learning_rate": 4.4917794318865345e-05, "loss": 0.4541, "step": 7771 }, { "epoch": 0.9216174552353849, "grad_norm": 1.8997667684381532, "learning_rate": 4.491634354611075e-05, "loss": 0.4688, "step": 7772 }, { "epoch": 0.9217360369975098, "grad_norm": 1.326029053658454, "learning_rate": 4.491489258975189e-05, "loss": 0.3658, "step": 7773 }, { "epoch": 0.9218546187596348, "grad_norm": 1.234995333356106, "learning_rate": 4.491344144980214e-05, "loss": 0.3367, "step": 7774 }, { "epoch": 0.9219732005217598, "grad_norm": 1.28478611468339, "learning_rate": 4.491199012627487e-05, "loss": 0.3281, "step": 7775 }, { "epoch": 0.9220917822838848, "grad_norm": 1.0645342712730936, "learning_rate": 4.4910538619183484e-05, "loss": 0.3272, "step": 7776 }, { "epoch": 0.9222103640460098, "grad_norm": 1.694435002766982, "learning_rate": 4.490908692854134e-05, "loss": 0.4679, "step": 7777 }, { "epoch": 0.9223289458081347, "grad_norm": 1.1724174861342698, "learning_rate": 4.490763505436183e-05, "loss": 0.3552, "step": 7778 }, { "epoch": 0.9224475275702597, "grad_norm": 1.1921281212186352, "learning_rate": 4.490618299665834e-05, "loss": 0.3254, "step": 7779 }, { "epoch": 0.9225661093323847, "grad_norm": 1.849401987964552, "learning_rate": 4.490473075544425e-05, "loss": 0.4285, "step": 7780 }, { "epoch": 0.9226846910945097, "grad_norm": 1.6810878097266828, "learning_rate": 4.490327833073295e-05, "loss": 0.5888, "step": 7781 }, { "epoch": 0.9228032728566347, "grad_norm": 1.0557502911062124, "learning_rate": 4.490182572253783e-05, "loss": 0.2591, "step": 7782 }, { "epoch": 0.9229218546187596, "grad_norm": 1.4082389944819793, "learning_rate": 4.4900372930872293e-05, "loss": 0.441, "step": 7783 }, { "epoch": 0.9230404363808846, "grad_norm": 1.6583354988312733, "learning_rate": 4.489891995574971e-05, "loss": 0.4747, "step": 7784 }, { "epoch": 0.9231590181430096, "grad_norm": 1.3582323683861814, "learning_rate": 4.489746679718349e-05, "loss": 0.476, "step": 7785 }, { "epoch": 0.9232775999051346, "grad_norm": 1.1417796657090502, "learning_rate": 4.489601345518702e-05, "loss": 0.2925, "step": 7786 }, { "epoch": 0.9233961816672596, "grad_norm": 1.114183049129594, "learning_rate": 4.489455992977371e-05, "loss": 0.2992, "step": 7787 }, { "epoch": 0.9235147634293845, "grad_norm": 1.1692663367404215, "learning_rate": 4.4893106220956956e-05, "loss": 0.3529, "step": 7788 }, { "epoch": 0.9236333451915095, "grad_norm": 0.9311203608842018, "learning_rate": 4.4891652328750154e-05, "loss": 0.3438, "step": 7789 }, { "epoch": 0.9237519269536345, "grad_norm": 1.3408234057398656, "learning_rate": 4.489019825316671e-05, "loss": 0.4041, "step": 7790 }, { "epoch": 0.9238705087157595, "grad_norm": 1.1566618021141706, "learning_rate": 4.4888743994220026e-05, "loss": 0.3666, "step": 7791 }, { "epoch": 0.9239890904778845, "grad_norm": 1.0540250896765162, "learning_rate": 4.4887289551923514e-05, "loss": 0.3165, "step": 7792 }, { "epoch": 0.9241076722400094, "grad_norm": 1.0502523512011264, "learning_rate": 4.488583492629058e-05, "loss": 0.2931, "step": 7793 }, { "epoch": 0.9242262540021344, "grad_norm": 1.7305521282442995, "learning_rate": 4.488438011733464e-05, "loss": 0.5915, "step": 7794 }, { "epoch": 0.9243448357642594, "grad_norm": 1.4355646696957487, "learning_rate": 4.4882925125069096e-05, "loss": 0.381, "step": 7795 }, { "epoch": 0.9244634175263844, "grad_norm": 1.2119279890716508, "learning_rate": 4.488146994950736e-05, "loss": 0.3972, "step": 7796 }, { "epoch": 0.9245819992885095, "grad_norm": 1.3533266868043552, "learning_rate": 4.4880014590662856e-05, "loss": 0.3728, "step": 7797 }, { "epoch": 0.9247005810506345, "grad_norm": 1.0461933564120343, "learning_rate": 4.487855904854899e-05, "loss": 0.3, "step": 7798 }, { "epoch": 0.9248191628127594, "grad_norm": 1.4354944586498377, "learning_rate": 4.487710332317919e-05, "loss": 0.4518, "step": 7799 }, { "epoch": 0.9249377445748844, "grad_norm": 1.1135224619163588, "learning_rate": 4.4875647414566876e-05, "loss": 0.3069, "step": 7800 }, { "epoch": 0.9250563263370094, "grad_norm": 1.34825534621954, "learning_rate": 4.4874191322725455e-05, "loss": 0.3885, "step": 7801 }, { "epoch": 0.9251749080991344, "grad_norm": 1.103581954364754, "learning_rate": 4.487273504766837e-05, "loss": 0.2995, "step": 7802 }, { "epoch": 0.9252934898612594, "grad_norm": 1.049265812119674, "learning_rate": 4.487127858940904e-05, "loss": 0.2386, "step": 7803 }, { "epoch": 0.9254120716233843, "grad_norm": 1.2216943670211076, "learning_rate": 4.486982194796088e-05, "loss": 0.3258, "step": 7804 }, { "epoch": 0.9255306533855093, "grad_norm": 1.1602022379160677, "learning_rate": 4.486836512333734e-05, "loss": 0.3356, "step": 7805 }, { "epoch": 0.9256492351476343, "grad_norm": 1.3074472591938728, "learning_rate": 4.4866908115551826e-05, "loss": 0.4347, "step": 7806 }, { "epoch": 0.9257678169097593, "grad_norm": 2.23380788788739, "learning_rate": 4.4865450924617786e-05, "loss": 0.6296, "step": 7807 }, { "epoch": 0.9258863986718843, "grad_norm": 1.2642368374191435, "learning_rate": 4.486399355054865e-05, "loss": 0.3576, "step": 7808 }, { "epoch": 0.9260049804340093, "grad_norm": 1.327129019083146, "learning_rate": 4.486253599335785e-05, "loss": 0.4236, "step": 7809 }, { "epoch": 0.9261235621961342, "grad_norm": 1.2427406330294402, "learning_rate": 4.486107825305883e-05, "loss": 0.4971, "step": 7810 }, { "epoch": 0.9262421439582592, "grad_norm": 1.0740218556655832, "learning_rate": 4.485962032966502e-05, "loss": 0.3215, "step": 7811 }, { "epoch": 0.9263607257203842, "grad_norm": 1.0993673509794941, "learning_rate": 4.485816222318986e-05, "loss": 0.2751, "step": 7812 }, { "epoch": 0.9264793074825092, "grad_norm": 1.4535560799380876, "learning_rate": 4.4856703933646805e-05, "loss": 0.4098, "step": 7813 }, { "epoch": 0.9265978892446342, "grad_norm": 1.2492611872200596, "learning_rate": 4.485524546104928e-05, "loss": 0.3422, "step": 7814 }, { "epoch": 0.9267164710067591, "grad_norm": 1.2266483888374327, "learning_rate": 4.485378680541075e-05, "loss": 0.3532, "step": 7815 }, { "epoch": 0.9268350527688841, "grad_norm": 1.5210786781429928, "learning_rate": 4.485232796674465e-05, "loss": 0.5057, "step": 7816 }, { "epoch": 0.9269536345310091, "grad_norm": 1.2822330896168308, "learning_rate": 4.485086894506442e-05, "loss": 0.4074, "step": 7817 }, { "epoch": 0.9270722162931341, "grad_norm": 1.4353409075424353, "learning_rate": 4.484940974038353e-05, "loss": 0.3878, "step": 7818 }, { "epoch": 0.9271907980552591, "grad_norm": 1.247079212779066, "learning_rate": 4.484795035271542e-05, "loss": 0.349, "step": 7819 }, { "epoch": 0.927309379817384, "grad_norm": 1.393291279251636, "learning_rate": 4.484649078207355e-05, "loss": 0.3462, "step": 7820 }, { "epoch": 0.927427961579509, "grad_norm": 1.7772361292463945, "learning_rate": 4.484503102847137e-05, "loss": 0.413, "step": 7821 }, { "epoch": 0.9275465433416341, "grad_norm": 0.9408083827503743, "learning_rate": 4.4843571091922346e-05, "loss": 0.2288, "step": 7822 }, { "epoch": 0.9276651251037591, "grad_norm": 1.4865304903677392, "learning_rate": 4.484211097243992e-05, "loss": 0.3711, "step": 7823 }, { "epoch": 0.9277837068658841, "grad_norm": 1.3373965518004143, "learning_rate": 4.484065067003757e-05, "loss": 0.4139, "step": 7824 }, { "epoch": 0.927902288628009, "grad_norm": 1.2838123002941628, "learning_rate": 4.483919018472875e-05, "loss": 0.3333, "step": 7825 }, { "epoch": 0.928020870390134, "grad_norm": 1.1496044077867684, "learning_rate": 4.4837729516526924e-05, "loss": 0.316, "step": 7826 }, { "epoch": 0.928139452152259, "grad_norm": 1.1830967789911926, "learning_rate": 4.483626866544556e-05, "loss": 0.2934, "step": 7827 }, { "epoch": 0.928258033914384, "grad_norm": 1.1933893354549958, "learning_rate": 4.483480763149812e-05, "loss": 0.3351, "step": 7828 }, { "epoch": 0.928376615676509, "grad_norm": 1.2476646950412273, "learning_rate": 4.483334641469808e-05, "loss": 0.2912, "step": 7829 }, { "epoch": 0.928495197438634, "grad_norm": 1.5287684976455276, "learning_rate": 4.48318850150589e-05, "loss": 0.4633, "step": 7830 }, { "epoch": 0.9286137792007589, "grad_norm": 1.5873219347335292, "learning_rate": 4.483042343259407e-05, "loss": 0.5446, "step": 7831 }, { "epoch": 0.9287323609628839, "grad_norm": 1.2483968199882163, "learning_rate": 4.4828961667317046e-05, "loss": 0.2844, "step": 7832 }, { "epoch": 0.9288509427250089, "grad_norm": 1.457496674730435, "learning_rate": 4.482749971924132e-05, "loss": 0.3786, "step": 7833 }, { "epoch": 0.9289695244871339, "grad_norm": 1.1203515871820942, "learning_rate": 4.482603758838035e-05, "loss": 0.3402, "step": 7834 }, { "epoch": 0.9290881062492589, "grad_norm": 1.2665410600997544, "learning_rate": 4.4824575274747635e-05, "loss": 0.3176, "step": 7835 }, { "epoch": 0.9292066880113838, "grad_norm": 1.595380170842477, "learning_rate": 4.482311277835664e-05, "loss": 0.4811, "step": 7836 }, { "epoch": 0.9293252697735088, "grad_norm": 2.1240734028816495, "learning_rate": 4.482165009922086e-05, "loss": 0.5238, "step": 7837 }, { "epoch": 0.9294438515356338, "grad_norm": 0.9491026579214498, "learning_rate": 4.482018723735377e-05, "loss": 0.253, "step": 7838 }, { "epoch": 0.9295624332977588, "grad_norm": 1.5822020288784522, "learning_rate": 4.481872419276886e-05, "loss": 0.5196, "step": 7839 }, { "epoch": 0.9296810150598838, "grad_norm": 1.5967015821742594, "learning_rate": 4.481726096547961e-05, "loss": 0.4201, "step": 7840 }, { "epoch": 0.9297995968220087, "grad_norm": 1.1176860166406442, "learning_rate": 4.481579755549952e-05, "loss": 0.3291, "step": 7841 }, { "epoch": 0.9299181785841337, "grad_norm": 1.4521404203464887, "learning_rate": 4.481433396284207e-05, "loss": 0.4124, "step": 7842 }, { "epoch": 0.9300367603462587, "grad_norm": 0.9294814839923479, "learning_rate": 4.4812870187520766e-05, "loss": 0.3229, "step": 7843 }, { "epoch": 0.9301553421083837, "grad_norm": 1.1213255975334284, "learning_rate": 4.48114062295491e-05, "loss": 0.3144, "step": 7844 }, { "epoch": 0.9302739238705087, "grad_norm": 1.1760118526969023, "learning_rate": 4.480994208894055e-05, "loss": 0.3546, "step": 7845 }, { "epoch": 0.9303925056326336, "grad_norm": 1.4061415612488872, "learning_rate": 4.4808477765708636e-05, "loss": 0.4154, "step": 7846 }, { "epoch": 0.9305110873947587, "grad_norm": 1.2376305672650991, "learning_rate": 4.480701325986685e-05, "loss": 0.3207, "step": 7847 }, { "epoch": 0.9306296691568837, "grad_norm": 0.7375877856855838, "learning_rate": 4.480554857142868e-05, "loss": 0.1926, "step": 7848 }, { "epoch": 0.9307482509190087, "grad_norm": 1.1229883476650957, "learning_rate": 4.480408370040765e-05, "loss": 0.2563, "step": 7849 }, { "epoch": 0.9308668326811337, "grad_norm": 1.854630983384136, "learning_rate": 4.480261864681724e-05, "loss": 0.5666, "step": 7850 }, { "epoch": 0.9309854144432587, "grad_norm": 1.7259865506022278, "learning_rate": 4.480115341067098e-05, "loss": 0.4155, "step": 7851 }, { "epoch": 0.9311039962053836, "grad_norm": 1.6291433492084548, "learning_rate": 4.479968799198237e-05, "loss": 0.5189, "step": 7852 }, { "epoch": 0.9312225779675086, "grad_norm": 1.1049110082900269, "learning_rate": 4.479822239076491e-05, "loss": 0.2703, "step": 7853 }, { "epoch": 0.9313411597296336, "grad_norm": 1.1478362112696097, "learning_rate": 4.479675660703212e-05, "loss": 0.3749, "step": 7854 }, { "epoch": 0.9314597414917586, "grad_norm": 1.0511792951050525, "learning_rate": 4.479529064079751e-05, "loss": 0.2883, "step": 7855 }, { "epoch": 0.9315783232538836, "grad_norm": 1.3709253421738306, "learning_rate": 4.47938244920746e-05, "loss": 0.3887, "step": 7856 }, { "epoch": 0.9316969050160085, "grad_norm": 1.3121005183673742, "learning_rate": 4.479235816087689e-05, "loss": 0.4166, "step": 7857 }, { "epoch": 0.9318154867781335, "grad_norm": 1.2588755475209268, "learning_rate": 4.479089164721792e-05, "loss": 0.3557, "step": 7858 }, { "epoch": 0.9319340685402585, "grad_norm": 1.1317530987234352, "learning_rate": 4.478942495111119e-05, "loss": 0.232, "step": 7859 }, { "epoch": 0.9320526503023835, "grad_norm": 1.0478994811404854, "learning_rate": 4.4787958072570236e-05, "loss": 0.2672, "step": 7860 }, { "epoch": 0.9321712320645085, "grad_norm": 1.6150924653668606, "learning_rate": 4.4786491011608564e-05, "loss": 0.4797, "step": 7861 }, { "epoch": 0.9322898138266335, "grad_norm": 1.7604349412057645, "learning_rate": 4.478502376823972e-05, "loss": 0.4635, "step": 7862 }, { "epoch": 0.9324083955887584, "grad_norm": 1.563629897448531, "learning_rate": 4.4783556342477215e-05, "loss": 0.4081, "step": 7863 }, { "epoch": 0.9325269773508834, "grad_norm": 1.3254193678936212, "learning_rate": 4.478208873433458e-05, "loss": 0.2975, "step": 7864 }, { "epoch": 0.9326455591130084, "grad_norm": 1.581870455184607, "learning_rate": 4.4780620943825344e-05, "loss": 0.4012, "step": 7865 }, { "epoch": 0.9327641408751334, "grad_norm": 1.2284418806157484, "learning_rate": 4.4779152970963044e-05, "loss": 0.3011, "step": 7866 }, { "epoch": 0.9328827226372584, "grad_norm": 1.9716541877979046, "learning_rate": 4.477768481576121e-05, "loss": 0.4165, "step": 7867 }, { "epoch": 0.9330013043993833, "grad_norm": 1.2946986859962948, "learning_rate": 4.477621647823337e-05, "loss": 0.378, "step": 7868 }, { "epoch": 0.9331198861615083, "grad_norm": 0.9908641081653118, "learning_rate": 4.477474795839306e-05, "loss": 0.3587, "step": 7869 }, { "epoch": 0.9332384679236333, "grad_norm": 0.9760000239221137, "learning_rate": 4.477327925625383e-05, "loss": 0.2375, "step": 7870 }, { "epoch": 0.9333570496857583, "grad_norm": 1.3885695656913952, "learning_rate": 4.4771810371829206e-05, "loss": 0.4132, "step": 7871 }, { "epoch": 0.9334756314478834, "grad_norm": 1.4924264398595317, "learning_rate": 4.477034130513274e-05, "loss": 0.4154, "step": 7872 }, { "epoch": 0.9335942132100084, "grad_norm": 1.8066033486954594, "learning_rate": 4.476887205617798e-05, "loss": 0.4199, "step": 7873 }, { "epoch": 0.9337127949721333, "grad_norm": 1.4149581234162891, "learning_rate": 4.4767402624978456e-05, "loss": 0.45, "step": 7874 }, { "epoch": 0.9338313767342583, "grad_norm": 1.2024014224278727, "learning_rate": 4.476593301154772e-05, "loss": 0.3622, "step": 7875 }, { "epoch": 0.9339499584963833, "grad_norm": 1.2812416206331476, "learning_rate": 4.476446321589931e-05, "loss": 0.3227, "step": 7876 }, { "epoch": 0.9340685402585083, "grad_norm": 1.0629513956954166, "learning_rate": 4.4762993238046805e-05, "loss": 0.2742, "step": 7877 }, { "epoch": 0.9341871220206333, "grad_norm": 2.3772212337473686, "learning_rate": 4.476152307800372e-05, "loss": 0.71, "step": 7878 }, { "epoch": 0.9343057037827582, "grad_norm": 1.399959406051211, "learning_rate": 4.4760052735783634e-05, "loss": 0.4355, "step": 7879 }, { "epoch": 0.9344242855448832, "grad_norm": 1.3084336705994624, "learning_rate": 4.47585822114001e-05, "loss": 0.3662, "step": 7880 }, { "epoch": 0.9345428673070082, "grad_norm": 1.2793369175301326, "learning_rate": 4.475711150486666e-05, "loss": 0.3712, "step": 7881 }, { "epoch": 0.9346614490691332, "grad_norm": 1.0757156580989016, "learning_rate": 4.475564061619688e-05, "loss": 0.3256, "step": 7882 }, { "epoch": 0.9347800308312582, "grad_norm": 1.0934839206249856, "learning_rate": 4.475416954540431e-05, "loss": 0.3677, "step": 7883 }, { "epoch": 0.9348986125933831, "grad_norm": 1.0348354180289654, "learning_rate": 4.475269829250254e-05, "loss": 0.312, "step": 7884 }, { "epoch": 0.9350171943555081, "grad_norm": 1.793066961184684, "learning_rate": 4.47512268575051e-05, "loss": 0.4712, "step": 7885 }, { "epoch": 0.9351357761176331, "grad_norm": 1.2234240240236505, "learning_rate": 4.474975524042557e-05, "loss": 0.4183, "step": 7886 }, { "epoch": 0.9352543578797581, "grad_norm": 1.2305756453587218, "learning_rate": 4.4748283441277514e-05, "loss": 0.3182, "step": 7887 }, { "epoch": 0.9353729396418831, "grad_norm": 1.2386729558645155, "learning_rate": 4.474681146007451e-05, "loss": 0.3458, "step": 7888 }, { "epoch": 0.935491521404008, "grad_norm": 1.2798982321391188, "learning_rate": 4.474533929683011e-05, "loss": 0.3886, "step": 7889 }, { "epoch": 0.935610103166133, "grad_norm": 1.1533686314110427, "learning_rate": 4.474386695155791e-05, "loss": 0.3434, "step": 7890 }, { "epoch": 0.935728684928258, "grad_norm": 1.1045975223822042, "learning_rate": 4.4742394424271455e-05, "loss": 0.2739, "step": 7891 }, { "epoch": 0.935847266690383, "grad_norm": 1.3697305999648342, "learning_rate": 4.474092171498434e-05, "loss": 0.3759, "step": 7892 }, { "epoch": 0.935965848452508, "grad_norm": 1.220260455499924, "learning_rate": 4.473944882371013e-05, "loss": 0.3749, "step": 7893 }, { "epoch": 0.936084430214633, "grad_norm": 1.3036442036027642, "learning_rate": 4.4737975750462405e-05, "loss": 0.3402, "step": 7894 }, { "epoch": 0.9362030119767579, "grad_norm": 1.1568325114301774, "learning_rate": 4.473650249525476e-05, "loss": 0.3627, "step": 7895 }, { "epoch": 0.9363215937388829, "grad_norm": 1.5322585575929735, "learning_rate": 4.473502905810075e-05, "loss": 0.3776, "step": 7896 }, { "epoch": 0.936440175501008, "grad_norm": 1.4225899707476533, "learning_rate": 4.473355543901398e-05, "loss": 0.4446, "step": 7897 }, { "epoch": 0.936558757263133, "grad_norm": 1.3442218016470409, "learning_rate": 4.4732081638008024e-05, "loss": 0.3422, "step": 7898 }, { "epoch": 0.936677339025258, "grad_norm": 1.574901528723671, "learning_rate": 4.473060765509648e-05, "loss": 0.4623, "step": 7899 }, { "epoch": 0.9367959207873829, "grad_norm": 1.7024167657787856, "learning_rate": 4.472913349029292e-05, "loss": 0.5328, "step": 7900 }, { "epoch": 0.9369145025495079, "grad_norm": 1.271409654843525, "learning_rate": 4.472765914361095e-05, "loss": 0.3721, "step": 7901 }, { "epoch": 0.9370330843116329, "grad_norm": 1.8097583526972605, "learning_rate": 4.4726184615064154e-05, "loss": 0.3364, "step": 7902 }, { "epoch": 0.9371516660737579, "grad_norm": 0.9557662243946908, "learning_rate": 4.4724709904666126e-05, "loss": 0.2922, "step": 7903 }, { "epoch": 0.9372702478358829, "grad_norm": 1.224507933366907, "learning_rate": 4.4723235012430454e-05, "loss": 0.307, "step": 7904 }, { "epoch": 0.9373888295980078, "grad_norm": 1.3849379511128523, "learning_rate": 4.472175993837074e-05, "loss": 0.4773, "step": 7905 }, { "epoch": 0.9375074113601328, "grad_norm": 1.4278112443010327, "learning_rate": 4.472028468250059e-05, "loss": 0.4522, "step": 7906 }, { "epoch": 0.9376259931222578, "grad_norm": 1.3976732154105562, "learning_rate": 4.471880924483361e-05, "loss": 0.3006, "step": 7907 }, { "epoch": 0.9377445748843828, "grad_norm": 0.9630806697750414, "learning_rate": 4.4717333625383375e-05, "loss": 0.2114, "step": 7908 }, { "epoch": 0.9378631566465078, "grad_norm": 1.196954884523647, "learning_rate": 4.47158578241635e-05, "loss": 0.3476, "step": 7909 }, { "epoch": 0.9379817384086327, "grad_norm": 1.3880275673979963, "learning_rate": 4.4714381841187595e-05, "loss": 0.454, "step": 7910 }, { "epoch": 0.9381003201707577, "grad_norm": 1.5369109457235928, "learning_rate": 4.471290567646927e-05, "loss": 0.4362, "step": 7911 }, { "epoch": 0.9382189019328827, "grad_norm": 1.146488087851261, "learning_rate": 4.471142933002213e-05, "loss": 0.3229, "step": 7912 }, { "epoch": 0.9383374836950077, "grad_norm": 1.6331038547310854, "learning_rate": 4.470995280185977e-05, "loss": 0.3831, "step": 7913 }, { "epoch": 0.9384560654571327, "grad_norm": 1.2847545929734496, "learning_rate": 4.470847609199583e-05, "loss": 0.2991, "step": 7914 }, { "epoch": 0.9385746472192577, "grad_norm": 1.4174673583184492, "learning_rate": 4.47069992004439e-05, "loss": 0.3823, "step": 7915 }, { "epoch": 0.9386932289813826, "grad_norm": 1.4382369954495928, "learning_rate": 4.47055221272176e-05, "loss": 0.3721, "step": 7916 }, { "epoch": 0.9388118107435076, "grad_norm": 0.9368905265203433, "learning_rate": 4.470404487233057e-05, "loss": 0.2866, "step": 7917 }, { "epoch": 0.9389303925056326, "grad_norm": 1.4405861502475428, "learning_rate": 4.470256743579638e-05, "loss": 0.395, "step": 7918 }, { "epoch": 0.9390489742677576, "grad_norm": 1.3578860487971665, "learning_rate": 4.47010898176287e-05, "loss": 0.4248, "step": 7919 }, { "epoch": 0.9391675560298826, "grad_norm": 1.358097664003968, "learning_rate": 4.4699612017841124e-05, "loss": 0.2689, "step": 7920 }, { "epoch": 0.9392861377920075, "grad_norm": 1.3707335460118961, "learning_rate": 4.4698134036447283e-05, "loss": 0.414, "step": 7921 }, { "epoch": 0.9394047195541326, "grad_norm": 1.344652650490059, "learning_rate": 4.469665587346081e-05, "loss": 0.3809, "step": 7922 }, { "epoch": 0.9395233013162576, "grad_norm": 1.350668547688967, "learning_rate": 4.4695177528895316e-05, "loss": 0.2968, "step": 7923 }, { "epoch": 0.9396418830783826, "grad_norm": 1.7592958012027944, "learning_rate": 4.469369900276443e-05, "loss": 0.5406, "step": 7924 }, { "epoch": 0.9397604648405076, "grad_norm": 1.098127321887825, "learning_rate": 4.4692220295081796e-05, "loss": 0.2966, "step": 7925 }, { "epoch": 0.9398790466026326, "grad_norm": 1.2430880166752971, "learning_rate": 4.469074140586105e-05, "loss": 0.3225, "step": 7926 }, { "epoch": 0.9399976283647575, "grad_norm": 1.550801812963555, "learning_rate": 4.46892623351158e-05, "loss": 0.4522, "step": 7927 }, { "epoch": 0.9401162101268825, "grad_norm": 1.7003616251711866, "learning_rate": 4.46877830828597e-05, "loss": 0.5074, "step": 7928 }, { "epoch": 0.9402347918890075, "grad_norm": 1.7703051802855216, "learning_rate": 4.468630364910638e-05, "loss": 0.4702, "step": 7929 }, { "epoch": 0.9403533736511325, "grad_norm": 1.0100421450087218, "learning_rate": 4.468482403386949e-05, "loss": 0.2823, "step": 7930 }, { "epoch": 0.9404719554132575, "grad_norm": 1.4845330597300348, "learning_rate": 4.468334423716265e-05, "loss": 0.469, "step": 7931 }, { "epoch": 0.9405905371753824, "grad_norm": 1.2598366819098834, "learning_rate": 4.468186425899952e-05, "loss": 0.2735, "step": 7932 }, { "epoch": 0.9407091189375074, "grad_norm": 1.2323541666291946, "learning_rate": 4.4680384099393736e-05, "loss": 0.3348, "step": 7933 }, { "epoch": 0.9408277006996324, "grad_norm": 1.30826069212377, "learning_rate": 4.467890375835894e-05, "loss": 0.3411, "step": 7934 }, { "epoch": 0.9409462824617574, "grad_norm": 1.4817824306521818, "learning_rate": 4.467742323590879e-05, "loss": 0.4869, "step": 7935 }, { "epoch": 0.9410648642238824, "grad_norm": 1.4365390922754468, "learning_rate": 4.467594253205693e-05, "loss": 0.4806, "step": 7936 }, { "epoch": 0.9411834459860073, "grad_norm": 1.5521876920960798, "learning_rate": 4.4674461646817e-05, "loss": 0.5316, "step": 7937 }, { "epoch": 0.9413020277481323, "grad_norm": 1.2196857701445063, "learning_rate": 4.4672980580202664e-05, "loss": 0.3046, "step": 7938 }, { "epoch": 0.9414206095102573, "grad_norm": 1.306708166824195, "learning_rate": 4.467149933222757e-05, "loss": 0.4167, "step": 7939 }, { "epoch": 0.9415391912723823, "grad_norm": 0.9658113327414094, "learning_rate": 4.467001790290538e-05, "loss": 0.2461, "step": 7940 }, { "epoch": 0.9416577730345073, "grad_norm": 1.4775720569357749, "learning_rate": 4.466853629224974e-05, "loss": 0.2694, "step": 7941 }, { "epoch": 0.9417763547966322, "grad_norm": 1.2558645890650024, "learning_rate": 4.4667054500274313e-05, "loss": 0.3378, "step": 7942 }, { "epoch": 0.9418949365587572, "grad_norm": 1.29225538000051, "learning_rate": 4.466557252699277e-05, "loss": 0.3043, "step": 7943 }, { "epoch": 0.9420135183208822, "grad_norm": 1.782053385644077, "learning_rate": 4.4664090372418755e-05, "loss": 0.5266, "step": 7944 }, { "epoch": 0.9421321000830072, "grad_norm": 1.5351069697400619, "learning_rate": 4.466260803656595e-05, "loss": 0.416, "step": 7945 }, { "epoch": 0.9422506818451322, "grad_norm": 1.098101520337271, "learning_rate": 4.4661125519447997e-05, "loss": 0.3123, "step": 7946 }, { "epoch": 0.9423692636072573, "grad_norm": 1.634881170880433, "learning_rate": 4.465964282107859e-05, "loss": 0.5117, "step": 7947 }, { "epoch": 0.9424878453693822, "grad_norm": 1.3250041333750595, "learning_rate": 4.465815994147138e-05, "loss": 0.3313, "step": 7948 }, { "epoch": 0.9426064271315072, "grad_norm": 1.1521457703142357, "learning_rate": 4.465667688064003e-05, "loss": 0.3062, "step": 7949 }, { "epoch": 0.9427250088936322, "grad_norm": 1.3427906734972042, "learning_rate": 4.465519363859824e-05, "loss": 0.3321, "step": 7950 }, { "epoch": 0.9428435906557572, "grad_norm": 1.5998830895568372, "learning_rate": 4.4653710215359655e-05, "loss": 0.4044, "step": 7951 }, { "epoch": 0.9429621724178822, "grad_norm": 1.2654796668555095, "learning_rate": 4.4652226610937974e-05, "loss": 0.4068, "step": 7952 }, { "epoch": 0.9430807541800071, "grad_norm": 1.42276818249597, "learning_rate": 4.465074282534686e-05, "loss": 0.4116, "step": 7953 }, { "epoch": 0.9431993359421321, "grad_norm": 1.181018237444917, "learning_rate": 4.464925885859999e-05, "loss": 0.2817, "step": 7954 }, { "epoch": 0.9433179177042571, "grad_norm": 2.146627859528423, "learning_rate": 4.464777471071106e-05, "loss": 0.6179, "step": 7955 }, { "epoch": 0.9434364994663821, "grad_norm": 1.4502575502158863, "learning_rate": 4.464629038169372e-05, "loss": 0.4363, "step": 7956 }, { "epoch": 0.9435550812285071, "grad_norm": 1.6312090706986382, "learning_rate": 4.464480587156169e-05, "loss": 0.4231, "step": 7957 }, { "epoch": 0.943673662990632, "grad_norm": 1.2278445287221056, "learning_rate": 4.464332118032864e-05, "loss": 0.4086, "step": 7958 }, { "epoch": 0.943792244752757, "grad_norm": 1.7196454044303109, "learning_rate": 4.464183630800825e-05, "loss": 0.4648, "step": 7959 }, { "epoch": 0.943910826514882, "grad_norm": 1.219768617827713, "learning_rate": 4.464035125461422e-05, "loss": 0.3525, "step": 7960 }, { "epoch": 0.944029408277007, "grad_norm": 1.626080831652033, "learning_rate": 4.463886602016024e-05, "loss": 0.5388, "step": 7961 }, { "epoch": 0.944147990039132, "grad_norm": 1.0452816723928051, "learning_rate": 4.463738060466e-05, "loss": 0.2611, "step": 7962 }, { "epoch": 0.944266571801257, "grad_norm": 1.1541678277579022, "learning_rate": 4.4635895008127176e-05, "loss": 0.3874, "step": 7963 }, { "epoch": 0.9443851535633819, "grad_norm": 1.2299553803873313, "learning_rate": 4.463440923057549e-05, "loss": 0.3195, "step": 7964 }, { "epoch": 0.9445037353255069, "grad_norm": 1.3624632472750375, "learning_rate": 4.463292327201862e-05, "loss": 0.3454, "step": 7965 }, { "epoch": 0.9446223170876319, "grad_norm": 1.3286463576522978, "learning_rate": 4.463143713247029e-05, "loss": 0.4108, "step": 7966 }, { "epoch": 0.9447408988497569, "grad_norm": 1.314053233597529, "learning_rate": 4.462995081194417e-05, "loss": 0.4709, "step": 7967 }, { "epoch": 0.9448594806118819, "grad_norm": 1.0905643205542164, "learning_rate": 4.462846431045398e-05, "loss": 0.2367, "step": 7968 }, { "epoch": 0.9449780623740068, "grad_norm": 1.501224340697668, "learning_rate": 4.462697762801341e-05, "loss": 0.3225, "step": 7969 }, { "epoch": 0.9450966441361318, "grad_norm": 1.2710424475121913, "learning_rate": 4.4625490764636185e-05, "loss": 0.3987, "step": 7970 }, { "epoch": 0.9452152258982568, "grad_norm": 1.2347031385639617, "learning_rate": 4.4624003720335995e-05, "loss": 0.3689, "step": 7971 }, { "epoch": 0.9453338076603819, "grad_norm": 1.3388408952108617, "learning_rate": 4.462251649512656e-05, "loss": 0.3252, "step": 7972 }, { "epoch": 0.9454523894225069, "grad_norm": 1.2119969118017877, "learning_rate": 4.462102908902159e-05, "loss": 0.3643, "step": 7973 }, { "epoch": 0.9455709711846318, "grad_norm": 1.3811258090387222, "learning_rate": 4.4619541502034775e-05, "loss": 0.3506, "step": 7974 }, { "epoch": 0.9456895529467568, "grad_norm": 1.5207276715785374, "learning_rate": 4.4618053734179865e-05, "loss": 0.426, "step": 7975 }, { "epoch": 0.9458081347088818, "grad_norm": 1.4089576816611045, "learning_rate": 4.461656578547055e-05, "loss": 0.3792, "step": 7976 }, { "epoch": 0.9459267164710068, "grad_norm": 1.1704743736236165, "learning_rate": 4.461507765592056e-05, "loss": 0.2951, "step": 7977 }, { "epoch": 0.9460452982331318, "grad_norm": 1.1165227329779759, "learning_rate": 4.46135893455436e-05, "loss": 0.3139, "step": 7978 }, { "epoch": 0.9461638799952568, "grad_norm": 1.0189928153408196, "learning_rate": 4.46121008543534e-05, "loss": 0.2775, "step": 7979 }, { "epoch": 0.9462824617573817, "grad_norm": 1.1659942035214148, "learning_rate": 4.4610612182363675e-05, "loss": 0.3271, "step": 7980 }, { "epoch": 0.9464010435195067, "grad_norm": 1.3539645741259374, "learning_rate": 4.460912332958816e-05, "loss": 0.4138, "step": 7981 }, { "epoch": 0.9465196252816317, "grad_norm": 1.2452454810924032, "learning_rate": 4.460763429604057e-05, "loss": 0.3684, "step": 7982 }, { "epoch": 0.9466382070437567, "grad_norm": 1.19627613211601, "learning_rate": 4.4606145081734644e-05, "loss": 0.3264, "step": 7983 }, { "epoch": 0.9467567888058817, "grad_norm": 1.6222523164440736, "learning_rate": 4.4604655686684095e-05, "loss": 0.5196, "step": 7984 }, { "epoch": 0.9468753705680066, "grad_norm": 1.8338982544227187, "learning_rate": 4.460316611090266e-05, "loss": 0.4368, "step": 7985 }, { "epoch": 0.9469939523301316, "grad_norm": 1.235964315610443, "learning_rate": 4.460167635440408e-05, "loss": 0.3448, "step": 7986 }, { "epoch": 0.9471125340922566, "grad_norm": 1.1664407967539072, "learning_rate": 4.4600186417202075e-05, "loss": 0.2947, "step": 7987 }, { "epoch": 0.9472311158543816, "grad_norm": 1.2098949845788092, "learning_rate": 4.4598696299310394e-05, "loss": 0.3396, "step": 7988 }, { "epoch": 0.9473496976165066, "grad_norm": 1.0223607314559817, "learning_rate": 4.4597206000742763e-05, "loss": 0.2021, "step": 7989 }, { "epoch": 0.9474682793786315, "grad_norm": 1.2822612013548682, "learning_rate": 4.459571552151292e-05, "loss": 0.3637, "step": 7990 }, { "epoch": 0.9475868611407565, "grad_norm": 1.0786323328616567, "learning_rate": 4.4594224861634616e-05, "loss": 0.2747, "step": 7991 }, { "epoch": 0.9477054429028815, "grad_norm": 1.4711053511842929, "learning_rate": 4.4592734021121584e-05, "loss": 0.3975, "step": 7992 }, { "epoch": 0.9478240246650065, "grad_norm": 1.4899548444957251, "learning_rate": 4.459124299998757e-05, "loss": 0.4031, "step": 7993 }, { "epoch": 0.9479426064271315, "grad_norm": 1.251338062631199, "learning_rate": 4.458975179824632e-05, "loss": 0.4014, "step": 7994 }, { "epoch": 0.9480611881892564, "grad_norm": 1.2872533830052764, "learning_rate": 4.458826041591158e-05, "loss": 0.251, "step": 7995 }, { "epoch": 0.9481797699513814, "grad_norm": 1.195781829172686, "learning_rate": 4.458676885299711e-05, "loss": 0.2668, "step": 7996 }, { "epoch": 0.9482983517135065, "grad_norm": 1.2430773268634088, "learning_rate": 4.458527710951663e-05, "loss": 0.318, "step": 7997 }, { "epoch": 0.9484169334756315, "grad_norm": 1.637819629720816, "learning_rate": 4.458378518548393e-05, "loss": 0.5257, "step": 7998 }, { "epoch": 0.9485355152377565, "grad_norm": 1.1604200159891602, "learning_rate": 4.458229308091274e-05, "loss": 0.2454, "step": 7999 }, { "epoch": 0.9486540969998815, "grad_norm": 1.720486806738014, "learning_rate": 4.4580800795816814e-05, "loss": 0.4358, "step": 8000 }, { "epoch": 0.9487726787620064, "grad_norm": 2.274460163138203, "learning_rate": 4.457930833020992e-05, "loss": 0.6192, "step": 8001 }, { "epoch": 0.9488912605241314, "grad_norm": 1.2245046785730396, "learning_rate": 4.4577815684105814e-05, "loss": 0.2759, "step": 8002 }, { "epoch": 0.9490098422862564, "grad_norm": 1.3312692981864966, "learning_rate": 4.457632285751826e-05, "loss": 0.3275, "step": 8003 }, { "epoch": 0.9491284240483814, "grad_norm": 1.3202877879297386, "learning_rate": 4.4574829850461017e-05, "loss": 0.4959, "step": 8004 }, { "epoch": 0.9492470058105064, "grad_norm": 1.5399677169971753, "learning_rate": 4.457333666294784e-05, "loss": 0.4554, "step": 8005 }, { "epoch": 0.9493655875726313, "grad_norm": 1.7444500829184237, "learning_rate": 4.45718432949925e-05, "loss": 0.5667, "step": 8006 }, { "epoch": 0.9494841693347563, "grad_norm": 1.6085755538928557, "learning_rate": 4.457034974660877e-05, "loss": 0.5003, "step": 8007 }, { "epoch": 0.9496027510968813, "grad_norm": 1.459773056167545, "learning_rate": 4.456885601781041e-05, "loss": 0.4111, "step": 8008 }, { "epoch": 0.9497213328590063, "grad_norm": 1.0352725219581451, "learning_rate": 4.45673621086112e-05, "loss": 0.3308, "step": 8009 }, { "epoch": 0.9498399146211313, "grad_norm": 1.1345589257293933, "learning_rate": 4.45658680190249e-05, "loss": 0.2684, "step": 8010 }, { "epoch": 0.9499584963832562, "grad_norm": 1.4336598903907307, "learning_rate": 4.45643737490653e-05, "loss": 0.4025, "step": 8011 }, { "epoch": 0.9500770781453812, "grad_norm": 0.8289305175252019, "learning_rate": 4.4562879298746165e-05, "loss": 0.2475, "step": 8012 }, { "epoch": 0.9501956599075062, "grad_norm": 1.0358434516792998, "learning_rate": 4.4561384668081265e-05, "loss": 0.2642, "step": 8013 }, { "epoch": 0.9503142416696312, "grad_norm": 1.0190059513705128, "learning_rate": 4.455988985708438e-05, "loss": 0.2709, "step": 8014 }, { "epoch": 0.9504328234317562, "grad_norm": 1.264418083727074, "learning_rate": 4.45583948657693e-05, "loss": 0.4059, "step": 8015 }, { "epoch": 0.9505514051938811, "grad_norm": 1.571236078747199, "learning_rate": 4.455689969414982e-05, "loss": 0.4957, "step": 8016 }, { "epoch": 0.9506699869560061, "grad_norm": 1.3738764560336314, "learning_rate": 4.455540434223969e-05, "loss": 0.4545, "step": 8017 }, { "epoch": 0.9507885687181311, "grad_norm": 0.9581658720322292, "learning_rate": 4.455390881005272e-05, "loss": 0.2882, "step": 8018 }, { "epoch": 0.9509071504802561, "grad_norm": 1.062481471360761, "learning_rate": 4.4552413097602684e-05, "loss": 0.3373, "step": 8019 }, { "epoch": 0.9510257322423811, "grad_norm": 1.6575485901743823, "learning_rate": 4.455091720490338e-05, "loss": 0.5094, "step": 8020 }, { "epoch": 0.951144314004506, "grad_norm": 1.186919668441879, "learning_rate": 4.4549421131968595e-05, "loss": 0.308, "step": 8021 }, { "epoch": 0.9512628957666311, "grad_norm": 1.447026596795619, "learning_rate": 4.454792487881212e-05, "loss": 0.3831, "step": 8022 }, { "epoch": 0.9513814775287561, "grad_norm": 1.6912972492309262, "learning_rate": 4.454642844544774e-05, "loss": 0.535, "step": 8023 }, { "epoch": 0.9515000592908811, "grad_norm": 1.2166015554651435, "learning_rate": 4.454493183188927e-05, "loss": 0.3196, "step": 8024 }, { "epoch": 0.9516186410530061, "grad_norm": 1.3215703094254, "learning_rate": 4.454343503815049e-05, "loss": 0.3335, "step": 8025 }, { "epoch": 0.9517372228151311, "grad_norm": 1.5619485374557895, "learning_rate": 4.454193806424521e-05, "loss": 0.4424, "step": 8026 }, { "epoch": 0.951855804577256, "grad_norm": 1.3594956119314252, "learning_rate": 4.454044091018722e-05, "loss": 0.3284, "step": 8027 }, { "epoch": 0.951974386339381, "grad_norm": 1.1236233546296095, "learning_rate": 4.453894357599033e-05, "loss": 0.2971, "step": 8028 }, { "epoch": 0.952092968101506, "grad_norm": 1.0960494765237532, "learning_rate": 4.453744606166834e-05, "loss": 0.2601, "step": 8029 }, { "epoch": 0.952211549863631, "grad_norm": 1.3952580742142333, "learning_rate": 4.453594836723505e-05, "loss": 0.3329, "step": 8030 }, { "epoch": 0.952330131625756, "grad_norm": 1.222815159519293, "learning_rate": 4.453445049270428e-05, "loss": 0.3059, "step": 8031 }, { "epoch": 0.952448713387881, "grad_norm": 1.362012167741883, "learning_rate": 4.453295243808983e-05, "loss": 0.2748, "step": 8032 }, { "epoch": 0.9525672951500059, "grad_norm": 1.2150285777074779, "learning_rate": 4.453145420340551e-05, "loss": 0.2689, "step": 8033 }, { "epoch": 0.9526858769121309, "grad_norm": 1.2855838883865331, "learning_rate": 4.452995578866513e-05, "loss": 0.27, "step": 8034 }, { "epoch": 0.9528044586742559, "grad_norm": 1.207240116834465, "learning_rate": 4.452845719388251e-05, "loss": 0.348, "step": 8035 }, { "epoch": 0.9529230404363809, "grad_norm": 1.976037561523375, "learning_rate": 4.452695841907146e-05, "loss": 0.4221, "step": 8036 }, { "epoch": 0.9530416221985059, "grad_norm": 1.0882559818571111, "learning_rate": 4.452545946424581e-05, "loss": 0.2842, "step": 8037 }, { "epoch": 0.9531602039606308, "grad_norm": 1.232931717864638, "learning_rate": 4.452396032941935e-05, "loss": 0.2973, "step": 8038 }, { "epoch": 0.9532787857227558, "grad_norm": 1.1271075117335136, "learning_rate": 4.452246101460593e-05, "loss": 0.2876, "step": 8039 }, { "epoch": 0.9533973674848808, "grad_norm": 1.5444116758723774, "learning_rate": 4.452096151981936e-05, "loss": 0.3832, "step": 8040 }, { "epoch": 0.9535159492470058, "grad_norm": 0.9664276374759817, "learning_rate": 4.4519461845073455e-05, "loss": 0.1938, "step": 8041 }, { "epoch": 0.9536345310091308, "grad_norm": 1.3147803835707417, "learning_rate": 4.451796199038205e-05, "loss": 0.4125, "step": 8042 }, { "epoch": 0.9537531127712557, "grad_norm": 1.3761153329754388, "learning_rate": 4.451646195575898e-05, "loss": 0.3475, "step": 8043 }, { "epoch": 0.9538716945333807, "grad_norm": 1.2878342541408505, "learning_rate": 4.451496174121805e-05, "loss": 0.3174, "step": 8044 }, { "epoch": 0.9539902762955057, "grad_norm": 0.8866667723474136, "learning_rate": 4.451346134677311e-05, "loss": 0.2203, "step": 8045 }, { "epoch": 0.9541088580576307, "grad_norm": 1.629654219561143, "learning_rate": 4.451196077243798e-05, "loss": 0.4417, "step": 8046 }, { "epoch": 0.9542274398197558, "grad_norm": 1.4519373801476008, "learning_rate": 4.4510460018226507e-05, "loss": 0.4393, "step": 8047 }, { "epoch": 0.9543460215818808, "grad_norm": 1.6852260665064158, "learning_rate": 4.450895908415251e-05, "loss": 0.3636, "step": 8048 }, { "epoch": 0.9544646033440057, "grad_norm": 1.2445821204015943, "learning_rate": 4.450745797022984e-05, "loss": 0.3764, "step": 8049 }, { "epoch": 0.9545831851061307, "grad_norm": 1.5855886874560812, "learning_rate": 4.450595667647233e-05, "loss": 0.4117, "step": 8050 }, { "epoch": 0.9547017668682557, "grad_norm": 1.2995239847937021, "learning_rate": 4.450445520289381e-05, "loss": 0.375, "step": 8051 }, { "epoch": 0.9548203486303807, "grad_norm": 1.3887038252275121, "learning_rate": 4.450295354950814e-05, "loss": 0.3991, "step": 8052 }, { "epoch": 0.9549389303925057, "grad_norm": 1.2866676184614656, "learning_rate": 4.450145171632915e-05, "loss": 0.3988, "step": 8053 }, { "epoch": 0.9550575121546306, "grad_norm": 1.0383869810289579, "learning_rate": 4.449994970337069e-05, "loss": 0.298, "step": 8054 }, { "epoch": 0.9551760939167556, "grad_norm": 1.1664270313904719, "learning_rate": 4.4498447510646615e-05, "loss": 0.2781, "step": 8055 }, { "epoch": 0.9552946756788806, "grad_norm": 1.3295822960354065, "learning_rate": 4.449694513817075e-05, "loss": 0.2839, "step": 8056 }, { "epoch": 0.9554132574410056, "grad_norm": 1.0728539216328117, "learning_rate": 4.449544258595697e-05, "loss": 0.2943, "step": 8057 }, { "epoch": 0.9555318392031306, "grad_norm": 3.8834231896939686, "learning_rate": 4.449393985401911e-05, "loss": 0.2671, "step": 8058 }, { "epoch": 0.9556504209652555, "grad_norm": 1.592659370679597, "learning_rate": 4.449243694237103e-05, "loss": 0.4821, "step": 8059 }, { "epoch": 0.9557690027273805, "grad_norm": 1.4868973791137006, "learning_rate": 4.449093385102659e-05, "loss": 0.4547, "step": 8060 }, { "epoch": 0.9558875844895055, "grad_norm": 1.157942205419363, "learning_rate": 4.4489430579999634e-05, "loss": 0.3171, "step": 8061 }, { "epoch": 0.9560061662516305, "grad_norm": 1.6566377794557878, "learning_rate": 4.448792712930404e-05, "loss": 0.455, "step": 8062 }, { "epoch": 0.9561247480137555, "grad_norm": 1.830326292639886, "learning_rate": 4.448642349895364e-05, "loss": 0.5038, "step": 8063 }, { "epoch": 0.9562433297758804, "grad_norm": 1.303351828879128, "learning_rate": 4.448491968896232e-05, "loss": 0.3944, "step": 8064 }, { "epoch": 0.9563619115380054, "grad_norm": 1.4179668729976078, "learning_rate": 4.448341569934393e-05, "loss": 0.4467, "step": 8065 }, { "epoch": 0.9564804933001304, "grad_norm": 2.635737760184117, "learning_rate": 4.448191153011234e-05, "loss": 0.4645, "step": 8066 }, { "epoch": 0.9565990750622554, "grad_norm": 1.1044657407700522, "learning_rate": 4.448040718128142e-05, "loss": 0.3475, "step": 8067 }, { "epoch": 0.9567176568243804, "grad_norm": 1.5548963917429848, "learning_rate": 4.4478902652865034e-05, "loss": 0.4924, "step": 8068 }, { "epoch": 0.9568362385865053, "grad_norm": 1.5749362287182267, "learning_rate": 4.447739794487705e-05, "loss": 0.5497, "step": 8069 }, { "epoch": 0.9569548203486303, "grad_norm": 1.0655730865525967, "learning_rate": 4.4475893057331343e-05, "loss": 0.3746, "step": 8070 }, { "epoch": 0.9570734021107553, "grad_norm": 1.1173530949868353, "learning_rate": 4.4474387990241783e-05, "loss": 0.2728, "step": 8071 }, { "epoch": 0.9571919838728804, "grad_norm": 1.6784232313488763, "learning_rate": 4.447288274362225e-05, "loss": 0.6521, "step": 8072 }, { "epoch": 0.9573105656350054, "grad_norm": 1.360226467524376, "learning_rate": 4.447137731748661e-05, "loss": 0.3514, "step": 8073 }, { "epoch": 0.9574291473971304, "grad_norm": 0.9404683682160336, "learning_rate": 4.446987171184875e-05, "loss": 0.2472, "step": 8074 }, { "epoch": 0.9575477291592553, "grad_norm": 1.0707695114687983, "learning_rate": 4.446836592672255e-05, "loss": 0.3252, "step": 8075 }, { "epoch": 0.9576663109213803, "grad_norm": 1.368796861831619, "learning_rate": 4.4466859962121896e-05, "loss": 0.4068, "step": 8076 }, { "epoch": 0.9577848926835053, "grad_norm": 1.0747107215480836, "learning_rate": 4.446535381806066e-05, "loss": 0.2278, "step": 8077 }, { "epoch": 0.9579034744456303, "grad_norm": 1.34691718125261, "learning_rate": 4.446384749455274e-05, "loss": 0.3826, "step": 8078 }, { "epoch": 0.9580220562077553, "grad_norm": 1.6579510358080132, "learning_rate": 4.4462340991612004e-05, "loss": 0.3867, "step": 8079 }, { "epoch": 0.9581406379698802, "grad_norm": 1.266088452291217, "learning_rate": 4.446083430925235e-05, "loss": 0.3902, "step": 8080 }, { "epoch": 0.9582592197320052, "grad_norm": 1.5800724406047364, "learning_rate": 4.445932744748767e-05, "loss": 0.358, "step": 8081 }, { "epoch": 0.9583778014941302, "grad_norm": 0.930351579158421, "learning_rate": 4.4457820406331864e-05, "loss": 0.2522, "step": 8082 }, { "epoch": 0.9584963832562552, "grad_norm": 1.7147880752328326, "learning_rate": 4.4456313185798803e-05, "loss": 0.4437, "step": 8083 }, { "epoch": 0.9586149650183802, "grad_norm": 1.0164223936533918, "learning_rate": 4.445480578590239e-05, "loss": 0.3444, "step": 8084 }, { "epoch": 0.9587335467805052, "grad_norm": 1.4095027425860642, "learning_rate": 4.4453298206656535e-05, "loss": 0.4074, "step": 8085 }, { "epoch": 0.9588521285426301, "grad_norm": 1.5968720502712217, "learning_rate": 4.445179044807512e-05, "loss": 0.3697, "step": 8086 }, { "epoch": 0.9589707103047551, "grad_norm": 1.2775421103105544, "learning_rate": 4.445028251017205e-05, "loss": 0.3318, "step": 8087 }, { "epoch": 0.9590892920668801, "grad_norm": 1.2787958473073109, "learning_rate": 4.4448774392961226e-05, "loss": 0.3619, "step": 8088 }, { "epoch": 0.9592078738290051, "grad_norm": 1.5330875908652577, "learning_rate": 4.444726609645656e-05, "loss": 0.3618, "step": 8089 }, { "epoch": 0.9593264555911301, "grad_norm": 1.243347735894414, "learning_rate": 4.444575762067194e-05, "loss": 0.2987, "step": 8090 }, { "epoch": 0.959445037353255, "grad_norm": 1.6740693188633327, "learning_rate": 4.444424896562128e-05, "loss": 0.4515, "step": 8091 }, { "epoch": 0.95956361911538, "grad_norm": 0.9688552680963481, "learning_rate": 4.4442740131318496e-05, "loss": 0.2767, "step": 8092 }, { "epoch": 0.959682200877505, "grad_norm": 1.2008829208974772, "learning_rate": 4.444123111777748e-05, "loss": 0.3455, "step": 8093 }, { "epoch": 0.95980078263963, "grad_norm": 1.153830083536485, "learning_rate": 4.443972192501217e-05, "loss": 0.2359, "step": 8094 }, { "epoch": 0.959919364401755, "grad_norm": 1.1596798150145227, "learning_rate": 4.443821255303645e-05, "loss": 0.2659, "step": 8095 }, { "epoch": 0.9600379461638799, "grad_norm": 1.155618774065739, "learning_rate": 4.443670300186425e-05, "loss": 0.3263, "step": 8096 }, { "epoch": 0.960156527926005, "grad_norm": 1.3068599972133255, "learning_rate": 4.443519327150948e-05, "loss": 0.3976, "step": 8097 }, { "epoch": 0.96027510968813, "grad_norm": 1.013263370286823, "learning_rate": 4.443368336198607e-05, "loss": 0.3024, "step": 8098 }, { "epoch": 0.960393691450255, "grad_norm": 1.177026460800368, "learning_rate": 4.443217327330792e-05, "loss": 0.3888, "step": 8099 }, { "epoch": 0.96051227321238, "grad_norm": 1.3293424581752964, "learning_rate": 4.4430663005488957e-05, "loss": 0.3226, "step": 8100 }, { "epoch": 0.960630854974505, "grad_norm": 1.195968863039665, "learning_rate": 4.442915255854312e-05, "loss": 0.3837, "step": 8101 }, { "epoch": 0.9607494367366299, "grad_norm": 1.3272635881102783, "learning_rate": 4.442764193248432e-05, "loss": 0.3465, "step": 8102 }, { "epoch": 0.9608680184987549, "grad_norm": 1.077597182339218, "learning_rate": 4.442613112732649e-05, "loss": 0.2887, "step": 8103 }, { "epoch": 0.9609866002608799, "grad_norm": 1.037463614026974, "learning_rate": 4.442462014308354e-05, "loss": 0.2723, "step": 8104 }, { "epoch": 0.9611051820230049, "grad_norm": 1.2431475837003096, "learning_rate": 4.442310897976942e-05, "loss": 0.3588, "step": 8105 }, { "epoch": 0.9612237637851299, "grad_norm": 1.3342722848806092, "learning_rate": 4.442159763739805e-05, "loss": 0.4415, "step": 8106 }, { "epoch": 0.9613423455472548, "grad_norm": 1.8874440134539356, "learning_rate": 4.4420086115983375e-05, "loss": 0.5666, "step": 8107 }, { "epoch": 0.9614609273093798, "grad_norm": 1.0096441313942262, "learning_rate": 4.441857441553932e-05, "loss": 0.2771, "step": 8108 }, { "epoch": 0.9615795090715048, "grad_norm": 1.0963651312820257, "learning_rate": 4.4417062536079815e-05, "loss": 0.2775, "step": 8109 }, { "epoch": 0.9616980908336298, "grad_norm": 1.4486858102987352, "learning_rate": 4.44155504776188e-05, "loss": 0.356, "step": 8110 }, { "epoch": 0.9618166725957548, "grad_norm": 1.2102181480741514, "learning_rate": 4.4414038240170225e-05, "loss": 0.4111, "step": 8111 }, { "epoch": 0.9619352543578797, "grad_norm": 0.9538152790884604, "learning_rate": 4.441252582374802e-05, "loss": 0.2945, "step": 8112 }, { "epoch": 0.9620538361200047, "grad_norm": 0.9847115894546697, "learning_rate": 4.441101322836614e-05, "loss": 0.2325, "step": 8113 }, { "epoch": 0.9621724178821297, "grad_norm": 0.9949457091988496, "learning_rate": 4.440950045403851e-05, "loss": 0.3063, "step": 8114 }, { "epoch": 0.9622909996442547, "grad_norm": 1.2777596830826339, "learning_rate": 4.4407987500779096e-05, "loss": 0.2634, "step": 8115 }, { "epoch": 0.9624095814063797, "grad_norm": 1.3962735110854212, "learning_rate": 4.4406474368601835e-05, "loss": 0.3875, "step": 8116 }, { "epoch": 0.9625281631685046, "grad_norm": 1.416522443703208, "learning_rate": 4.440496105752068e-05, "loss": 0.2966, "step": 8117 }, { "epoch": 0.9626467449306296, "grad_norm": 1.1606800810437023, "learning_rate": 4.440344756754958e-05, "loss": 0.268, "step": 8118 }, { "epoch": 0.9627653266927546, "grad_norm": 1.1209591625658175, "learning_rate": 4.440193389870249e-05, "loss": 0.3025, "step": 8119 }, { "epoch": 0.9628839084548796, "grad_norm": 1.463136962747462, "learning_rate": 4.440042005099335e-05, "loss": 0.3809, "step": 8120 }, { "epoch": 0.9630024902170046, "grad_norm": 1.6301401110139413, "learning_rate": 4.439890602443614e-05, "loss": 0.401, "step": 8121 }, { "epoch": 0.9631210719791297, "grad_norm": 1.3878120382227506, "learning_rate": 4.4397391819044795e-05, "loss": 0.3625, "step": 8122 }, { "epoch": 0.9632396537412546, "grad_norm": 1.7591390657824497, "learning_rate": 4.439587743483329e-05, "loss": 0.5031, "step": 8123 }, { "epoch": 0.9633582355033796, "grad_norm": 1.3904886805795058, "learning_rate": 4.439436287181558e-05, "loss": 0.429, "step": 8124 }, { "epoch": 0.9634768172655046, "grad_norm": 1.1540343421284536, "learning_rate": 4.439284813000563e-05, "loss": 0.3056, "step": 8125 }, { "epoch": 0.9635953990276296, "grad_norm": 1.3151689253188135, "learning_rate": 4.43913332094174e-05, "loss": 0.3113, "step": 8126 }, { "epoch": 0.9637139807897546, "grad_norm": 1.1899261058339594, "learning_rate": 4.4389818110064846e-05, "loss": 0.3188, "step": 8127 }, { "epoch": 0.9638325625518795, "grad_norm": 1.235129911718782, "learning_rate": 4.4388302831961956e-05, "loss": 0.2627, "step": 8128 }, { "epoch": 0.9639511443140045, "grad_norm": 0.9892017396639945, "learning_rate": 4.438678737512269e-05, "loss": 0.2591, "step": 8129 }, { "epoch": 0.9640697260761295, "grad_norm": 1.612438679686364, "learning_rate": 4.438527173956101e-05, "loss": 0.5128, "step": 8130 }, { "epoch": 0.9641883078382545, "grad_norm": 1.3890739605982219, "learning_rate": 4.43837559252909e-05, "loss": 0.3803, "step": 8131 }, { "epoch": 0.9643068896003795, "grad_norm": 1.1976421045171879, "learning_rate": 4.438223993232634e-05, "loss": 0.291, "step": 8132 }, { "epoch": 0.9644254713625044, "grad_norm": 1.2402878385418235, "learning_rate": 4.438072376068129e-05, "loss": 0.4378, "step": 8133 }, { "epoch": 0.9645440531246294, "grad_norm": 1.1393739353235415, "learning_rate": 4.4379207410369725e-05, "loss": 0.2987, "step": 8134 }, { "epoch": 0.9646626348867544, "grad_norm": 1.5372456120979854, "learning_rate": 4.4377690881405646e-05, "loss": 0.516, "step": 8135 }, { "epoch": 0.9647812166488794, "grad_norm": 1.257998111986321, "learning_rate": 4.437617417380301e-05, "loss": 0.3824, "step": 8136 }, { "epoch": 0.9648997984110044, "grad_norm": 1.8242940507519305, "learning_rate": 4.4374657287575807e-05, "loss": 0.5142, "step": 8137 }, { "epoch": 0.9650183801731294, "grad_norm": 1.2009205796839926, "learning_rate": 4.4373140222738027e-05, "loss": 0.2375, "step": 8138 }, { "epoch": 0.9651369619352543, "grad_norm": 1.3957704949994112, "learning_rate": 4.437162297930365e-05, "loss": 0.3383, "step": 8139 }, { "epoch": 0.9652555436973793, "grad_norm": 1.0481689531167053, "learning_rate": 4.437010555728667e-05, "loss": 0.2921, "step": 8140 }, { "epoch": 0.9653741254595043, "grad_norm": 1.0674281857959564, "learning_rate": 4.436858795670106e-05, "loss": 0.2875, "step": 8141 }, { "epoch": 0.9654927072216293, "grad_norm": 1.2253518955248046, "learning_rate": 4.436707017756083e-05, "loss": 0.2562, "step": 8142 }, { "epoch": 0.9656112889837543, "grad_norm": 1.1128549873415263, "learning_rate": 4.436555221987996e-05, "loss": 0.2745, "step": 8143 }, { "epoch": 0.9657298707458792, "grad_norm": 1.1623819644161728, "learning_rate": 4.436403408367243e-05, "loss": 0.3094, "step": 8144 }, { "epoch": 0.9658484525080042, "grad_norm": 1.2204119488892813, "learning_rate": 4.436251576895227e-05, "loss": 0.3275, "step": 8145 }, { "epoch": 0.9659670342701292, "grad_norm": 1.9673665393571227, "learning_rate": 4.436099727573346e-05, "loss": 0.5316, "step": 8146 }, { "epoch": 0.9660856160322543, "grad_norm": 0.9989171554370674, "learning_rate": 4.435947860402999e-05, "loss": 0.267, "step": 8147 }, { "epoch": 0.9662041977943793, "grad_norm": 1.635825437099239, "learning_rate": 4.4357959753855866e-05, "loss": 0.4859, "step": 8148 }, { "epoch": 0.9663227795565043, "grad_norm": 1.4734499987039664, "learning_rate": 4.4356440725225094e-05, "loss": 0.3534, "step": 8149 }, { "epoch": 0.9664413613186292, "grad_norm": 1.0632644293957512, "learning_rate": 4.435492151815168e-05, "loss": 0.2979, "step": 8150 }, { "epoch": 0.9665599430807542, "grad_norm": 1.3189364320380723, "learning_rate": 4.4353402132649615e-05, "loss": 0.3005, "step": 8151 }, { "epoch": 0.9666785248428792, "grad_norm": 1.0159099655762984, "learning_rate": 4.4351882568732916e-05, "loss": 0.2753, "step": 8152 }, { "epoch": 0.9667971066050042, "grad_norm": 1.0748361457002256, "learning_rate": 4.4350362826415594e-05, "loss": 0.2713, "step": 8153 }, { "epoch": 0.9669156883671292, "grad_norm": 1.256553425006719, "learning_rate": 4.4348842905711656e-05, "loss": 0.3459, "step": 8154 }, { "epoch": 0.9670342701292541, "grad_norm": 1.493076337103224, "learning_rate": 4.4347322806635115e-05, "loss": 0.3048, "step": 8155 }, { "epoch": 0.9671528518913791, "grad_norm": 1.1247932360153043, "learning_rate": 4.434580252919998e-05, "loss": 0.3384, "step": 8156 }, { "epoch": 0.9672714336535041, "grad_norm": 1.6417396546811136, "learning_rate": 4.434428207342027e-05, "loss": 0.5175, "step": 8157 }, { "epoch": 0.9673900154156291, "grad_norm": 1.2887541285621864, "learning_rate": 4.434276143931e-05, "loss": 0.315, "step": 8158 }, { "epoch": 0.9675085971777541, "grad_norm": 1.0724426456594587, "learning_rate": 4.434124062688319e-05, "loss": 0.2762, "step": 8159 }, { "epoch": 0.967627178939879, "grad_norm": 1.269726237926335, "learning_rate": 4.433971963615386e-05, "loss": 0.3186, "step": 8160 }, { "epoch": 0.967745760702004, "grad_norm": 1.5213560838382587, "learning_rate": 4.433819846713603e-05, "loss": 0.3389, "step": 8161 }, { "epoch": 0.967864342464129, "grad_norm": 1.4235110474144252, "learning_rate": 4.4336677119843726e-05, "loss": 0.3686, "step": 8162 }, { "epoch": 0.967982924226254, "grad_norm": 2.0779846240463296, "learning_rate": 4.4335155594290966e-05, "loss": 0.4383, "step": 8163 }, { "epoch": 0.968101505988379, "grad_norm": 1.3051941282844637, "learning_rate": 4.433363389049179e-05, "loss": 0.3209, "step": 8164 }, { "epoch": 0.9682200877505039, "grad_norm": 1.1817107583731892, "learning_rate": 4.433211200846021e-05, "loss": 0.3527, "step": 8165 }, { "epoch": 0.9683386695126289, "grad_norm": 1.37474012577105, "learning_rate": 4.433058994821027e-05, "loss": 0.4202, "step": 8166 }, { "epoch": 0.9684572512747539, "grad_norm": 1.4037019268230604, "learning_rate": 4.432906770975599e-05, "loss": 0.4617, "step": 8167 }, { "epoch": 0.9685758330368789, "grad_norm": 1.2809090366264535, "learning_rate": 4.43275452931114e-05, "loss": 0.3073, "step": 8168 }, { "epoch": 0.9686944147990039, "grad_norm": 1.3158900853300421, "learning_rate": 4.432602269829056e-05, "loss": 0.2932, "step": 8169 }, { "epoch": 0.9688129965611288, "grad_norm": 1.2815179877077714, "learning_rate": 4.432449992530748e-05, "loss": 0.3894, "step": 8170 }, { "epoch": 0.9689315783232538, "grad_norm": 1.065872399761948, "learning_rate": 4.432297697417622e-05, "loss": 0.226, "step": 8171 }, { "epoch": 0.9690501600853789, "grad_norm": 1.3683395850497797, "learning_rate": 4.43214538449108e-05, "loss": 0.4333, "step": 8172 }, { "epoch": 0.9691687418475039, "grad_norm": 0.9104760576518058, "learning_rate": 4.431993053752527e-05, "loss": 0.2458, "step": 8173 }, { "epoch": 0.9692873236096289, "grad_norm": 1.77293872559727, "learning_rate": 4.431840705203367e-05, "loss": 0.5781, "step": 8174 }, { "epoch": 0.9694059053717539, "grad_norm": 1.1878893923556353, "learning_rate": 4.431688338845005e-05, "loss": 0.3127, "step": 8175 }, { "epoch": 0.9695244871338788, "grad_norm": 1.1888864110771686, "learning_rate": 4.431535954678845e-05, "loss": 0.361, "step": 8176 }, { "epoch": 0.9696430688960038, "grad_norm": 0.9074047903410065, "learning_rate": 4.4313835527062916e-05, "loss": 0.2467, "step": 8177 }, { "epoch": 0.9697616506581288, "grad_norm": 1.3513410669722292, "learning_rate": 4.431231132928752e-05, "loss": 0.4258, "step": 8178 }, { "epoch": 0.9698802324202538, "grad_norm": 2.4271383614477706, "learning_rate": 4.431078695347628e-05, "loss": 0.4968, "step": 8179 }, { "epoch": 0.9699988141823788, "grad_norm": 1.2491309480356787, "learning_rate": 4.430926239964327e-05, "loss": 0.428, "step": 8180 }, { "epoch": 0.9701173959445037, "grad_norm": 1.7306745460985185, "learning_rate": 4.4307737667802535e-05, "loss": 0.5438, "step": 8181 }, { "epoch": 0.9702359777066287, "grad_norm": 1.987216902175568, "learning_rate": 4.4306212757968136e-05, "loss": 0.4798, "step": 8182 }, { "epoch": 0.9703545594687537, "grad_norm": 1.4188095173894528, "learning_rate": 4.430468767015413e-05, "loss": 0.3463, "step": 8183 }, { "epoch": 0.9704731412308787, "grad_norm": 1.493202920179971, "learning_rate": 4.430316240437459e-05, "loss": 0.4963, "step": 8184 }, { "epoch": 0.9705917229930037, "grad_norm": 1.6176218137372922, "learning_rate": 4.4301636960643547e-05, "loss": 0.5062, "step": 8185 }, { "epoch": 0.9707103047551287, "grad_norm": 1.1218699444425575, "learning_rate": 4.430011133897509e-05, "loss": 0.3898, "step": 8186 }, { "epoch": 0.9708288865172536, "grad_norm": 1.404717250212203, "learning_rate": 4.429858553938327e-05, "loss": 0.3283, "step": 8187 }, { "epoch": 0.9709474682793786, "grad_norm": 1.2652422308033182, "learning_rate": 4.429705956188215e-05, "loss": 0.3631, "step": 8188 }, { "epoch": 0.9710660500415036, "grad_norm": 1.0702481052654365, "learning_rate": 4.429553340648582e-05, "loss": 0.2792, "step": 8189 }, { "epoch": 0.9711846318036286, "grad_norm": 1.3641518937897057, "learning_rate": 4.429400707320832e-05, "loss": 0.3565, "step": 8190 }, { "epoch": 0.9713032135657536, "grad_norm": 1.7613119523323126, "learning_rate": 4.4292480562063744e-05, "loss": 0.5023, "step": 8191 }, { "epoch": 0.9714217953278785, "grad_norm": 1.440184518153207, "learning_rate": 4.429095387306616e-05, "loss": 0.3049, "step": 8192 }, { "epoch": 0.9715403770900035, "grad_norm": 1.4479183717940678, "learning_rate": 4.428942700622962e-05, "loss": 0.4287, "step": 8193 }, { "epoch": 0.9716589588521285, "grad_norm": 1.6790783285911148, "learning_rate": 4.4287899961568225e-05, "loss": 0.4686, "step": 8194 }, { "epoch": 0.9717775406142535, "grad_norm": 1.3622350586926437, "learning_rate": 4.428637273909605e-05, "loss": 0.3239, "step": 8195 }, { "epoch": 0.9718961223763786, "grad_norm": 1.2892139289553168, "learning_rate": 4.428484533882716e-05, "loss": 0.313, "step": 8196 }, { "epoch": 0.9720147041385035, "grad_norm": 1.3401894177361846, "learning_rate": 4.4283317760775655e-05, "loss": 0.313, "step": 8197 }, { "epoch": 0.9721332859006285, "grad_norm": 1.6402893736383397, "learning_rate": 4.4281790004955596e-05, "loss": 0.4722, "step": 8198 }, { "epoch": 0.9722518676627535, "grad_norm": 1.3413166391540747, "learning_rate": 4.4280262071381086e-05, "loss": 0.3511, "step": 8199 }, { "epoch": 0.9723704494248785, "grad_norm": 1.2495459317850015, "learning_rate": 4.42787339600662e-05, "loss": 0.3397, "step": 8200 }, { "epoch": 0.9724890311870035, "grad_norm": 1.3937855029562911, "learning_rate": 4.427720567102503e-05, "loss": 0.3753, "step": 8201 }, { "epoch": 0.9726076129491285, "grad_norm": 0.9350646121531249, "learning_rate": 4.427567720427166e-05, "loss": 0.2669, "step": 8202 }, { "epoch": 0.9727261947112534, "grad_norm": 1.0361441724849219, "learning_rate": 4.4274148559820184e-05, "loss": 0.3553, "step": 8203 }, { "epoch": 0.9728447764733784, "grad_norm": 1.541816279716457, "learning_rate": 4.427261973768469e-05, "loss": 0.398, "step": 8204 }, { "epoch": 0.9729633582355034, "grad_norm": 1.723326328994623, "learning_rate": 4.4271090737879284e-05, "loss": 0.4114, "step": 8205 }, { "epoch": 0.9730819399976284, "grad_norm": 1.452991294167984, "learning_rate": 4.426956156041805e-05, "loss": 0.3887, "step": 8206 }, { "epoch": 0.9732005217597534, "grad_norm": 1.0077558517289296, "learning_rate": 4.426803220531509e-05, "loss": 0.3006, "step": 8207 }, { "epoch": 0.9733191035218783, "grad_norm": 1.2721200412751315, "learning_rate": 4.42665026725845e-05, "loss": 0.3163, "step": 8208 }, { "epoch": 0.9734376852840033, "grad_norm": 1.1337580601403356, "learning_rate": 4.426497296224038e-05, "loss": 0.3492, "step": 8209 }, { "epoch": 0.9735562670461283, "grad_norm": 1.076771339224893, "learning_rate": 4.4263443074296836e-05, "loss": 0.3269, "step": 8210 }, { "epoch": 0.9736748488082533, "grad_norm": 1.0294929896144827, "learning_rate": 4.426191300876796e-05, "loss": 0.2766, "step": 8211 }, { "epoch": 0.9737934305703783, "grad_norm": 1.2721147737790501, "learning_rate": 4.4260382765667875e-05, "loss": 0.5007, "step": 8212 }, { "epoch": 0.9739120123325032, "grad_norm": 1.1534284645168305, "learning_rate": 4.425885234501068e-05, "loss": 0.2473, "step": 8213 }, { "epoch": 0.9740305940946282, "grad_norm": 0.9918955543628654, "learning_rate": 4.425732174681048e-05, "loss": 0.2563, "step": 8214 }, { "epoch": 0.9741491758567532, "grad_norm": 1.1017125839990123, "learning_rate": 4.425579097108139e-05, "loss": 0.2688, "step": 8215 }, { "epoch": 0.9742677576188782, "grad_norm": 1.6039275702104385, "learning_rate": 4.425426001783752e-05, "loss": 0.3914, "step": 8216 }, { "epoch": 0.9743863393810032, "grad_norm": 1.413733879228654, "learning_rate": 4.4252728887092985e-05, "loss": 0.3802, "step": 8217 }, { "epoch": 0.9745049211431281, "grad_norm": 1.9617133293881306, "learning_rate": 4.4251197578861895e-05, "loss": 0.519, "step": 8218 }, { "epoch": 0.9746235029052531, "grad_norm": 1.049980515791884, "learning_rate": 4.4249666093158375e-05, "loss": 0.2601, "step": 8219 }, { "epoch": 0.9747420846673781, "grad_norm": 1.282932316984849, "learning_rate": 4.424813442999654e-05, "loss": 0.3163, "step": 8220 }, { "epoch": 0.9748606664295032, "grad_norm": 2.089350268423967, "learning_rate": 4.4246602589390505e-05, "loss": 0.4115, "step": 8221 }, { "epoch": 0.9749792481916282, "grad_norm": 1.3385941295003538, "learning_rate": 4.42450705713544e-05, "loss": 0.3306, "step": 8222 }, { "epoch": 0.9750978299537532, "grad_norm": 1.6387965707964869, "learning_rate": 4.424353837590234e-05, "loss": 0.4756, "step": 8223 }, { "epoch": 0.9752164117158781, "grad_norm": 1.3945664719658082, "learning_rate": 4.4242006003048455e-05, "loss": 0.45, "step": 8224 }, { "epoch": 0.9753349934780031, "grad_norm": 1.3718169850503437, "learning_rate": 4.424047345280688e-05, "loss": 0.3953, "step": 8225 }, { "epoch": 0.9754535752401281, "grad_norm": 1.490019740919339, "learning_rate": 4.423894072519173e-05, "loss": 0.4175, "step": 8226 }, { "epoch": 0.9755721570022531, "grad_norm": 1.390351439311583, "learning_rate": 4.423740782021713e-05, "loss": 0.4115, "step": 8227 }, { "epoch": 0.9756907387643781, "grad_norm": 1.482690610601037, "learning_rate": 4.423587473789722e-05, "loss": 0.3932, "step": 8228 }, { "epoch": 0.975809320526503, "grad_norm": 0.9740296380707301, "learning_rate": 4.4234341478246135e-05, "loss": 0.3463, "step": 8229 }, { "epoch": 0.975927902288628, "grad_norm": 1.0893854687165152, "learning_rate": 4.423280804127802e-05, "loss": 0.2413, "step": 8230 }, { "epoch": 0.976046484050753, "grad_norm": 1.3056862616797247, "learning_rate": 4.423127442700699e-05, "loss": 0.4037, "step": 8231 }, { "epoch": 0.976165065812878, "grad_norm": 1.5161811346437355, "learning_rate": 4.422974063544719e-05, "loss": 0.3927, "step": 8232 }, { "epoch": 0.976283647575003, "grad_norm": 1.1830751205590924, "learning_rate": 4.422820666661276e-05, "loss": 0.3047, "step": 8233 }, { "epoch": 0.976402229337128, "grad_norm": 1.460136211669708, "learning_rate": 4.422667252051785e-05, "loss": 0.3145, "step": 8234 }, { "epoch": 0.9765208110992529, "grad_norm": 1.3029913160266968, "learning_rate": 4.422513819717661e-05, "loss": 0.3256, "step": 8235 }, { "epoch": 0.9766393928613779, "grad_norm": 1.3197674250621456, "learning_rate": 4.422360369660316e-05, "loss": 0.3718, "step": 8236 }, { "epoch": 0.9767579746235029, "grad_norm": 1.819194100709286, "learning_rate": 4.4222069018811646e-05, "loss": 0.4408, "step": 8237 }, { "epoch": 0.9768765563856279, "grad_norm": 1.1865882030349053, "learning_rate": 4.422053416381624e-05, "loss": 0.2861, "step": 8238 }, { "epoch": 0.9769951381477529, "grad_norm": 1.378766527501846, "learning_rate": 4.421899913163108e-05, "loss": 0.3727, "step": 8239 }, { "epoch": 0.9771137199098778, "grad_norm": 1.1750551265831204, "learning_rate": 4.421746392227031e-05, "loss": 0.285, "step": 8240 }, { "epoch": 0.9772323016720028, "grad_norm": 1.5919439824388095, "learning_rate": 4.421592853574808e-05, "loss": 0.4557, "step": 8241 }, { "epoch": 0.9773508834341278, "grad_norm": 1.4915030825085096, "learning_rate": 4.4214392972078564e-05, "loss": 0.3796, "step": 8242 }, { "epoch": 0.9774694651962528, "grad_norm": 1.4856681422083686, "learning_rate": 4.421285723127591e-05, "loss": 0.4703, "step": 8243 }, { "epoch": 0.9775880469583778, "grad_norm": 1.5037894558235063, "learning_rate": 4.421132131335427e-05, "loss": 0.47, "step": 8244 }, { "epoch": 0.9777066287205027, "grad_norm": 2.290613850905578, "learning_rate": 4.42097852183278e-05, "loss": 0.439, "step": 8245 }, { "epoch": 0.9778252104826278, "grad_norm": 1.9440260274002121, "learning_rate": 4.420824894621068e-05, "loss": 0.4844, "step": 8246 }, { "epoch": 0.9779437922447528, "grad_norm": 1.1735948052483787, "learning_rate": 4.4206712497017044e-05, "loss": 0.3585, "step": 8247 }, { "epoch": 0.9780623740068778, "grad_norm": 1.4126376662162046, "learning_rate": 4.4205175870761084e-05, "loss": 0.4634, "step": 8248 }, { "epoch": 0.9781809557690028, "grad_norm": 1.155936304663757, "learning_rate": 4.420363906745695e-05, "loss": 0.3024, "step": 8249 }, { "epoch": 0.9782995375311277, "grad_norm": 1.3232305058601213, "learning_rate": 4.4202102087118816e-05, "loss": 0.3661, "step": 8250 }, { "epoch": 0.9784181192932527, "grad_norm": 1.2903021732971605, "learning_rate": 4.420056492976083e-05, "loss": 0.4302, "step": 8251 }, { "epoch": 0.9785367010553777, "grad_norm": 1.0388218051379199, "learning_rate": 4.41990275953972e-05, "loss": 0.2812, "step": 8252 }, { "epoch": 0.9786552828175027, "grad_norm": 1.2177193996533395, "learning_rate": 4.419749008404207e-05, "loss": 0.3629, "step": 8253 }, { "epoch": 0.9787738645796277, "grad_norm": 1.2441246059391018, "learning_rate": 4.4195952395709626e-05, "loss": 0.4034, "step": 8254 }, { "epoch": 0.9788924463417527, "grad_norm": 1.954026238271305, "learning_rate": 4.419441453041404e-05, "loss": 0.6383, "step": 8255 }, { "epoch": 0.9790110281038776, "grad_norm": 1.5236650976977129, "learning_rate": 4.419287648816949e-05, "loss": 0.3747, "step": 8256 }, { "epoch": 0.9791296098660026, "grad_norm": 1.4158456378821245, "learning_rate": 4.4191338268990155e-05, "loss": 0.3576, "step": 8257 }, { "epoch": 0.9792481916281276, "grad_norm": 1.0445991961790237, "learning_rate": 4.418979987289021e-05, "loss": 0.3024, "step": 8258 }, { "epoch": 0.9793667733902526, "grad_norm": 1.8982252414516851, "learning_rate": 4.4188261299883855e-05, "loss": 0.4167, "step": 8259 }, { "epoch": 0.9794853551523776, "grad_norm": 1.7302093215581753, "learning_rate": 4.418672254998525e-05, "loss": 0.5263, "step": 8260 }, { "epoch": 0.9796039369145025, "grad_norm": 1.3160211013671157, "learning_rate": 4.4185183623208596e-05, "loss": 0.3069, "step": 8261 }, { "epoch": 0.9797225186766275, "grad_norm": 1.5029790123065305, "learning_rate": 4.4183644519568074e-05, "loss": 0.3576, "step": 8262 }, { "epoch": 0.9798411004387525, "grad_norm": 1.1714167229553873, "learning_rate": 4.4182105239077874e-05, "loss": 0.2556, "step": 8263 }, { "epoch": 0.9799596822008775, "grad_norm": 1.3695227954239373, "learning_rate": 4.4180565781752185e-05, "loss": 0.3805, "step": 8264 }, { "epoch": 0.9800782639630025, "grad_norm": 1.5265943569979792, "learning_rate": 4.4179026147605204e-05, "loss": 0.4969, "step": 8265 }, { "epoch": 0.9801968457251274, "grad_norm": 1.0496227491883274, "learning_rate": 4.417748633665112e-05, "loss": 0.2916, "step": 8266 }, { "epoch": 0.9803154274872524, "grad_norm": 1.1745081791655443, "learning_rate": 4.417594634890413e-05, "loss": 0.3455, "step": 8267 }, { "epoch": 0.9804340092493774, "grad_norm": 1.3660139898043802, "learning_rate": 4.4174406184378423e-05, "loss": 0.3067, "step": 8268 }, { "epoch": 0.9805525910115024, "grad_norm": 1.4143690696727151, "learning_rate": 4.417286584308821e-05, "loss": 0.3741, "step": 8269 }, { "epoch": 0.9806711727736274, "grad_norm": 1.6161539479091347, "learning_rate": 4.417132532504768e-05, "loss": 0.4053, "step": 8270 }, { "epoch": 0.9807897545357525, "grad_norm": 1.9189972634805195, "learning_rate": 4.4169784630271044e-05, "loss": 0.4676, "step": 8271 }, { "epoch": 0.9809083362978774, "grad_norm": 1.5210829500560445, "learning_rate": 4.41682437587725e-05, "loss": 0.4304, "step": 8272 }, { "epoch": 0.9810269180600024, "grad_norm": 1.2980712379443737, "learning_rate": 4.4166702710566253e-05, "loss": 0.3041, "step": 8273 }, { "epoch": 0.9811454998221274, "grad_norm": 1.042771555026388, "learning_rate": 4.4165161485666515e-05, "loss": 0.321, "step": 8274 }, { "epoch": 0.9812640815842524, "grad_norm": 1.6762945741493627, "learning_rate": 4.416362008408749e-05, "loss": 0.3537, "step": 8275 }, { "epoch": 0.9813826633463774, "grad_norm": 1.5355305933543786, "learning_rate": 4.416207850584338e-05, "loss": 0.5004, "step": 8276 }, { "epoch": 0.9815012451085023, "grad_norm": 1.3440459745088584, "learning_rate": 4.416053675094841e-05, "loss": 0.3886, "step": 8277 }, { "epoch": 0.9816198268706273, "grad_norm": 1.7659723243297478, "learning_rate": 4.4158994819416785e-05, "loss": 0.5173, "step": 8278 }, { "epoch": 0.9817384086327523, "grad_norm": 0.9384418976409911, "learning_rate": 4.415745271126273e-05, "loss": 0.2449, "step": 8279 }, { "epoch": 0.9818569903948773, "grad_norm": 1.134335210879764, "learning_rate": 4.4155910426500445e-05, "loss": 0.2208, "step": 8280 }, { "epoch": 0.9819755721570023, "grad_norm": 1.4023215298284348, "learning_rate": 4.415436796514416e-05, "loss": 0.3452, "step": 8281 }, { "epoch": 0.9820941539191272, "grad_norm": 1.2038328093017134, "learning_rate": 4.415282532720809e-05, "loss": 0.3374, "step": 8282 }, { "epoch": 0.9822127356812522, "grad_norm": 1.8541931352399736, "learning_rate": 4.415128251270646e-05, "loss": 0.5911, "step": 8283 }, { "epoch": 0.9823313174433772, "grad_norm": 1.1325715717383371, "learning_rate": 4.414973952165349e-05, "loss": 0.3457, "step": 8284 }, { "epoch": 0.9824498992055022, "grad_norm": 1.3175636894067353, "learning_rate": 4.41481963540634e-05, "loss": 0.3728, "step": 8285 }, { "epoch": 0.9825684809676272, "grad_norm": 1.1846950592231786, "learning_rate": 4.414665300995042e-05, "loss": 0.3901, "step": 8286 }, { "epoch": 0.9826870627297521, "grad_norm": 1.3406832503286963, "learning_rate": 4.414510948932878e-05, "loss": 0.4634, "step": 8287 }, { "epoch": 0.9828056444918771, "grad_norm": 1.1666157425659556, "learning_rate": 4.414356579221271e-05, "loss": 0.3602, "step": 8288 }, { "epoch": 0.9829242262540021, "grad_norm": 1.1461167083692458, "learning_rate": 4.4142021918616436e-05, "loss": 0.3556, "step": 8289 }, { "epoch": 0.9830428080161271, "grad_norm": 1.3339416297478541, "learning_rate": 4.41404778685542e-05, "loss": 0.366, "step": 8290 }, { "epoch": 0.9831613897782521, "grad_norm": 1.0992496175637299, "learning_rate": 4.413893364204022e-05, "loss": 0.2794, "step": 8291 }, { "epoch": 0.983279971540377, "grad_norm": 1.0496558124389437, "learning_rate": 4.413738923908874e-05, "loss": 0.3312, "step": 8292 }, { "epoch": 0.983398553302502, "grad_norm": 1.0041303520195561, "learning_rate": 4.413584465971401e-05, "loss": 0.2563, "step": 8293 }, { "epoch": 0.983517135064627, "grad_norm": 1.1994406671413391, "learning_rate": 4.413429990393026e-05, "loss": 0.3521, "step": 8294 }, { "epoch": 0.983635716826752, "grad_norm": 1.1228716758603077, "learning_rate": 4.4132754971751725e-05, "loss": 0.2919, "step": 8295 }, { "epoch": 0.9837542985888771, "grad_norm": 1.5484890592929543, "learning_rate": 4.413120986319265e-05, "loss": 0.5048, "step": 8296 }, { "epoch": 0.9838728803510021, "grad_norm": 1.1216547936045358, "learning_rate": 4.412966457826727e-05, "loss": 0.3163, "step": 8297 }, { "epoch": 0.983991462113127, "grad_norm": 1.216994063474826, "learning_rate": 4.4128119116989853e-05, "loss": 0.3122, "step": 8298 }, { "epoch": 0.984110043875252, "grad_norm": 1.356051453945428, "learning_rate": 4.412657347937463e-05, "loss": 0.3979, "step": 8299 }, { "epoch": 0.984228625637377, "grad_norm": 1.5565817440412268, "learning_rate": 4.4125027665435855e-05, "loss": 0.4419, "step": 8300 }, { "epoch": 0.984347207399502, "grad_norm": 1.2939810403576215, "learning_rate": 4.412348167518778e-05, "loss": 0.3484, "step": 8301 }, { "epoch": 0.984465789161627, "grad_norm": 1.6675655207334301, "learning_rate": 4.412193550864465e-05, "loss": 0.443, "step": 8302 }, { "epoch": 0.984584370923752, "grad_norm": 1.1275146104501859, "learning_rate": 4.412038916582072e-05, "loss": 0.3197, "step": 8303 }, { "epoch": 0.9847029526858769, "grad_norm": 1.4852302003618498, "learning_rate": 4.4118842646730265e-05, "loss": 0.4782, "step": 8304 }, { "epoch": 0.9848215344480019, "grad_norm": 1.0994196022871845, "learning_rate": 4.411729595138751e-05, "loss": 0.2952, "step": 8305 }, { "epoch": 0.9849401162101269, "grad_norm": 1.1325394807586397, "learning_rate": 4.4115749079806735e-05, "loss": 0.3134, "step": 8306 }, { "epoch": 0.9850586979722519, "grad_norm": 1.8362708734907416, "learning_rate": 4.4114202032002196e-05, "loss": 0.5704, "step": 8307 }, { "epoch": 0.9851772797343769, "grad_norm": 1.1073745324526318, "learning_rate": 4.411265480798815e-05, "loss": 0.3177, "step": 8308 }, { "epoch": 0.9852958614965018, "grad_norm": 0.9190692121407944, "learning_rate": 4.411110740777887e-05, "loss": 0.2273, "step": 8309 }, { "epoch": 0.9854144432586268, "grad_norm": 0.9970089836211617, "learning_rate": 4.410955983138861e-05, "loss": 0.2746, "step": 8310 }, { "epoch": 0.9855330250207518, "grad_norm": 1.0994086346776935, "learning_rate": 4.410801207883165e-05, "loss": 0.3126, "step": 8311 }, { "epoch": 0.9856516067828768, "grad_norm": 1.0749081274117072, "learning_rate": 4.410646415012224e-05, "loss": 0.2873, "step": 8312 }, { "epoch": 0.9857701885450018, "grad_norm": 1.005669069933978, "learning_rate": 4.4104916045274666e-05, "loss": 0.2925, "step": 8313 }, { "epoch": 0.9858887703071267, "grad_norm": 1.327819062712485, "learning_rate": 4.41033677643032e-05, "loss": 0.4411, "step": 8314 }, { "epoch": 0.9860073520692517, "grad_norm": 1.1203345668524824, "learning_rate": 4.410181930722209e-05, "loss": 0.3527, "step": 8315 }, { "epoch": 0.9861259338313767, "grad_norm": 1.377269764827712, "learning_rate": 4.4100270674045644e-05, "loss": 0.3138, "step": 8316 }, { "epoch": 0.9862445155935017, "grad_norm": 1.1932555608511186, "learning_rate": 4.409872186478812e-05, "loss": 0.3424, "step": 8317 }, { "epoch": 0.9863630973556267, "grad_norm": 1.3516302408045655, "learning_rate": 4.409717287946381e-05, "loss": 0.3693, "step": 8318 }, { "epoch": 0.9864816791177516, "grad_norm": 1.5059061622839247, "learning_rate": 4.4095623718086975e-05, "loss": 0.3634, "step": 8319 }, { "epoch": 0.9866002608798766, "grad_norm": 1.52567461120596, "learning_rate": 4.409407438067191e-05, "loss": 0.3972, "step": 8320 }, { "epoch": 0.9867188426420017, "grad_norm": 1.6358471629146132, "learning_rate": 4.409252486723289e-05, "loss": 0.5007, "step": 8321 }, { "epoch": 0.9868374244041267, "grad_norm": 1.9357186421675099, "learning_rate": 4.409097517778421e-05, "loss": 0.4328, "step": 8322 }, { "epoch": 0.9869560061662517, "grad_norm": 1.144977940217153, "learning_rate": 4.408942531234015e-05, "loss": 0.2859, "step": 8323 }, { "epoch": 0.9870745879283767, "grad_norm": 1.119934173802696, "learning_rate": 4.408787527091499e-05, "loss": 0.2795, "step": 8324 }, { "epoch": 0.9871931696905016, "grad_norm": 1.2478435845979823, "learning_rate": 4.4086325053523036e-05, "loss": 0.3489, "step": 8325 }, { "epoch": 0.9873117514526266, "grad_norm": 1.3078614196705987, "learning_rate": 4.408477466017856e-05, "loss": 0.3519, "step": 8326 }, { "epoch": 0.9874303332147516, "grad_norm": 1.412297391448768, "learning_rate": 4.408322409089587e-05, "loss": 0.32, "step": 8327 }, { "epoch": 0.9875489149768766, "grad_norm": 1.3341375931682915, "learning_rate": 4.408167334568926e-05, "loss": 0.3771, "step": 8328 }, { "epoch": 0.9876674967390016, "grad_norm": 1.2051370130038352, "learning_rate": 4.4080122424573014e-05, "loss": 0.2873, "step": 8329 }, { "epoch": 0.9877860785011265, "grad_norm": 0.8677588652279163, "learning_rate": 4.407857132756144e-05, "loss": 0.2477, "step": 8330 }, { "epoch": 0.9879046602632515, "grad_norm": 1.603924359562475, "learning_rate": 4.407702005466884e-05, "loss": 0.5179, "step": 8331 }, { "epoch": 0.9880232420253765, "grad_norm": 1.3642989926233313, "learning_rate": 4.4075468605909495e-05, "loss": 0.4478, "step": 8332 }, { "epoch": 0.9881418237875015, "grad_norm": 1.3186306307710889, "learning_rate": 4.407391698129773e-05, "loss": 0.2738, "step": 8333 }, { "epoch": 0.9882604055496265, "grad_norm": 1.2434007412517496, "learning_rate": 4.407236518084784e-05, "loss": 0.2545, "step": 8334 }, { "epoch": 0.9883789873117514, "grad_norm": 1.4309835706668015, "learning_rate": 4.407081320457414e-05, "loss": 0.3443, "step": 8335 }, { "epoch": 0.9884975690738764, "grad_norm": 1.220267684942932, "learning_rate": 4.4069261052490905e-05, "loss": 0.2703, "step": 8336 }, { "epoch": 0.9886161508360014, "grad_norm": 1.7623157504592633, "learning_rate": 4.406770872461249e-05, "loss": 0.4866, "step": 8337 }, { "epoch": 0.9887347325981264, "grad_norm": 1.5232810964712822, "learning_rate": 4.406615622095317e-05, "loss": 0.4773, "step": 8338 }, { "epoch": 0.9888533143602514, "grad_norm": 1.6629620261011426, "learning_rate": 4.406460354152728e-05, "loss": 0.4104, "step": 8339 }, { "epoch": 0.9889718961223763, "grad_norm": 1.4503818380698115, "learning_rate": 4.4063050686349115e-05, "loss": 0.4208, "step": 8340 }, { "epoch": 0.9890904778845013, "grad_norm": 1.2750722881846672, "learning_rate": 4.406149765543301e-05, "loss": 0.3748, "step": 8341 }, { "epoch": 0.9892090596466263, "grad_norm": 1.1198582269859838, "learning_rate": 4.405994444879325e-05, "loss": 0.285, "step": 8342 }, { "epoch": 0.9893276414087513, "grad_norm": 1.3072882171867417, "learning_rate": 4.405839106644419e-05, "loss": 0.3668, "step": 8343 }, { "epoch": 0.9894462231708763, "grad_norm": 1.1834262559508983, "learning_rate": 4.405683750840014e-05, "loss": 0.3053, "step": 8344 }, { "epoch": 0.9895648049330013, "grad_norm": 2.103725946320049, "learning_rate": 4.405528377467541e-05, "loss": 0.6149, "step": 8345 }, { "epoch": 0.9896833866951263, "grad_norm": 1.198405940078596, "learning_rate": 4.405372986528433e-05, "loss": 0.3677, "step": 8346 }, { "epoch": 0.9898019684572513, "grad_norm": 1.2908765298526426, "learning_rate": 4.405217578024123e-05, "loss": 0.3976, "step": 8347 }, { "epoch": 0.9899205502193763, "grad_norm": 0.9963238427189444, "learning_rate": 4.4050621519560434e-05, "loss": 0.3091, "step": 8348 }, { "epoch": 0.9900391319815013, "grad_norm": 1.1757533106434608, "learning_rate": 4.4049067083256266e-05, "loss": 0.3467, "step": 8349 }, { "epoch": 0.9901577137436263, "grad_norm": 0.9119039467758296, "learning_rate": 4.404751247134306e-05, "loss": 0.2468, "step": 8350 }, { "epoch": 0.9902762955057512, "grad_norm": 1.0647396531477253, "learning_rate": 4.404595768383514e-05, "loss": 0.2835, "step": 8351 }, { "epoch": 0.9903948772678762, "grad_norm": 0.8893043141503866, "learning_rate": 4.4044402720746854e-05, "loss": 0.1991, "step": 8352 }, { "epoch": 0.9905134590300012, "grad_norm": 1.31823956288799, "learning_rate": 4.404284758209253e-05, "loss": 0.4616, "step": 8353 }, { "epoch": 0.9906320407921262, "grad_norm": 1.455877491605922, "learning_rate": 4.404129226788649e-05, "loss": 0.4176, "step": 8354 }, { "epoch": 0.9907506225542512, "grad_norm": 1.1537333783276036, "learning_rate": 4.4039736778143103e-05, "loss": 0.2802, "step": 8355 }, { "epoch": 0.9908692043163762, "grad_norm": 1.1508236510848868, "learning_rate": 4.403818111287668e-05, "loss": 0.3065, "step": 8356 }, { "epoch": 0.9909877860785011, "grad_norm": 0.9467449206899237, "learning_rate": 4.4036625272101575e-05, "loss": 0.2593, "step": 8357 }, { "epoch": 0.9911063678406261, "grad_norm": 1.0830540285871675, "learning_rate": 4.403506925583213e-05, "loss": 0.3039, "step": 8358 }, { "epoch": 0.9912249496027511, "grad_norm": 1.6873746458445982, "learning_rate": 4.4033513064082684e-05, "loss": 0.5691, "step": 8359 }, { "epoch": 0.9913435313648761, "grad_norm": 1.4196795585458912, "learning_rate": 4.403195669686759e-05, "loss": 0.351, "step": 8360 }, { "epoch": 0.991462113127001, "grad_norm": 1.1219907812060252, "learning_rate": 4.40304001542012e-05, "loss": 0.3611, "step": 8361 }, { "epoch": 0.991580694889126, "grad_norm": 2.278143101768903, "learning_rate": 4.402884343609785e-05, "loss": 0.4256, "step": 8362 }, { "epoch": 0.991699276651251, "grad_norm": 1.2288900606411612, "learning_rate": 4.402728654257191e-05, "loss": 0.3703, "step": 8363 }, { "epoch": 0.991817858413376, "grad_norm": 1.07848626114996, "learning_rate": 4.402572947363771e-05, "loss": 0.3306, "step": 8364 }, { "epoch": 0.991936440175501, "grad_norm": 1.391748692174989, "learning_rate": 4.402417222930962e-05, "loss": 0.3726, "step": 8365 }, { "epoch": 0.992055021937626, "grad_norm": 1.3466464512086804, "learning_rate": 4.4022614809601985e-05, "loss": 0.3493, "step": 8366 }, { "epoch": 0.9921736036997509, "grad_norm": 1.191317824634217, "learning_rate": 4.402105721452918e-05, "loss": 0.3208, "step": 8367 }, { "epoch": 0.9922921854618759, "grad_norm": 0.8967699348752601, "learning_rate": 4.401949944410554e-05, "loss": 0.261, "step": 8368 }, { "epoch": 0.9924107672240009, "grad_norm": 1.116273200530575, "learning_rate": 4.401794149834544e-05, "loss": 0.3364, "step": 8369 }, { "epoch": 0.9925293489861259, "grad_norm": 1.3887421289050765, "learning_rate": 4.401638337726325e-05, "loss": 0.4445, "step": 8370 }, { "epoch": 0.992647930748251, "grad_norm": 1.1396667876560393, "learning_rate": 4.401482508087331e-05, "loss": 0.3227, "step": 8371 }, { "epoch": 0.992766512510376, "grad_norm": 1.2113054683819284, "learning_rate": 4.401326660919002e-05, "loss": 0.3597, "step": 8372 }, { "epoch": 0.9928850942725009, "grad_norm": 0.9920963243245934, "learning_rate": 4.401170796222771e-05, "loss": 0.2495, "step": 8373 }, { "epoch": 0.9930036760346259, "grad_norm": 1.4840581313377206, "learning_rate": 4.401014914000078e-05, "loss": 0.3918, "step": 8374 }, { "epoch": 0.9931222577967509, "grad_norm": 1.2860677942356737, "learning_rate": 4.4008590142523576e-05, "loss": 0.394, "step": 8375 }, { "epoch": 0.9932408395588759, "grad_norm": 1.2054274465654053, "learning_rate": 4.4007030969810484e-05, "loss": 0.3894, "step": 8376 }, { "epoch": 0.9933594213210009, "grad_norm": 1.36404192030046, "learning_rate": 4.4005471621875874e-05, "loss": 0.3879, "step": 8377 }, { "epoch": 0.9934780030831258, "grad_norm": 1.191411666390149, "learning_rate": 4.4003912098734125e-05, "loss": 0.3305, "step": 8378 }, { "epoch": 0.9935965848452508, "grad_norm": 1.5263877000463102, "learning_rate": 4.40023524003996e-05, "loss": 0.5024, "step": 8379 }, { "epoch": 0.9937151666073758, "grad_norm": 1.1770925707619329, "learning_rate": 4.40007925268867e-05, "loss": 0.2975, "step": 8380 }, { "epoch": 0.9938337483695008, "grad_norm": 1.4316975666712985, "learning_rate": 4.3999232478209796e-05, "loss": 0.4419, "step": 8381 }, { "epoch": 0.9939523301316258, "grad_norm": 1.110856695490159, "learning_rate": 4.399767225438326e-05, "loss": 0.2998, "step": 8382 }, { "epoch": 0.9940709118937507, "grad_norm": 1.1922122693325554, "learning_rate": 4.399611185542148e-05, "loss": 0.2887, "step": 8383 }, { "epoch": 0.9941894936558757, "grad_norm": 1.1728264172234133, "learning_rate": 4.399455128133885e-05, "loss": 0.2738, "step": 8384 }, { "epoch": 0.9943080754180007, "grad_norm": 1.2844702506003918, "learning_rate": 4.399299053214974e-05, "loss": 0.3066, "step": 8385 }, { "epoch": 0.9944266571801257, "grad_norm": 0.9807667478936694, "learning_rate": 4.399142960786855e-05, "loss": 0.2697, "step": 8386 }, { "epoch": 0.9945452389422507, "grad_norm": 1.3044634803674768, "learning_rate": 4.398986850850968e-05, "loss": 0.3512, "step": 8387 }, { "epoch": 0.9946638207043756, "grad_norm": 1.1771487897411643, "learning_rate": 4.39883072340875e-05, "loss": 0.2565, "step": 8388 }, { "epoch": 0.9947824024665006, "grad_norm": 1.2420486685758554, "learning_rate": 4.398674578461641e-05, "loss": 0.3277, "step": 8389 }, { "epoch": 0.9949009842286256, "grad_norm": 1.387581676215533, "learning_rate": 4.398518416011081e-05, "loss": 0.391, "step": 8390 }, { "epoch": 0.9950195659907506, "grad_norm": 1.3726693436681654, "learning_rate": 4.3983622360585094e-05, "loss": 0.4378, "step": 8391 }, { "epoch": 0.9951381477528756, "grad_norm": 1.3224357748157405, "learning_rate": 4.398206038605366e-05, "loss": 0.3967, "step": 8392 }, { "epoch": 0.9952567295150005, "grad_norm": 1.0072762819409165, "learning_rate": 4.3980498236530906e-05, "loss": 0.2664, "step": 8393 }, { "epoch": 0.9953753112771255, "grad_norm": 1.363029913503713, "learning_rate": 4.3978935912031236e-05, "loss": 0.4206, "step": 8394 }, { "epoch": 0.9954938930392505, "grad_norm": 1.812383238024004, "learning_rate": 4.397737341256904e-05, "loss": 0.519, "step": 8395 }, { "epoch": 0.9956124748013756, "grad_norm": 1.3389757404108453, "learning_rate": 4.397581073815874e-05, "loss": 0.3537, "step": 8396 }, { "epoch": 0.9957310565635006, "grad_norm": 1.6177210072311754, "learning_rate": 4.3974247888814736e-05, "loss": 0.4413, "step": 8397 }, { "epoch": 0.9958496383256256, "grad_norm": 1.5360715266935046, "learning_rate": 4.3972684864551435e-05, "loss": 0.4536, "step": 8398 }, { "epoch": 0.9959682200877505, "grad_norm": 1.3953794592339568, "learning_rate": 4.397112166538324e-05, "loss": 0.3692, "step": 8399 }, { "epoch": 0.9960868018498755, "grad_norm": 0.8565468997939193, "learning_rate": 4.396955829132457e-05, "loss": 0.2118, "step": 8400 }, { "epoch": 0.9962053836120005, "grad_norm": 1.327332589709776, "learning_rate": 4.396799474238984e-05, "loss": 0.3484, "step": 8401 }, { "epoch": 0.9963239653741255, "grad_norm": 0.965588737634401, "learning_rate": 4.396643101859345e-05, "loss": 0.2552, "step": 8402 }, { "epoch": 0.9964425471362505, "grad_norm": 0.9267118186393212, "learning_rate": 4.3964867119949826e-05, "loss": 0.2815, "step": 8403 }, { "epoch": 0.9965611288983754, "grad_norm": 1.2807303921377133, "learning_rate": 4.3963303046473385e-05, "loss": 0.3806, "step": 8404 }, { "epoch": 0.9966797106605004, "grad_norm": 1.1912467045154984, "learning_rate": 4.396173879817855e-05, "loss": 0.3124, "step": 8405 }, { "epoch": 0.9967982924226254, "grad_norm": 1.0972122119147063, "learning_rate": 4.396017437507973e-05, "loss": 0.2609, "step": 8406 }, { "epoch": 0.9969168741847504, "grad_norm": 1.1517713969281416, "learning_rate": 4.395860977719135e-05, "loss": 0.2991, "step": 8407 }, { "epoch": 0.9970354559468754, "grad_norm": 1.2341343789475054, "learning_rate": 4.395704500452784e-05, "loss": 0.3819, "step": 8408 }, { "epoch": 0.9971540377090004, "grad_norm": 1.3898659377744782, "learning_rate": 4.395548005710362e-05, "loss": 0.3926, "step": 8409 }, { "epoch": 0.9972726194711253, "grad_norm": 1.2453737077752018, "learning_rate": 4.395391493493312e-05, "loss": 0.2993, "step": 8410 }, { "epoch": 0.9973912012332503, "grad_norm": 1.1577730934032031, "learning_rate": 4.395234963803076e-05, "loss": 0.201, "step": 8411 }, { "epoch": 0.9975097829953753, "grad_norm": 1.3986908322431455, "learning_rate": 4.395078416641099e-05, "loss": 0.377, "step": 8412 }, { "epoch": 0.9976283647575003, "grad_norm": 1.2659900958531087, "learning_rate": 4.394921852008822e-05, "loss": 0.3554, "step": 8413 }, { "epoch": 0.9977469465196253, "grad_norm": 1.067785201855373, "learning_rate": 4.39476526990769e-05, "loss": 0.3007, "step": 8414 }, { "epoch": 0.9978655282817502, "grad_norm": 0.9299346891534348, "learning_rate": 4.394608670339145e-05, "loss": 0.2628, "step": 8415 }, { "epoch": 0.9979841100438752, "grad_norm": 1.459633649190856, "learning_rate": 4.394452053304632e-05, "loss": 0.4162, "step": 8416 }, { "epoch": 0.9981026918060002, "grad_norm": 1.2886448968308508, "learning_rate": 4.3942954188055934e-05, "loss": 0.3561, "step": 8417 }, { "epoch": 0.9982212735681252, "grad_norm": 1.6839758299514358, "learning_rate": 4.394138766843474e-05, "loss": 0.4649, "step": 8418 }, { "epoch": 0.9983398553302502, "grad_norm": 1.2005170950324398, "learning_rate": 4.393982097419719e-05, "loss": 0.3372, "step": 8419 }, { "epoch": 0.9984584370923751, "grad_norm": 1.0673854953428692, "learning_rate": 4.393825410535771e-05, "loss": 0.2691, "step": 8420 }, { "epoch": 0.9985770188545002, "grad_norm": 1.5751152785993752, "learning_rate": 4.3936687061930746e-05, "loss": 0.3658, "step": 8421 }, { "epoch": 0.9986956006166252, "grad_norm": 0.9716758827686145, "learning_rate": 4.393511984393076e-05, "loss": 0.2788, "step": 8422 }, { "epoch": 0.9988141823787502, "grad_norm": 1.0730029202344653, "learning_rate": 4.393355245137218e-05, "loss": 0.2501, "step": 8423 }, { "epoch": 0.9989327641408752, "grad_norm": 1.1905833741134881, "learning_rate": 4.393198488426946e-05, "loss": 0.3428, "step": 8424 }, { "epoch": 0.9990513459030002, "grad_norm": 1.1001311224650676, "learning_rate": 4.3930417142637064e-05, "loss": 0.3724, "step": 8425 }, { "epoch": 0.9991699276651251, "grad_norm": 1.228732629885281, "learning_rate": 4.392884922648943e-05, "loss": 0.3143, "step": 8426 }, { "epoch": 0.9992885094272501, "grad_norm": 1.3814827585714875, "learning_rate": 4.392728113584103e-05, "loss": 0.3525, "step": 8427 }, { "epoch": 0.9994070911893751, "grad_norm": 1.485714849450434, "learning_rate": 4.392571287070629e-05, "loss": 0.3761, "step": 8428 }, { "epoch": 0.9995256729515001, "grad_norm": 1.2142553362948696, "learning_rate": 4.39241444310997e-05, "loss": 0.3448, "step": 8429 }, { "epoch": 0.9996442547136251, "grad_norm": 1.8951449955056623, "learning_rate": 4.3922575817035706e-05, "loss": 0.5687, "step": 8430 }, { "epoch": 0.99976283647575, "grad_norm": 1.6129581696271182, "learning_rate": 4.3921007028528755e-05, "loss": 0.4994, "step": 8431 }, { "epoch": 0.999881418237875, "grad_norm": 1.2824675334568216, "learning_rate": 4.391943806559333e-05, "loss": 0.3286, "step": 8432 }, { "epoch": 1.0, "grad_norm": 1.366681362063659, "learning_rate": 4.3917868928243885e-05, "loss": 0.3314, "step": 8433 }, { "epoch": 1.000118581762125, "grad_norm": 1.0116535075431734, "learning_rate": 4.391629961649488e-05, "loss": 0.1771, "step": 8434 }, { "epoch": 1.00023716352425, "grad_norm": 1.2193812128411812, "learning_rate": 4.391473013036081e-05, "loss": 0.2422, "step": 8435 }, { "epoch": 1.000355745286375, "grad_norm": 1.4742052430419397, "learning_rate": 4.3913160469856103e-05, "loss": 0.4023, "step": 8436 }, { "epoch": 1.0004743270485, "grad_norm": 1.5130150378610312, "learning_rate": 4.391159063499525e-05, "loss": 0.3015, "step": 8437 }, { "epoch": 1.000592908810625, "grad_norm": 0.9946705772061516, "learning_rate": 4.391002062579273e-05, "loss": 0.2675, "step": 8438 }, { "epoch": 1.0007114905727499, "grad_norm": 1.3997477424240494, "learning_rate": 4.3908450442263005e-05, "loss": 0.4026, "step": 8439 }, { "epoch": 1.0008300723348749, "grad_norm": 1.1401223154126967, "learning_rate": 4.390688008442055e-05, "loss": 0.2775, "step": 8440 }, { "epoch": 1.0009486540969998, "grad_norm": 0.8500053707219434, "learning_rate": 4.390530955227986e-05, "loss": 0.1858, "step": 8441 }, { "epoch": 1.0010672358591248, "grad_norm": 1.3240812480486093, "learning_rate": 4.390373884585539e-05, "loss": 0.2419, "step": 8442 }, { "epoch": 1.0011858176212498, "grad_norm": 1.172378877902708, "learning_rate": 4.390216796516163e-05, "loss": 0.2735, "step": 8443 }, { "epoch": 1.0013043993833748, "grad_norm": 0.9041682156217384, "learning_rate": 4.390059691021306e-05, "loss": 0.1845, "step": 8444 }, { "epoch": 1.0014229811454998, "grad_norm": 1.5993065022336403, "learning_rate": 4.389902568102416e-05, "loss": 0.216, "step": 8445 }, { "epoch": 1.0015415629076247, "grad_norm": 0.7578723816842607, "learning_rate": 4.389745427760943e-05, "loss": 0.1625, "step": 8446 }, { "epoch": 1.0016601446697497, "grad_norm": 1.4286839894077132, "learning_rate": 4.389588269998334e-05, "loss": 0.339, "step": 8447 }, { "epoch": 1.0017787264318747, "grad_norm": 1.0012377571934274, "learning_rate": 4.389431094816038e-05, "loss": 0.1976, "step": 8448 }, { "epoch": 1.0018973081939997, "grad_norm": 1.1289661925722079, "learning_rate": 4.3892739022155035e-05, "loss": 0.2083, "step": 8449 }, { "epoch": 1.0020158899561247, "grad_norm": 0.9524707574705489, "learning_rate": 4.389116692198182e-05, "loss": 0.1963, "step": 8450 }, { "epoch": 1.0021344717182497, "grad_norm": 1.3839711981021519, "learning_rate": 4.38895946476552e-05, "loss": 0.3207, "step": 8451 }, { "epoch": 1.0022530534803746, "grad_norm": 1.17078525078618, "learning_rate": 4.388802219918969e-05, "loss": 0.2806, "step": 8452 }, { "epoch": 1.0023716352424996, "grad_norm": 1.0086992151293408, "learning_rate": 4.388644957659978e-05, "loss": 0.1685, "step": 8453 }, { "epoch": 1.0024902170046246, "grad_norm": 0.956868915552307, "learning_rate": 4.388487677989995e-05, "loss": 0.2242, "step": 8454 }, { "epoch": 1.0026087987667496, "grad_norm": 1.0151949699309901, "learning_rate": 4.3883303809104725e-05, "loss": 0.2477, "step": 8455 }, { "epoch": 1.0027273805288746, "grad_norm": 1.4989823584636237, "learning_rate": 4.3881730664228586e-05, "loss": 0.2747, "step": 8456 }, { "epoch": 1.0028459622909995, "grad_norm": 1.0611127957650728, "learning_rate": 4.388015734528605e-05, "loss": 0.2439, "step": 8457 }, { "epoch": 1.0029645440531247, "grad_norm": 1.1526288922072745, "learning_rate": 4.387858385229162e-05, "loss": 0.2337, "step": 8458 }, { "epoch": 1.0030831258152497, "grad_norm": 1.006097871421932, "learning_rate": 4.38770101852598e-05, "loss": 0.2658, "step": 8459 }, { "epoch": 1.0032017075773747, "grad_norm": 1.057263664126593, "learning_rate": 4.387543634420509e-05, "loss": 0.2374, "step": 8460 }, { "epoch": 1.0033202893394997, "grad_norm": 1.0953231929775675, "learning_rate": 4.3873862329142004e-05, "loss": 0.2184, "step": 8461 }, { "epoch": 1.0034388711016247, "grad_norm": 2.310140707232505, "learning_rate": 4.3872288140085044e-05, "loss": 0.5143, "step": 8462 }, { "epoch": 1.0035574528637496, "grad_norm": 1.4511061270652497, "learning_rate": 4.3870713777048736e-05, "loss": 0.2653, "step": 8463 }, { "epoch": 1.0036760346258746, "grad_norm": 0.9548306235049033, "learning_rate": 4.386913924004759e-05, "loss": 0.2277, "step": 8464 }, { "epoch": 1.0037946163879996, "grad_norm": 1.1681892298685417, "learning_rate": 4.3867564529096117e-05, "loss": 0.243, "step": 8465 }, { "epoch": 1.0039131981501246, "grad_norm": 1.1651845115311485, "learning_rate": 4.386598964420884e-05, "loss": 0.2535, "step": 8466 }, { "epoch": 1.0040317799122496, "grad_norm": 1.4461354750105, "learning_rate": 4.386441458540027e-05, "loss": 0.3373, "step": 8467 }, { "epoch": 1.0041503616743745, "grad_norm": 1.3104766460533388, "learning_rate": 4.3862839352684925e-05, "loss": 0.271, "step": 8468 }, { "epoch": 1.0042689434364995, "grad_norm": 1.4133080049472115, "learning_rate": 4.386126394607734e-05, "loss": 0.2635, "step": 8469 }, { "epoch": 1.0043875251986245, "grad_norm": 1.3595634842399655, "learning_rate": 4.385968836559203e-05, "loss": 0.26, "step": 8470 }, { "epoch": 1.0045061069607495, "grad_norm": 1.223973498053663, "learning_rate": 4.385811261124352e-05, "loss": 0.2421, "step": 8471 }, { "epoch": 1.0046246887228745, "grad_norm": 0.9985368000156944, "learning_rate": 4.385653668304633e-05, "loss": 0.2285, "step": 8472 }, { "epoch": 1.0047432704849995, "grad_norm": 0.7920777545129393, "learning_rate": 4.3854960581015e-05, "loss": 0.1803, "step": 8473 }, { "epoch": 1.0048618522471244, "grad_norm": 1.3979043692838444, "learning_rate": 4.3853384305164055e-05, "loss": 0.3115, "step": 8474 }, { "epoch": 1.0049804340092494, "grad_norm": 1.059978332471856, "learning_rate": 4.385180785550803e-05, "loss": 0.2578, "step": 8475 }, { "epoch": 1.0050990157713744, "grad_norm": 1.125049197367131, "learning_rate": 4.3850231232061454e-05, "loss": 0.2247, "step": 8476 }, { "epoch": 1.0052175975334994, "grad_norm": 1.1846929368143908, "learning_rate": 4.384865443483886e-05, "loss": 0.3053, "step": 8477 }, { "epoch": 1.0053361792956244, "grad_norm": 1.0256781277566005, "learning_rate": 4.3847077463854784e-05, "loss": 0.2091, "step": 8478 }, { "epoch": 1.0054547610577493, "grad_norm": 1.0894297031011426, "learning_rate": 4.384550031912377e-05, "loss": 0.2476, "step": 8479 }, { "epoch": 1.0055733428198743, "grad_norm": 1.2996948287644685, "learning_rate": 4.3843923000660345e-05, "loss": 0.2577, "step": 8480 }, { "epoch": 1.0056919245819993, "grad_norm": 1.0352343956731125, "learning_rate": 4.3842345508479064e-05, "loss": 0.227, "step": 8481 }, { "epoch": 1.0058105063441243, "grad_norm": 1.3341259711406626, "learning_rate": 4.3840767842594466e-05, "loss": 0.2441, "step": 8482 }, { "epoch": 1.0059290881062493, "grad_norm": 1.2805923260634549, "learning_rate": 4.3839190003021093e-05, "loss": 0.2847, "step": 8483 }, { "epoch": 1.0060476698683742, "grad_norm": 1.2672059214015086, "learning_rate": 4.383761198977348e-05, "loss": 0.3032, "step": 8484 }, { "epoch": 1.0061662516304992, "grad_norm": 1.3003501316059527, "learning_rate": 4.383603380286619e-05, "loss": 0.2902, "step": 8485 }, { "epoch": 1.0062848333926242, "grad_norm": 1.055031321568214, "learning_rate": 4.3834455442313767e-05, "loss": 0.2601, "step": 8486 }, { "epoch": 1.0064034151547492, "grad_norm": 1.4761709617770922, "learning_rate": 4.383287690813076e-05, "loss": 0.314, "step": 8487 }, { "epoch": 1.0065219969168742, "grad_norm": 1.0079408379731285, "learning_rate": 4.3831298200331725e-05, "loss": 0.2375, "step": 8488 }, { "epoch": 1.0066405786789991, "grad_norm": 1.1380826431607163, "learning_rate": 4.382971931893121e-05, "loss": 0.2554, "step": 8489 }, { "epoch": 1.0067591604411241, "grad_norm": 1.1348118086708896, "learning_rate": 4.382814026394377e-05, "loss": 0.2344, "step": 8490 }, { "epoch": 1.006877742203249, "grad_norm": 1.4530174379540401, "learning_rate": 4.382656103538397e-05, "loss": 0.2875, "step": 8491 }, { "epoch": 1.006996323965374, "grad_norm": 1.0407495624650387, "learning_rate": 4.382498163326637e-05, "loss": 0.2099, "step": 8492 }, { "epoch": 1.007114905727499, "grad_norm": 1.122949505292771, "learning_rate": 4.382340205760552e-05, "loss": 0.1919, "step": 8493 }, { "epoch": 1.007233487489624, "grad_norm": 0.9637810590025562, "learning_rate": 4.382182230841598e-05, "loss": 0.2381, "step": 8494 }, { "epoch": 1.007352069251749, "grad_norm": 1.276754080857855, "learning_rate": 4.382024238571233e-05, "loss": 0.2988, "step": 8495 }, { "epoch": 1.007470651013874, "grad_norm": 1.2003274390474081, "learning_rate": 4.3818662289509116e-05, "loss": 0.2915, "step": 8496 }, { "epoch": 1.007589232775999, "grad_norm": 1.0495622467007464, "learning_rate": 4.381708201982091e-05, "loss": 0.2239, "step": 8497 }, { "epoch": 1.007707814538124, "grad_norm": 1.337063216628371, "learning_rate": 4.381550157666229e-05, "loss": 0.2481, "step": 8498 }, { "epoch": 1.007826396300249, "grad_norm": 1.0775840448888152, "learning_rate": 4.3813920960047816e-05, "loss": 0.2041, "step": 8499 }, { "epoch": 1.007944978062374, "grad_norm": 1.857912393293358, "learning_rate": 4.381234016999206e-05, "loss": 0.3008, "step": 8500 }, { "epoch": 1.008063559824499, "grad_norm": 0.9832865972245178, "learning_rate": 4.38107592065096e-05, "loss": 0.2294, "step": 8501 }, { "epoch": 1.008182141586624, "grad_norm": 1.3910149826257736, "learning_rate": 4.3809178069615e-05, "loss": 0.3314, "step": 8502 }, { "epoch": 1.0083007233487489, "grad_norm": 1.0976676155204652, "learning_rate": 4.3807596759322855e-05, "loss": 0.2184, "step": 8503 }, { "epoch": 1.0084193051108739, "grad_norm": 1.362783412764019, "learning_rate": 4.3806015275647724e-05, "loss": 0.2922, "step": 8504 }, { "epoch": 1.0085378868729988, "grad_norm": 1.0779840743153437, "learning_rate": 4.3804433618604195e-05, "loss": 0.2165, "step": 8505 }, { "epoch": 1.0086564686351238, "grad_norm": 1.2584119587314155, "learning_rate": 4.380285178820685e-05, "loss": 0.2399, "step": 8506 }, { "epoch": 1.0087750503972488, "grad_norm": 1.543329074444893, "learning_rate": 4.3801269784470266e-05, "loss": 0.3001, "step": 8507 }, { "epoch": 1.0088936321593738, "grad_norm": 1.2425083163548134, "learning_rate": 4.379968760740903e-05, "loss": 0.2451, "step": 8508 }, { "epoch": 1.009012213921499, "grad_norm": 1.2111355707958291, "learning_rate": 4.379810525703773e-05, "loss": 0.2215, "step": 8509 }, { "epoch": 1.009130795683624, "grad_norm": 1.405106430613689, "learning_rate": 4.379652273337095e-05, "loss": 0.2892, "step": 8510 }, { "epoch": 1.009249377445749, "grad_norm": 1.3252634958095557, "learning_rate": 4.379494003642328e-05, "loss": 0.341, "step": 8511 }, { "epoch": 1.009367959207874, "grad_norm": 1.4089357504463023, "learning_rate": 4.3793357166209313e-05, "loss": 0.338, "step": 8512 }, { "epoch": 1.009486540969999, "grad_norm": 1.3332979909294422, "learning_rate": 4.379177412274363e-05, "loss": 0.3292, "step": 8513 }, { "epoch": 1.0096051227321239, "grad_norm": 1.9475234250562536, "learning_rate": 4.3790190906040846e-05, "loss": 0.5017, "step": 8514 }, { "epoch": 1.0097237044942489, "grad_norm": 1.4424283663277089, "learning_rate": 4.378860751611554e-05, "loss": 0.2839, "step": 8515 }, { "epoch": 1.0098422862563738, "grad_norm": 1.7639078241817572, "learning_rate": 4.378702395298231e-05, "loss": 0.2891, "step": 8516 }, { "epoch": 1.0099608680184988, "grad_norm": 1.302431562174773, "learning_rate": 4.378544021665576e-05, "loss": 0.276, "step": 8517 }, { "epoch": 1.0100794497806238, "grad_norm": 1.3781757993327952, "learning_rate": 4.378385630715048e-05, "loss": 0.305, "step": 8518 }, { "epoch": 1.0101980315427488, "grad_norm": 1.067353111985892, "learning_rate": 4.378227222448109e-05, "loss": 0.2444, "step": 8519 }, { "epoch": 1.0103166133048738, "grad_norm": 0.85282009762846, "learning_rate": 4.378068796866218e-05, "loss": 0.2051, "step": 8520 }, { "epoch": 1.0104351950669987, "grad_norm": 1.055959983747183, "learning_rate": 4.377910353970836e-05, "loss": 0.2515, "step": 8521 }, { "epoch": 1.0105537768291237, "grad_norm": 1.2216361808148561, "learning_rate": 4.377751893763422e-05, "loss": 0.2512, "step": 8522 }, { "epoch": 1.0106723585912487, "grad_norm": 1.055060757462321, "learning_rate": 4.377593416245439e-05, "loss": 0.2039, "step": 8523 }, { "epoch": 1.0107909403533737, "grad_norm": 1.2240033679493745, "learning_rate": 4.3774349214183474e-05, "loss": 0.2579, "step": 8524 }, { "epoch": 1.0109095221154987, "grad_norm": 0.9350299926816681, "learning_rate": 4.377276409283608e-05, "loss": 0.2046, "step": 8525 }, { "epoch": 1.0110281038776237, "grad_norm": 1.1864168428543627, "learning_rate": 4.377117879842682e-05, "loss": 0.298, "step": 8526 }, { "epoch": 1.0111466856397486, "grad_norm": 1.2583408940588878, "learning_rate": 4.3769593330970314e-05, "loss": 0.2666, "step": 8527 }, { "epoch": 1.0112652674018736, "grad_norm": 1.154412114982071, "learning_rate": 4.376800769048117e-05, "loss": 0.2235, "step": 8528 }, { "epoch": 1.0113838491639986, "grad_norm": 1.1597480377614724, "learning_rate": 4.376642187697401e-05, "loss": 0.2364, "step": 8529 }, { "epoch": 1.0115024309261236, "grad_norm": 1.1466625855854315, "learning_rate": 4.376483589046345e-05, "loss": 0.2277, "step": 8530 }, { "epoch": 1.0116210126882486, "grad_norm": 1.2750514131529698, "learning_rate": 4.3763249730964126e-05, "loss": 0.3053, "step": 8531 }, { "epoch": 1.0117395944503735, "grad_norm": 1.162166929675154, "learning_rate": 4.376166339849064e-05, "loss": 0.2499, "step": 8532 }, { "epoch": 1.0118581762124985, "grad_norm": 0.9022452117208262, "learning_rate": 4.376007689305762e-05, "loss": 0.1549, "step": 8533 }, { "epoch": 1.0119767579746235, "grad_norm": 0.9628951717996728, "learning_rate": 4.3758490214679705e-05, "loss": 0.197, "step": 8534 }, { "epoch": 1.0120953397367485, "grad_norm": 1.5805408359281443, "learning_rate": 4.375690336337151e-05, "loss": 0.3197, "step": 8535 }, { "epoch": 1.0122139214988735, "grad_norm": 1.8595952180351867, "learning_rate": 4.375531633914767e-05, "loss": 0.4343, "step": 8536 }, { "epoch": 1.0123325032609984, "grad_norm": 1.1186694543055207, "learning_rate": 4.375372914202281e-05, "loss": 0.1961, "step": 8537 }, { "epoch": 1.0124510850231234, "grad_norm": 1.1082097063203298, "learning_rate": 4.375214177201157e-05, "loss": 0.2299, "step": 8538 }, { "epoch": 1.0125696667852484, "grad_norm": 0.9746838122112957, "learning_rate": 4.375055422912857e-05, "loss": 0.1949, "step": 8539 }, { "epoch": 1.0126882485473734, "grad_norm": 1.1290862891330993, "learning_rate": 4.3748966513388456e-05, "loss": 0.3089, "step": 8540 }, { "epoch": 1.0128068303094984, "grad_norm": 1.221681544702482, "learning_rate": 4.374737862480586e-05, "loss": 0.288, "step": 8541 }, { "epoch": 1.0129254120716233, "grad_norm": 1.2629133963078014, "learning_rate": 4.374579056339543e-05, "loss": 0.2651, "step": 8542 }, { "epoch": 1.0130439938337483, "grad_norm": 2.024871823279411, "learning_rate": 4.374420232917179e-05, "loss": 0.5251, "step": 8543 }, { "epoch": 1.0131625755958733, "grad_norm": 1.2181195195908356, "learning_rate": 4.37426139221496e-05, "loss": 0.2844, "step": 8544 }, { "epoch": 1.0132811573579983, "grad_norm": 1.2655830160350774, "learning_rate": 4.374102534234348e-05, "loss": 0.259, "step": 8545 }, { "epoch": 1.0133997391201233, "grad_norm": 1.3374148346873538, "learning_rate": 4.37394365897681e-05, "loss": 0.2643, "step": 8546 }, { "epoch": 1.0135183208822482, "grad_norm": 1.3434120321238792, "learning_rate": 4.3737847664438084e-05, "loss": 0.3005, "step": 8547 }, { "epoch": 1.0136369026443732, "grad_norm": 1.1140270026468941, "learning_rate": 4.37362585663681e-05, "loss": 0.3034, "step": 8548 }, { "epoch": 1.0137554844064982, "grad_norm": 1.0476829904503462, "learning_rate": 4.3734669295572786e-05, "loss": 0.2417, "step": 8549 }, { "epoch": 1.0138740661686232, "grad_norm": 0.8781076452117877, "learning_rate": 4.3733079852066795e-05, "loss": 0.1618, "step": 8550 }, { "epoch": 1.0139926479307482, "grad_norm": 0.7836120547345586, "learning_rate": 4.373149023586477e-05, "loss": 0.182, "step": 8551 }, { "epoch": 1.0141112296928732, "grad_norm": 1.0635611802699878, "learning_rate": 4.372990044698139e-05, "loss": 0.2641, "step": 8552 }, { "epoch": 1.0142298114549981, "grad_norm": 1.0905779848649926, "learning_rate": 4.372831048543129e-05, "loss": 0.2228, "step": 8553 }, { "epoch": 1.0143483932171231, "grad_norm": 1.4739789595871764, "learning_rate": 4.372672035122913e-05, "loss": 0.3055, "step": 8554 }, { "epoch": 1.014466974979248, "grad_norm": 1.8320097436038831, "learning_rate": 4.3725130044389576e-05, "loss": 0.3883, "step": 8555 }, { "epoch": 1.014585556741373, "grad_norm": 1.096099754601582, "learning_rate": 4.3723539564927285e-05, "loss": 0.2171, "step": 8556 }, { "epoch": 1.014704138503498, "grad_norm": 1.1946322294060978, "learning_rate": 4.372194891285691e-05, "loss": 0.2437, "step": 8557 }, { "epoch": 1.0148227202656233, "grad_norm": 1.5795724462056968, "learning_rate": 4.372035808819314e-05, "loss": 0.3218, "step": 8558 }, { "epoch": 1.0149413020277482, "grad_norm": 1.1833073236381562, "learning_rate": 4.371876709095062e-05, "loss": 0.2017, "step": 8559 }, { "epoch": 1.0150598837898732, "grad_norm": 1.1839020467958385, "learning_rate": 4.3717175921144014e-05, "loss": 0.2859, "step": 8560 }, { "epoch": 1.0151784655519982, "grad_norm": 1.2676361647536176, "learning_rate": 4.371558457878801e-05, "loss": 0.2671, "step": 8561 }, { "epoch": 1.0152970473141232, "grad_norm": 1.1290241232784106, "learning_rate": 4.371399306389726e-05, "loss": 0.2056, "step": 8562 }, { "epoch": 1.0154156290762482, "grad_norm": 1.3565608920727867, "learning_rate": 4.371240137648645e-05, "loss": 0.3137, "step": 8563 }, { "epoch": 1.0155342108383731, "grad_norm": 1.6175094240854497, "learning_rate": 4.371080951657024e-05, "loss": 0.3397, "step": 8564 }, { "epoch": 1.0156527926004981, "grad_norm": 1.1415218168424681, "learning_rate": 4.370921748416331e-05, "loss": 0.2181, "step": 8565 }, { "epoch": 1.015771374362623, "grad_norm": 1.0185096003738565, "learning_rate": 4.370762527928034e-05, "loss": 0.2077, "step": 8566 }, { "epoch": 1.015889956124748, "grad_norm": 1.6076607894450587, "learning_rate": 4.3706032901936e-05, "loss": 0.3515, "step": 8567 }, { "epoch": 1.016008537886873, "grad_norm": 1.3478731083037627, "learning_rate": 4.370444035214498e-05, "loss": 0.251, "step": 8568 }, { "epoch": 1.016127119648998, "grad_norm": 0.8551066517021854, "learning_rate": 4.370284762992196e-05, "loss": 0.197, "step": 8569 }, { "epoch": 1.016245701411123, "grad_norm": 1.777650980670058, "learning_rate": 4.3701254735281616e-05, "loss": 0.3857, "step": 8570 }, { "epoch": 1.016364283173248, "grad_norm": 1.0350879916798532, "learning_rate": 4.3699661668238635e-05, "loss": 0.1891, "step": 8571 }, { "epoch": 1.016482864935373, "grad_norm": 1.185270775390171, "learning_rate": 4.3698068428807706e-05, "loss": 0.2342, "step": 8572 }, { "epoch": 1.016601446697498, "grad_norm": 1.4536409818391902, "learning_rate": 4.369647501700351e-05, "loss": 0.2734, "step": 8573 }, { "epoch": 1.016720028459623, "grad_norm": 1.3510088550288675, "learning_rate": 4.369488143284075e-05, "loss": 0.2676, "step": 8574 }, { "epoch": 1.016838610221748, "grad_norm": 1.4935079821909465, "learning_rate": 4.3693287676334106e-05, "loss": 0.218, "step": 8575 }, { "epoch": 1.016957191983873, "grad_norm": 1.2993281064220215, "learning_rate": 4.3691693747498276e-05, "loss": 0.2893, "step": 8576 }, { "epoch": 1.017075773745998, "grad_norm": 1.23285325843082, "learning_rate": 4.3690099646347946e-05, "loss": 0.3107, "step": 8577 }, { "epoch": 1.0171943555081229, "grad_norm": 1.0671111000727624, "learning_rate": 4.368850537289782e-05, "loss": 0.2652, "step": 8578 }, { "epoch": 1.0173129372702479, "grad_norm": 1.0361573956546033, "learning_rate": 4.368691092716258e-05, "loss": 0.305, "step": 8579 }, { "epoch": 1.0174315190323728, "grad_norm": 1.7640161514388055, "learning_rate": 4.368531630915695e-05, "loss": 0.304, "step": 8580 }, { "epoch": 1.0175501007944978, "grad_norm": 1.2224999735062672, "learning_rate": 4.3683721518895616e-05, "loss": 0.2788, "step": 8581 }, { "epoch": 1.0176686825566228, "grad_norm": 1.4266725900455859, "learning_rate": 4.3682126556393274e-05, "loss": 0.301, "step": 8582 }, { "epoch": 1.0177872643187478, "grad_norm": 1.224107914486952, "learning_rate": 4.368053142166465e-05, "loss": 0.3261, "step": 8583 }, { "epoch": 1.0179058460808728, "grad_norm": 1.2162984970764834, "learning_rate": 4.367893611472442e-05, "loss": 0.2351, "step": 8584 }, { "epoch": 1.0180244278429977, "grad_norm": 1.8551110503179864, "learning_rate": 4.3677340635587305e-05, "loss": 0.2918, "step": 8585 }, { "epoch": 1.0181430096051227, "grad_norm": 0.9759350665903611, "learning_rate": 4.367574498426802e-05, "loss": 0.1903, "step": 8586 }, { "epoch": 1.0182615913672477, "grad_norm": 1.2567856377894209, "learning_rate": 4.3674149160781256e-05, "loss": 0.3184, "step": 8587 }, { "epoch": 1.0183801731293727, "grad_norm": 1.170773376323589, "learning_rate": 4.367255316514175e-05, "loss": 0.268, "step": 8588 }, { "epoch": 1.0184987548914977, "grad_norm": 1.0343395661729524, "learning_rate": 4.3670956997364194e-05, "loss": 0.2083, "step": 8589 }, { "epoch": 1.0186173366536226, "grad_norm": 1.3085903682871336, "learning_rate": 4.366936065746331e-05, "loss": 0.2503, "step": 8590 }, { "epoch": 1.0187359184157476, "grad_norm": 1.0182033537148185, "learning_rate": 4.3667764145453816e-05, "loss": 0.2435, "step": 8591 }, { "epoch": 1.0188545001778726, "grad_norm": 1.3080754554820382, "learning_rate": 4.366616746135043e-05, "loss": 0.2516, "step": 8592 }, { "epoch": 1.0189730819399976, "grad_norm": 1.546386948849375, "learning_rate": 4.366457060516787e-05, "loss": 0.3783, "step": 8593 }, { "epoch": 1.0190916637021226, "grad_norm": 0.9466666465358384, "learning_rate": 4.3662973576920854e-05, "loss": 0.2242, "step": 8594 }, { "epoch": 1.0192102454642475, "grad_norm": 1.4825237752593396, "learning_rate": 4.366137637662411e-05, "loss": 0.2938, "step": 8595 }, { "epoch": 1.0193288272263725, "grad_norm": 1.0017315564352482, "learning_rate": 4.3659779004292364e-05, "loss": 0.1942, "step": 8596 }, { "epoch": 1.0194474089884975, "grad_norm": 1.05011877294742, "learning_rate": 4.365818145994033e-05, "loss": 0.1975, "step": 8597 }, { "epoch": 1.0195659907506225, "grad_norm": 1.2409887967350528, "learning_rate": 4.3656583743582745e-05, "loss": 0.2636, "step": 8598 }, { "epoch": 1.0196845725127475, "grad_norm": 1.763804159577235, "learning_rate": 4.365498585523434e-05, "loss": 0.266, "step": 8599 }, { "epoch": 1.0198031542748724, "grad_norm": 1.5727264498746163, "learning_rate": 4.365338779490984e-05, "loss": 0.3748, "step": 8600 }, { "epoch": 1.0199217360369974, "grad_norm": 1.1128405014607599, "learning_rate": 4.365178956262398e-05, "loss": 0.2391, "step": 8601 }, { "epoch": 1.0200403177991224, "grad_norm": 0.9792512394428746, "learning_rate": 4.365019115839148e-05, "loss": 0.1877, "step": 8602 }, { "epoch": 1.0201588995612474, "grad_norm": 1.255373785436766, "learning_rate": 4.364859258222711e-05, "loss": 0.2588, "step": 8603 }, { "epoch": 1.0202774813233724, "grad_norm": 1.2574673564154812, "learning_rate": 4.364699383414557e-05, "loss": 0.2384, "step": 8604 }, { "epoch": 1.0203960630854974, "grad_norm": 1.5460089415797247, "learning_rate": 4.364539491416162e-05, "loss": 0.282, "step": 8605 }, { "epoch": 1.0205146448476223, "grad_norm": 1.3123412455537662, "learning_rate": 4.364379582228999e-05, "loss": 0.2299, "step": 8606 }, { "epoch": 1.0206332266097473, "grad_norm": 1.0849822710258632, "learning_rate": 4.364219655854542e-05, "loss": 0.2088, "step": 8607 }, { "epoch": 1.0207518083718723, "grad_norm": 1.0046549242582878, "learning_rate": 4.364059712294267e-05, "loss": 0.2362, "step": 8608 }, { "epoch": 1.0208703901339975, "grad_norm": 1.0871466528388025, "learning_rate": 4.363899751549646e-05, "loss": 0.2419, "step": 8609 }, { "epoch": 1.0209889718961225, "grad_norm": 1.693557573374471, "learning_rate": 4.363739773622156e-05, "loss": 0.3573, "step": 8610 }, { "epoch": 1.0211075536582475, "grad_norm": 1.2342037127426082, "learning_rate": 4.3635797785132706e-05, "loss": 0.3145, "step": 8611 }, { "epoch": 1.0212261354203724, "grad_norm": 1.192676659756125, "learning_rate": 4.363419766224465e-05, "loss": 0.2205, "step": 8612 }, { "epoch": 1.0213447171824974, "grad_norm": 1.0235318233881867, "learning_rate": 4.3632597367572134e-05, "loss": 0.1873, "step": 8613 }, { "epoch": 1.0214632989446224, "grad_norm": 1.0484160852669286, "learning_rate": 4.3630996901129925e-05, "loss": 0.1826, "step": 8614 }, { "epoch": 1.0215818807067474, "grad_norm": 1.2313775590420182, "learning_rate": 4.3629396262932764e-05, "loss": 0.2684, "step": 8615 }, { "epoch": 1.0217004624688724, "grad_norm": 0.9980435957168756, "learning_rate": 4.3627795452995425e-05, "loss": 0.1999, "step": 8616 }, { "epoch": 1.0218190442309973, "grad_norm": 1.0631005237042457, "learning_rate": 4.362619447133265e-05, "loss": 0.2169, "step": 8617 }, { "epoch": 1.0219376259931223, "grad_norm": 1.1568639554661395, "learning_rate": 4.3624593317959204e-05, "loss": 0.2532, "step": 8618 }, { "epoch": 1.0220562077552473, "grad_norm": 1.2794863953608537, "learning_rate": 4.3622991992889847e-05, "loss": 0.296, "step": 8619 }, { "epoch": 1.0221747895173723, "grad_norm": 1.0152022377417569, "learning_rate": 4.362139049613934e-05, "loss": 0.2217, "step": 8620 }, { "epoch": 1.0222933712794973, "grad_norm": 1.100257457769699, "learning_rate": 4.3619788827722445e-05, "loss": 0.2168, "step": 8621 }, { "epoch": 1.0224119530416222, "grad_norm": 1.0345103681849837, "learning_rate": 4.361818698765394e-05, "loss": 0.2418, "step": 8622 }, { "epoch": 1.0225305348037472, "grad_norm": 2.107323870670551, "learning_rate": 4.361658497594857e-05, "loss": 0.4822, "step": 8623 }, { "epoch": 1.0226491165658722, "grad_norm": 1.4758190611139208, "learning_rate": 4.3614982792621115e-05, "loss": 0.4374, "step": 8624 }, { "epoch": 1.0227676983279972, "grad_norm": 1.0801703291989793, "learning_rate": 4.3613380437686356e-05, "loss": 0.2475, "step": 8625 }, { "epoch": 1.0228862800901222, "grad_norm": 1.348185040645482, "learning_rate": 4.361177791115905e-05, "loss": 0.3261, "step": 8626 }, { "epoch": 1.0230048618522471, "grad_norm": 1.1377051753775735, "learning_rate": 4.3610175213053977e-05, "loss": 0.2569, "step": 8627 }, { "epoch": 1.0231234436143721, "grad_norm": 0.7827188177737925, "learning_rate": 4.360857234338591e-05, "loss": 0.1651, "step": 8628 }, { "epoch": 1.023242025376497, "grad_norm": 1.127017370847579, "learning_rate": 4.360696930216962e-05, "loss": 0.2222, "step": 8629 }, { "epoch": 1.023360607138622, "grad_norm": 1.2987630458343589, "learning_rate": 4.3605366089419894e-05, "loss": 0.3068, "step": 8630 }, { "epoch": 1.023479188900747, "grad_norm": 1.0790454260782965, "learning_rate": 4.360376270515151e-05, "loss": 0.256, "step": 8631 }, { "epoch": 1.023597770662872, "grad_norm": 0.9345125406912789, "learning_rate": 4.360215914937924e-05, "loss": 0.2261, "step": 8632 }, { "epoch": 1.023716352424997, "grad_norm": 1.2805315332452034, "learning_rate": 4.360055542211788e-05, "loss": 0.2827, "step": 8633 }, { "epoch": 1.023834934187122, "grad_norm": 1.1373654659518602, "learning_rate": 4.359895152338221e-05, "loss": 0.2656, "step": 8634 }, { "epoch": 1.023953515949247, "grad_norm": 1.0349107668777628, "learning_rate": 4.3597347453187006e-05, "loss": 0.2204, "step": 8635 }, { "epoch": 1.024072097711372, "grad_norm": 1.5066624709931495, "learning_rate": 4.359574321154708e-05, "loss": 0.4631, "step": 8636 }, { "epoch": 1.024190679473497, "grad_norm": 1.4406702036177779, "learning_rate": 4.359413879847719e-05, "loss": 0.2948, "step": 8637 }, { "epoch": 1.024309261235622, "grad_norm": 1.257949406115359, "learning_rate": 4.3592534213992154e-05, "loss": 0.2531, "step": 8638 }, { "epoch": 1.024427842997747, "grad_norm": 1.344263420829829, "learning_rate": 4.359092945810674e-05, "loss": 0.3121, "step": 8639 }, { "epoch": 1.024546424759872, "grad_norm": 1.1011753445772992, "learning_rate": 4.3589324530835763e-05, "loss": 0.2807, "step": 8640 }, { "epoch": 1.0246650065219969, "grad_norm": 1.163955025529501, "learning_rate": 4.358771943219401e-05, "loss": 0.2398, "step": 8641 }, { "epoch": 1.0247835882841219, "grad_norm": 1.1846884286282486, "learning_rate": 4.358611416219627e-05, "loss": 0.276, "step": 8642 }, { "epoch": 1.0249021700462468, "grad_norm": 0.9238875793461886, "learning_rate": 4.358450872085735e-05, "loss": 0.2381, "step": 8643 }, { "epoch": 1.0250207518083718, "grad_norm": 1.0422962886427154, "learning_rate": 4.3582903108192054e-05, "loss": 0.2317, "step": 8644 }, { "epoch": 1.0251393335704968, "grad_norm": 1.6624129115416268, "learning_rate": 4.358129732421518e-05, "loss": 0.2337, "step": 8645 }, { "epoch": 1.0252579153326218, "grad_norm": 1.1133572928251048, "learning_rate": 4.3579691368941525e-05, "loss": 0.2645, "step": 8646 }, { "epoch": 1.0253764970947468, "grad_norm": 1.3500513350857724, "learning_rate": 4.35780852423859e-05, "loss": 0.2658, "step": 8647 }, { "epoch": 1.0254950788568717, "grad_norm": 1.265081611690696, "learning_rate": 4.357647894456312e-05, "loss": 0.2891, "step": 8648 }, { "epoch": 1.0256136606189967, "grad_norm": 1.20832128093361, "learning_rate": 4.3574872475487974e-05, "loss": 0.2433, "step": 8649 }, { "epoch": 1.0257322423811217, "grad_norm": 1.015469979660728, "learning_rate": 4.357326583517528e-05, "loss": 0.2232, "step": 8650 }, { "epoch": 1.0258508241432467, "grad_norm": 1.2756741788228796, "learning_rate": 4.3571659023639866e-05, "loss": 0.2253, "step": 8651 }, { "epoch": 1.0259694059053717, "grad_norm": 1.5126450377075686, "learning_rate": 4.357005204089651e-05, "loss": 0.2861, "step": 8652 }, { "epoch": 1.0260879876674966, "grad_norm": 1.1260895676288503, "learning_rate": 4.356844488696006e-05, "loss": 0.2471, "step": 8653 }, { "epoch": 1.0262065694296216, "grad_norm": 1.375178808750325, "learning_rate": 4.356683756184532e-05, "loss": 0.2569, "step": 8654 }, { "epoch": 1.0263251511917466, "grad_norm": 1.0525314230008287, "learning_rate": 4.3565230065567096e-05, "loss": 0.212, "step": 8655 }, { "epoch": 1.0264437329538716, "grad_norm": 1.2332741800940186, "learning_rate": 4.3563622398140225e-05, "loss": 0.278, "step": 8656 }, { "epoch": 1.0265623147159966, "grad_norm": 1.0165661462476312, "learning_rate": 4.3562014559579515e-05, "loss": 0.1787, "step": 8657 }, { "epoch": 1.0266808964781218, "grad_norm": 1.3748725008699616, "learning_rate": 4.3560406549899793e-05, "loss": 0.2766, "step": 8658 }, { "epoch": 1.0267994782402468, "grad_norm": 1.2954567251766318, "learning_rate": 4.3558798369115886e-05, "loss": 0.2646, "step": 8659 }, { "epoch": 1.0269180600023717, "grad_norm": 1.449890863718068, "learning_rate": 4.355719001724262e-05, "loss": 0.3121, "step": 8660 }, { "epoch": 1.0270366417644967, "grad_norm": 1.16587726818535, "learning_rate": 4.3555581494294806e-05, "loss": 0.2784, "step": 8661 }, { "epoch": 1.0271552235266217, "grad_norm": 1.2940707718686897, "learning_rate": 4.355397280028729e-05, "loss": 0.2863, "step": 8662 }, { "epoch": 1.0272738052887467, "grad_norm": 1.4868700893655498, "learning_rate": 4.355236393523491e-05, "loss": 0.3262, "step": 8663 }, { "epoch": 1.0273923870508717, "grad_norm": 1.0282925553844282, "learning_rate": 4.355075489915247e-05, "loss": 0.2224, "step": 8664 }, { "epoch": 1.0275109688129966, "grad_norm": 1.3438010538190113, "learning_rate": 4.354914569205482e-05, "loss": 0.2806, "step": 8665 }, { "epoch": 1.0276295505751216, "grad_norm": 1.5136575599641917, "learning_rate": 4.35475363139568e-05, "loss": 0.2856, "step": 8666 }, { "epoch": 1.0277481323372466, "grad_norm": 1.2312771558396547, "learning_rate": 4.3545926764873235e-05, "loss": 0.2265, "step": 8667 }, { "epoch": 1.0278667140993716, "grad_norm": 1.554837302441255, "learning_rate": 4.354431704481897e-05, "loss": 0.3886, "step": 8668 }, { "epoch": 1.0279852958614966, "grad_norm": 1.00322057370471, "learning_rate": 4.3542707153808835e-05, "loss": 0.1962, "step": 8669 }, { "epoch": 1.0281038776236215, "grad_norm": 1.2974210375724637, "learning_rate": 4.354109709185769e-05, "loss": 0.3219, "step": 8670 }, { "epoch": 1.0282224593857465, "grad_norm": 1.5103470047795027, "learning_rate": 4.353948685898036e-05, "loss": 0.311, "step": 8671 }, { "epoch": 1.0283410411478715, "grad_norm": 0.9170341863504202, "learning_rate": 4.3537876455191694e-05, "loss": 0.2145, "step": 8672 }, { "epoch": 1.0284596229099965, "grad_norm": 1.098818793018443, "learning_rate": 4.353626588050655e-05, "loss": 0.2459, "step": 8673 }, { "epoch": 1.0285782046721215, "grad_norm": 1.0529108121386994, "learning_rate": 4.3534655134939754e-05, "loss": 0.2663, "step": 8674 }, { "epoch": 1.0286967864342464, "grad_norm": 1.2126640959620578, "learning_rate": 4.353304421850616e-05, "loss": 0.3154, "step": 8675 }, { "epoch": 1.0288153681963714, "grad_norm": 1.3364425863511333, "learning_rate": 4.353143313122064e-05, "loss": 0.2916, "step": 8676 }, { "epoch": 1.0289339499584964, "grad_norm": 1.1624370747883905, "learning_rate": 4.352982187309802e-05, "loss": 0.2097, "step": 8677 }, { "epoch": 1.0290525317206214, "grad_norm": 1.6019569235270579, "learning_rate": 4.352821044415318e-05, "loss": 0.4394, "step": 8678 }, { "epoch": 1.0291711134827464, "grad_norm": 1.561617553273008, "learning_rate": 4.3526598844400944e-05, "loss": 0.3642, "step": 8679 }, { "epoch": 1.0292896952448713, "grad_norm": 1.452169521764443, "learning_rate": 4.3524987073856196e-05, "loss": 0.3088, "step": 8680 }, { "epoch": 1.0294082770069963, "grad_norm": 1.5145734356436107, "learning_rate": 4.3523375132533784e-05, "loss": 0.334, "step": 8681 }, { "epoch": 1.0295268587691213, "grad_norm": 0.8724238163042343, "learning_rate": 4.352176302044856e-05, "loss": 0.2184, "step": 8682 }, { "epoch": 1.0296454405312463, "grad_norm": 1.3311126649851854, "learning_rate": 4.352015073761541e-05, "loss": 0.3208, "step": 8683 }, { "epoch": 1.0297640222933713, "grad_norm": 1.4273368448124097, "learning_rate": 4.351853828404916e-05, "loss": 0.3032, "step": 8684 }, { "epoch": 1.0298826040554963, "grad_norm": 1.0024048730139754, "learning_rate": 4.351692565976471e-05, "loss": 0.2047, "step": 8685 }, { "epoch": 1.0300011858176212, "grad_norm": 1.1413840710263234, "learning_rate": 4.351531286477691e-05, "loss": 0.2301, "step": 8686 }, { "epoch": 1.0301197675797462, "grad_norm": 1.5495909259010694, "learning_rate": 4.3513699899100625e-05, "loss": 0.3071, "step": 8687 }, { "epoch": 1.0302383493418712, "grad_norm": 1.2274409169074116, "learning_rate": 4.351208676275074e-05, "loss": 0.2501, "step": 8688 }, { "epoch": 1.0303569311039962, "grad_norm": 1.1330816233984966, "learning_rate": 4.351047345574211e-05, "loss": 0.2604, "step": 8689 }, { "epoch": 1.0304755128661212, "grad_norm": 1.03018211653985, "learning_rate": 4.350885997808961e-05, "loss": 0.2421, "step": 8690 }, { "epoch": 1.0305940946282461, "grad_norm": 1.4782467227042544, "learning_rate": 4.350724632980812e-05, "loss": 0.2479, "step": 8691 }, { "epoch": 1.0307126763903711, "grad_norm": 1.3529534470570095, "learning_rate": 4.350563251091252e-05, "loss": 0.322, "step": 8692 }, { "epoch": 1.030831258152496, "grad_norm": 1.2563931807512323, "learning_rate": 4.350401852141768e-05, "loss": 0.2842, "step": 8693 }, { "epoch": 1.030949839914621, "grad_norm": 1.799749181714516, "learning_rate": 4.3502404361338465e-05, "loss": 0.4179, "step": 8694 }, { "epoch": 1.031068421676746, "grad_norm": 0.9816304716450948, "learning_rate": 4.350079003068979e-05, "loss": 0.166, "step": 8695 }, { "epoch": 1.031187003438871, "grad_norm": 0.9343594515011191, "learning_rate": 4.349917552948651e-05, "loss": 0.259, "step": 8696 }, { "epoch": 1.031305585200996, "grad_norm": 1.5170821471604519, "learning_rate": 4.349756085774352e-05, "loss": 0.3389, "step": 8697 }, { "epoch": 1.031424166963121, "grad_norm": 1.3630021742951526, "learning_rate": 4.3495946015475696e-05, "loss": 0.2963, "step": 8698 }, { "epoch": 1.031542748725246, "grad_norm": 1.0877449347402883, "learning_rate": 4.349433100269794e-05, "loss": 0.2391, "step": 8699 }, { "epoch": 1.031661330487371, "grad_norm": 1.016996287299599, "learning_rate": 4.349271581942512e-05, "loss": 0.2726, "step": 8700 }, { "epoch": 1.031779912249496, "grad_norm": 0.8723684196893926, "learning_rate": 4.349110046567215e-05, "loss": 0.1678, "step": 8701 }, { "epoch": 1.031898494011621, "grad_norm": 1.0390996424226597, "learning_rate": 4.3489484941453906e-05, "loss": 0.1867, "step": 8702 }, { "epoch": 1.032017075773746, "grad_norm": 1.1726741762108155, "learning_rate": 4.348786924678528e-05, "loss": 0.2271, "step": 8703 }, { "epoch": 1.0321356575358709, "grad_norm": 1.6747149529521428, "learning_rate": 4.3486253381681166e-05, "loss": 0.3035, "step": 8704 }, { "epoch": 1.0322542392979959, "grad_norm": 1.6946372994231809, "learning_rate": 4.3484637346156475e-05, "loss": 0.3954, "step": 8705 }, { "epoch": 1.0323728210601208, "grad_norm": 1.3241743348220776, "learning_rate": 4.348302114022609e-05, "loss": 0.2994, "step": 8706 }, { "epoch": 1.032491402822246, "grad_norm": 1.136246431497065, "learning_rate": 4.348140476390492e-05, "loss": 0.2315, "step": 8707 }, { "epoch": 1.0326099845843708, "grad_norm": 1.2393174987085978, "learning_rate": 4.3479788217207854e-05, "loss": 0.2246, "step": 8708 }, { "epoch": 1.032728566346496, "grad_norm": 1.2274423095927245, "learning_rate": 4.347817150014981e-05, "loss": 0.2129, "step": 8709 }, { "epoch": 1.032847148108621, "grad_norm": 1.3447853875204068, "learning_rate": 4.347655461274568e-05, "loss": 0.2691, "step": 8710 }, { "epoch": 1.032965729870746, "grad_norm": 1.2125514069554881, "learning_rate": 4.347493755501038e-05, "loss": 0.2693, "step": 8711 }, { "epoch": 1.033084311632871, "grad_norm": 0.995722339224991, "learning_rate": 4.34733203269588e-05, "loss": 0.1993, "step": 8712 }, { "epoch": 1.033202893394996, "grad_norm": 1.0156587330666167, "learning_rate": 4.3471702928605864e-05, "loss": 0.2063, "step": 8713 }, { "epoch": 1.033321475157121, "grad_norm": 1.176509565691804, "learning_rate": 4.347008535996648e-05, "loss": 0.2646, "step": 8714 }, { "epoch": 1.033440056919246, "grad_norm": 1.472871713133462, "learning_rate": 4.346846762105556e-05, "loss": 0.2844, "step": 8715 }, { "epoch": 1.0335586386813709, "grad_norm": 0.9656809403492589, "learning_rate": 4.3466849711888017e-05, "loss": 0.2099, "step": 8716 }, { "epoch": 1.0336772204434959, "grad_norm": 1.1951250258776642, "learning_rate": 4.346523163247876e-05, "loss": 0.2382, "step": 8717 }, { "epoch": 1.0337958022056208, "grad_norm": 1.2998572872598482, "learning_rate": 4.3463613382842714e-05, "loss": 0.2655, "step": 8718 }, { "epoch": 1.0339143839677458, "grad_norm": 1.2992851735719566, "learning_rate": 4.346199496299479e-05, "loss": 0.2203, "step": 8719 }, { "epoch": 1.0340329657298708, "grad_norm": 0.9981478588874155, "learning_rate": 4.346037637294992e-05, "loss": 0.1841, "step": 8720 }, { "epoch": 1.0341515474919958, "grad_norm": 1.4126861220404356, "learning_rate": 4.3458757612723014e-05, "loss": 0.289, "step": 8721 }, { "epoch": 1.0342701292541208, "grad_norm": 1.3344594151870173, "learning_rate": 4.3457138682329e-05, "loss": 0.3428, "step": 8722 }, { "epoch": 1.0343887110162457, "grad_norm": 1.3051806547541311, "learning_rate": 4.3455519581782794e-05, "loss": 0.2788, "step": 8723 }, { "epoch": 1.0345072927783707, "grad_norm": 1.0293813660546045, "learning_rate": 4.345390031109934e-05, "loss": 0.2292, "step": 8724 }, { "epoch": 1.0346258745404957, "grad_norm": 0.8618549552234482, "learning_rate": 4.345228087029355e-05, "loss": 0.1766, "step": 8725 }, { "epoch": 1.0347444563026207, "grad_norm": 0.9951411448236145, "learning_rate": 4.3450661259380354e-05, "loss": 0.2155, "step": 8726 }, { "epoch": 1.0348630380647457, "grad_norm": 0.9098048947576263, "learning_rate": 4.344904147837469e-05, "loss": 0.2203, "step": 8727 }, { "epoch": 1.0349816198268706, "grad_norm": 1.0960464828673955, "learning_rate": 4.3447421527291485e-05, "loss": 0.269, "step": 8728 }, { "epoch": 1.0351002015889956, "grad_norm": 1.4080394397051454, "learning_rate": 4.344580140614568e-05, "loss": 0.299, "step": 8729 }, { "epoch": 1.0352187833511206, "grad_norm": 1.2151715038059747, "learning_rate": 4.34441811149522e-05, "loss": 0.2634, "step": 8730 }, { "epoch": 1.0353373651132456, "grad_norm": 1.2135902872442277, "learning_rate": 4.3442560653725995e-05, "loss": 0.2671, "step": 8731 }, { "epoch": 1.0354559468753706, "grad_norm": 1.5541375428365387, "learning_rate": 4.344094002248199e-05, "loss": 0.2784, "step": 8732 }, { "epoch": 1.0355745286374956, "grad_norm": 1.2453193184981581, "learning_rate": 4.3439319221235134e-05, "loss": 0.3015, "step": 8733 }, { "epoch": 1.0356931103996205, "grad_norm": 1.3538594183892736, "learning_rate": 4.3437698250000365e-05, "loss": 0.2545, "step": 8734 }, { "epoch": 1.0358116921617455, "grad_norm": 1.4433193194285805, "learning_rate": 4.343607710879263e-05, "loss": 0.3047, "step": 8735 }, { "epoch": 1.0359302739238705, "grad_norm": 1.1398840434970634, "learning_rate": 4.343445579762687e-05, "loss": 0.2618, "step": 8736 }, { "epoch": 1.0360488556859955, "grad_norm": 1.1998045997009865, "learning_rate": 4.343283431651803e-05, "loss": 0.2572, "step": 8737 }, { "epoch": 1.0361674374481205, "grad_norm": 1.4733235172440011, "learning_rate": 4.343121266548107e-05, "loss": 0.3285, "step": 8738 }, { "epoch": 1.0362860192102454, "grad_norm": 0.8893544149415541, "learning_rate": 4.3429590844530935e-05, "loss": 0.1835, "step": 8739 }, { "epoch": 1.0364046009723704, "grad_norm": 0.8827264664372044, "learning_rate": 4.342796885368256e-05, "loss": 0.2134, "step": 8740 }, { "epoch": 1.0365231827344954, "grad_norm": 1.567845394523652, "learning_rate": 4.342634669295091e-05, "loss": 0.2758, "step": 8741 }, { "epoch": 1.0366417644966204, "grad_norm": 1.406028092529751, "learning_rate": 4.342472436235095e-05, "loss": 0.3393, "step": 8742 }, { "epoch": 1.0367603462587454, "grad_norm": 1.0314139850107558, "learning_rate": 4.342310186189762e-05, "loss": 0.234, "step": 8743 }, { "epoch": 1.0368789280208703, "grad_norm": 1.4233805817855374, "learning_rate": 4.342147919160588e-05, "loss": 0.2794, "step": 8744 }, { "epoch": 1.0369975097829953, "grad_norm": 1.1977579374524407, "learning_rate": 4.3419856351490694e-05, "loss": 0.2372, "step": 8745 }, { "epoch": 1.0371160915451203, "grad_norm": 1.3026654757368779, "learning_rate": 4.341823334156702e-05, "loss": 0.2697, "step": 8746 }, { "epoch": 1.0372346733072453, "grad_norm": 1.1288654667301898, "learning_rate": 4.341661016184982e-05, "loss": 0.2789, "step": 8747 }, { "epoch": 1.0373532550693703, "grad_norm": 0.8463236514993734, "learning_rate": 4.341498681235406e-05, "loss": 0.2149, "step": 8748 }, { "epoch": 1.0374718368314952, "grad_norm": 1.2312868921741293, "learning_rate": 4.34133632930947e-05, "loss": 0.2762, "step": 8749 }, { "epoch": 1.0375904185936202, "grad_norm": 1.308121474171523, "learning_rate": 4.341173960408672e-05, "loss": 0.258, "step": 8750 }, { "epoch": 1.0377090003557452, "grad_norm": 1.6606574225528736, "learning_rate": 4.341011574534507e-05, "loss": 0.3799, "step": 8751 }, { "epoch": 1.0378275821178702, "grad_norm": 1.0812082247667596, "learning_rate": 4.340849171688473e-05, "loss": 0.2239, "step": 8752 }, { "epoch": 1.0379461638799952, "grad_norm": 0.964034702374784, "learning_rate": 4.3406867518720675e-05, "loss": 0.2049, "step": 8753 }, { "epoch": 1.0380647456421201, "grad_norm": 1.714727204527804, "learning_rate": 4.3405243150867864e-05, "loss": 0.3433, "step": 8754 }, { "epoch": 1.0381833274042451, "grad_norm": 1.2635026607600452, "learning_rate": 4.340361861334129e-05, "loss": 0.2785, "step": 8755 }, { "epoch": 1.03830190916637, "grad_norm": 1.2343559142942158, "learning_rate": 4.340199390615591e-05, "loss": 0.2403, "step": 8756 }, { "epoch": 1.038420490928495, "grad_norm": 1.2816855849696513, "learning_rate": 4.340036902932673e-05, "loss": 0.2704, "step": 8757 }, { "epoch": 1.0385390726906203, "grad_norm": 0.987885643020447, "learning_rate": 4.339874398286869e-05, "loss": 0.2086, "step": 8758 }, { "epoch": 1.0386576544527453, "grad_norm": 1.527245303444485, "learning_rate": 4.339711876679681e-05, "loss": 0.2944, "step": 8759 }, { "epoch": 1.0387762362148703, "grad_norm": 1.4074646608389771, "learning_rate": 4.339549338112605e-05, "loss": 0.359, "step": 8760 }, { "epoch": 1.0388948179769952, "grad_norm": 1.045346746673983, "learning_rate": 4.3393867825871395e-05, "loss": 0.2346, "step": 8761 }, { "epoch": 1.0390133997391202, "grad_norm": 1.15375939389964, "learning_rate": 4.3392242101047835e-05, "loss": 0.2454, "step": 8762 }, { "epoch": 1.0391319815012452, "grad_norm": 1.1677095748083433, "learning_rate": 4.339061620667036e-05, "loss": 0.2468, "step": 8763 }, { "epoch": 1.0392505632633702, "grad_norm": 1.2720548306530906, "learning_rate": 4.338899014275395e-05, "loss": 0.2608, "step": 8764 }, { "epoch": 1.0393691450254952, "grad_norm": 1.246176241739673, "learning_rate": 4.33873639093136e-05, "loss": 0.2598, "step": 8765 }, { "epoch": 1.0394877267876201, "grad_norm": 1.1726450025804496, "learning_rate": 4.338573750636432e-05, "loss": 0.2873, "step": 8766 }, { "epoch": 1.0396063085497451, "grad_norm": 1.3884145795692522, "learning_rate": 4.3384110933921076e-05, "loss": 0.3105, "step": 8767 }, { "epoch": 1.03972489031187, "grad_norm": 1.160071445463676, "learning_rate": 4.3382484191998865e-05, "loss": 0.2186, "step": 8768 }, { "epoch": 1.039843472073995, "grad_norm": 1.3139286227333213, "learning_rate": 4.3380857280612704e-05, "loss": 0.2862, "step": 8769 }, { "epoch": 1.03996205383612, "grad_norm": 1.0447294964010192, "learning_rate": 4.337923019977757e-05, "loss": 0.1789, "step": 8770 }, { "epoch": 1.040080635598245, "grad_norm": 2.2519352602999994, "learning_rate": 4.337760294950848e-05, "loss": 0.3521, "step": 8771 }, { "epoch": 1.04019921736037, "grad_norm": 1.0381403781013068, "learning_rate": 4.3375975529820414e-05, "loss": 0.2436, "step": 8772 }, { "epoch": 1.040317799122495, "grad_norm": 1.0012960772412955, "learning_rate": 4.33743479407284e-05, "loss": 0.2095, "step": 8773 }, { "epoch": 1.04043638088462, "grad_norm": 1.520478511185997, "learning_rate": 4.3372720182247426e-05, "loss": 0.2404, "step": 8774 }, { "epoch": 1.040554962646745, "grad_norm": 1.0465651929800464, "learning_rate": 4.33710922543925e-05, "loss": 0.1991, "step": 8775 }, { "epoch": 1.04067354440887, "grad_norm": 0.8151450244121861, "learning_rate": 4.336946415717864e-05, "loss": 0.1673, "step": 8776 }, { "epoch": 1.040792126170995, "grad_norm": 1.0194437005826082, "learning_rate": 4.336783589062083e-05, "loss": 0.2326, "step": 8777 }, { "epoch": 1.04091070793312, "grad_norm": 1.0662929685560938, "learning_rate": 4.3366207454734114e-05, "loss": 0.2142, "step": 8778 }, { "epoch": 1.0410292896952449, "grad_norm": 1.231154637162679, "learning_rate": 4.336457884953348e-05, "loss": 0.2079, "step": 8779 }, { "epoch": 1.0411478714573699, "grad_norm": 0.9266943633340246, "learning_rate": 4.336295007503395e-05, "loss": 0.1922, "step": 8780 }, { "epoch": 1.0412664532194948, "grad_norm": 1.024038553262166, "learning_rate": 4.3361321131250545e-05, "loss": 0.2087, "step": 8781 }, { "epoch": 1.0413850349816198, "grad_norm": 1.3326645804226134, "learning_rate": 4.335969201819826e-05, "loss": 0.2493, "step": 8782 }, { "epoch": 1.0415036167437448, "grad_norm": 1.0418875404568255, "learning_rate": 4.335806273589214e-05, "loss": 0.1913, "step": 8783 }, { "epoch": 1.0416221985058698, "grad_norm": 1.3346171624502143, "learning_rate": 4.335643328434719e-05, "loss": 0.2546, "step": 8784 }, { "epoch": 1.0417407802679948, "grad_norm": 1.328033259233724, "learning_rate": 4.3354803663578433e-05, "loss": 0.2373, "step": 8785 }, { "epoch": 1.0418593620301198, "grad_norm": 1.121354638130676, "learning_rate": 4.3353173873600904e-05, "loss": 0.2081, "step": 8786 }, { "epoch": 1.0419779437922447, "grad_norm": 1.1537376938316042, "learning_rate": 4.3351543914429615e-05, "loss": 0.2208, "step": 8787 }, { "epoch": 1.0420965255543697, "grad_norm": 0.8725607853417957, "learning_rate": 4.334991378607959e-05, "loss": 0.1884, "step": 8788 }, { "epoch": 1.0422151073164947, "grad_norm": 1.342452993715864, "learning_rate": 4.3348283488565866e-05, "loss": 0.2047, "step": 8789 }, { "epoch": 1.0423336890786197, "grad_norm": 0.9416868486520811, "learning_rate": 4.334665302190346e-05, "loss": 0.2002, "step": 8790 }, { "epoch": 1.0424522708407447, "grad_norm": 1.2038602348958505, "learning_rate": 4.3345022386107426e-05, "loss": 0.2792, "step": 8791 }, { "epoch": 1.0425708526028696, "grad_norm": 1.437030084870201, "learning_rate": 4.334339158119277e-05, "loss": 0.2944, "step": 8792 }, { "epoch": 1.0426894343649946, "grad_norm": 1.0618987990180877, "learning_rate": 4.3341760607174544e-05, "loss": 0.1933, "step": 8793 }, { "epoch": 1.0428080161271196, "grad_norm": 1.4106723649317154, "learning_rate": 4.334012946406777e-05, "loss": 0.3085, "step": 8794 }, { "epoch": 1.0429265978892446, "grad_norm": 2.4255884991628043, "learning_rate": 4.3338498151887495e-05, "loss": 0.4899, "step": 8795 }, { "epoch": 1.0430451796513696, "grad_norm": 1.2560243748426505, "learning_rate": 4.333686667064876e-05, "loss": 0.3121, "step": 8796 }, { "epoch": 1.0431637614134945, "grad_norm": 1.4073067198024598, "learning_rate": 4.3335235020366596e-05, "loss": 0.3117, "step": 8797 }, { "epoch": 1.0432823431756195, "grad_norm": 1.2844028885794645, "learning_rate": 4.333360320105605e-05, "loss": 0.2589, "step": 8798 }, { "epoch": 1.0434009249377445, "grad_norm": 1.2192694668117827, "learning_rate": 4.333197121273217e-05, "loss": 0.2239, "step": 8799 }, { "epoch": 1.0435195066998695, "grad_norm": 0.9557466153046051, "learning_rate": 4.333033905540999e-05, "loss": 0.2309, "step": 8800 }, { "epoch": 1.0436380884619945, "grad_norm": 1.2515369872502398, "learning_rate": 4.332870672910456e-05, "loss": 0.2328, "step": 8801 }, { "epoch": 1.0437566702241194, "grad_norm": 1.0751337574664546, "learning_rate": 4.3327074233830936e-05, "loss": 0.2434, "step": 8802 }, { "epoch": 1.0438752519862444, "grad_norm": 1.1733544926862942, "learning_rate": 4.332544156960416e-05, "loss": 0.2431, "step": 8803 }, { "epoch": 1.0439938337483694, "grad_norm": 1.603492290251434, "learning_rate": 4.3323808736439286e-05, "loss": 0.3572, "step": 8804 }, { "epoch": 1.0441124155104944, "grad_norm": 1.5965113525621177, "learning_rate": 4.332217573435137e-05, "loss": 0.3333, "step": 8805 }, { "epoch": 1.0442309972726194, "grad_norm": 1.2903221023860152, "learning_rate": 4.332054256335545e-05, "loss": 0.2308, "step": 8806 }, { "epoch": 1.0443495790347446, "grad_norm": 1.067358006662811, "learning_rate": 4.33189092234666e-05, "loss": 0.2849, "step": 8807 }, { "epoch": 1.0444681607968693, "grad_norm": 1.2049656230679275, "learning_rate": 4.331727571469987e-05, "loss": 0.2301, "step": 8808 }, { "epoch": 1.0445867425589945, "grad_norm": 1.3029317489105614, "learning_rate": 4.331564203707032e-05, "loss": 0.2835, "step": 8809 }, { "epoch": 1.0447053243211195, "grad_norm": 1.1872153267628787, "learning_rate": 4.331400819059301e-05, "loss": 0.2829, "step": 8810 }, { "epoch": 1.0448239060832445, "grad_norm": 1.075999343223569, "learning_rate": 4.3312374175283004e-05, "loss": 0.2398, "step": 8811 }, { "epoch": 1.0449424878453695, "grad_norm": 0.9417120065820003, "learning_rate": 4.3310739991155365e-05, "loss": 0.2485, "step": 8812 }, { "epoch": 1.0450610696074945, "grad_norm": 0.9161381571324625, "learning_rate": 4.330910563822516e-05, "loss": 0.2055, "step": 8813 }, { "epoch": 1.0451796513696194, "grad_norm": 1.0111974834244464, "learning_rate": 4.330747111650744e-05, "loss": 0.2435, "step": 8814 }, { "epoch": 1.0452982331317444, "grad_norm": 0.9821113880939647, "learning_rate": 4.33058364260173e-05, "loss": 0.2338, "step": 8815 }, { "epoch": 1.0454168148938694, "grad_norm": 1.24870532998228, "learning_rate": 4.330420156676979e-05, "loss": 0.2424, "step": 8816 }, { "epoch": 1.0455353966559944, "grad_norm": 1.363187185728759, "learning_rate": 4.330256653877999e-05, "loss": 0.2645, "step": 8817 }, { "epoch": 1.0456539784181194, "grad_norm": 1.061073696883084, "learning_rate": 4.330093134206297e-05, "loss": 0.2196, "step": 8818 }, { "epoch": 1.0457725601802443, "grad_norm": 1.1266140754123848, "learning_rate": 4.32992959766338e-05, "loss": 0.2903, "step": 8819 }, { "epoch": 1.0458911419423693, "grad_norm": 1.1667155412209742, "learning_rate": 4.329766044250757e-05, "loss": 0.2576, "step": 8820 }, { "epoch": 1.0460097237044943, "grad_norm": 1.288270297229462, "learning_rate": 4.329602473969935e-05, "loss": 0.2887, "step": 8821 }, { "epoch": 1.0461283054666193, "grad_norm": 1.2296189744952115, "learning_rate": 4.329438886822421e-05, "loss": 0.26, "step": 8822 }, { "epoch": 1.0462468872287443, "grad_norm": 1.3036444824644098, "learning_rate": 4.3292752828097236e-05, "loss": 0.2429, "step": 8823 }, { "epoch": 1.0463654689908692, "grad_norm": 1.1042970330903388, "learning_rate": 4.329111661933353e-05, "loss": 0.2531, "step": 8824 }, { "epoch": 1.0464840507529942, "grad_norm": 1.1349324656388533, "learning_rate": 4.328948024194814e-05, "loss": 0.2451, "step": 8825 }, { "epoch": 1.0466026325151192, "grad_norm": 0.8544340971900954, "learning_rate": 4.3287843695956185e-05, "loss": 0.1866, "step": 8826 }, { "epoch": 1.0467212142772442, "grad_norm": 1.0537107714389413, "learning_rate": 4.328620698137272e-05, "loss": 0.2267, "step": 8827 }, { "epoch": 1.0468397960393692, "grad_norm": 1.8128610367506484, "learning_rate": 4.328457009821286e-05, "loss": 0.3897, "step": 8828 }, { "epoch": 1.0469583778014941, "grad_norm": 1.2589400105622177, "learning_rate": 4.328293304649169e-05, "loss": 0.2679, "step": 8829 }, { "epoch": 1.0470769595636191, "grad_norm": 1.2807313444723736, "learning_rate": 4.32812958262243e-05, "loss": 0.2581, "step": 8830 }, { "epoch": 1.047195541325744, "grad_norm": 1.1296185928476177, "learning_rate": 4.3279658437425775e-05, "loss": 0.2015, "step": 8831 }, { "epoch": 1.047314123087869, "grad_norm": 1.4441841270194349, "learning_rate": 4.327802088011121e-05, "loss": 0.2837, "step": 8832 }, { "epoch": 1.047432704849994, "grad_norm": 1.150429917494715, "learning_rate": 4.327638315429571e-05, "loss": 0.2888, "step": 8833 }, { "epoch": 1.047551286612119, "grad_norm": 1.400317009237685, "learning_rate": 4.327474525999437e-05, "loss": 0.3091, "step": 8834 }, { "epoch": 1.047669868374244, "grad_norm": 1.3868193789651517, "learning_rate": 4.327310719722229e-05, "loss": 0.3058, "step": 8835 }, { "epoch": 1.047788450136369, "grad_norm": 1.6883445466841218, "learning_rate": 4.3271468965994564e-05, "loss": 0.2998, "step": 8836 }, { "epoch": 1.047907031898494, "grad_norm": 1.2884518325711596, "learning_rate": 4.326983056632631e-05, "loss": 0.2696, "step": 8837 }, { "epoch": 1.048025613660619, "grad_norm": 1.2804090394545162, "learning_rate": 4.326819199823261e-05, "loss": 0.2649, "step": 8838 }, { "epoch": 1.048144195422744, "grad_norm": 1.3449961276881859, "learning_rate": 4.3266553261728595e-05, "loss": 0.2686, "step": 8839 }, { "epoch": 1.048262777184869, "grad_norm": 1.2266141106712674, "learning_rate": 4.326491435682936e-05, "loss": 0.2885, "step": 8840 }, { "epoch": 1.048381358946994, "grad_norm": 1.3522164905710596, "learning_rate": 4.326327528355e-05, "loss": 0.3456, "step": 8841 }, { "epoch": 1.048499940709119, "grad_norm": 1.2234743528214456, "learning_rate": 4.326163604190564e-05, "loss": 0.2582, "step": 8842 }, { "epoch": 1.0486185224712439, "grad_norm": 0.881793714689452, "learning_rate": 4.325999663191139e-05, "loss": 0.2151, "step": 8843 }, { "epoch": 1.0487371042333689, "grad_norm": 1.0167903365917756, "learning_rate": 4.3258357053582364e-05, "loss": 0.2123, "step": 8844 }, { "epoch": 1.0488556859954938, "grad_norm": 1.0141325216494252, "learning_rate": 4.325671730693368e-05, "loss": 0.2233, "step": 8845 }, { "epoch": 1.0489742677576188, "grad_norm": 1.1507496767609884, "learning_rate": 4.325507739198045e-05, "loss": 0.2524, "step": 8846 }, { "epoch": 1.0490928495197438, "grad_norm": 1.1619724040131971, "learning_rate": 4.3253437308737786e-05, "loss": 0.2489, "step": 8847 }, { "epoch": 1.0492114312818688, "grad_norm": 1.5593631672506056, "learning_rate": 4.325179705722082e-05, "loss": 0.3491, "step": 8848 }, { "epoch": 1.0493300130439938, "grad_norm": 1.3662738111680175, "learning_rate": 4.325015663744466e-05, "loss": 0.2712, "step": 8849 }, { "epoch": 1.0494485948061187, "grad_norm": 1.11739699865356, "learning_rate": 4.324851604942445e-05, "loss": 0.2472, "step": 8850 }, { "epoch": 1.0495671765682437, "grad_norm": 0.8798053927196526, "learning_rate": 4.324687529317528e-05, "loss": 0.1874, "step": 8851 }, { "epoch": 1.0496857583303687, "grad_norm": 1.4305799800818872, "learning_rate": 4.324523436871231e-05, "loss": 0.2831, "step": 8852 }, { "epoch": 1.0498043400924937, "grad_norm": 1.454677101607734, "learning_rate": 4.324359327605065e-05, "loss": 0.3216, "step": 8853 }, { "epoch": 1.0499229218546187, "grad_norm": 1.4454906560852592, "learning_rate": 4.3241952015205426e-05, "loss": 0.2819, "step": 8854 }, { "epoch": 1.0500415036167436, "grad_norm": 1.2015396646799459, "learning_rate": 4.324031058619178e-05, "loss": 0.2573, "step": 8855 }, { "epoch": 1.0501600853788686, "grad_norm": 0.9736835330095573, "learning_rate": 4.323866898902483e-05, "loss": 0.1928, "step": 8856 }, { "epoch": 1.0502786671409936, "grad_norm": 1.031610897540319, "learning_rate": 4.323702722371973e-05, "loss": 0.2458, "step": 8857 }, { "epoch": 1.0503972489031188, "grad_norm": 1.0462355883352463, "learning_rate": 4.3235385290291596e-05, "loss": 0.2368, "step": 8858 }, { "epoch": 1.0505158306652438, "grad_norm": 1.1710986055152672, "learning_rate": 4.323374318875557e-05, "loss": 0.2579, "step": 8859 }, { "epoch": 1.0506344124273688, "grad_norm": 1.066640261811951, "learning_rate": 4.32321009191268e-05, "loss": 0.2307, "step": 8860 }, { "epoch": 1.0507529941894937, "grad_norm": 1.0752604307279687, "learning_rate": 4.323045848142041e-05, "loss": 0.2504, "step": 8861 }, { "epoch": 1.0508715759516187, "grad_norm": 1.1638489414006856, "learning_rate": 4.322881587565155e-05, "loss": 0.1969, "step": 8862 }, { "epoch": 1.0509901577137437, "grad_norm": 1.3483171204363438, "learning_rate": 4.322717310183536e-05, "loss": 0.2722, "step": 8863 }, { "epoch": 1.0511087394758687, "grad_norm": 1.270579955871703, "learning_rate": 4.3225530159986984e-05, "loss": 0.2379, "step": 8864 }, { "epoch": 1.0512273212379937, "grad_norm": 1.4308481473300818, "learning_rate": 4.322388705012157e-05, "loss": 0.3347, "step": 8865 }, { "epoch": 1.0513459030001187, "grad_norm": 1.2329590279852176, "learning_rate": 4.322224377225427e-05, "loss": 0.2808, "step": 8866 }, { "epoch": 1.0514644847622436, "grad_norm": 1.6365087366627815, "learning_rate": 4.322060032640022e-05, "loss": 0.3423, "step": 8867 }, { "epoch": 1.0515830665243686, "grad_norm": 1.393620158262112, "learning_rate": 4.3218956712574585e-05, "loss": 0.3367, "step": 8868 }, { "epoch": 1.0517016482864936, "grad_norm": 1.1915877328615943, "learning_rate": 4.321731293079251e-05, "loss": 0.2407, "step": 8869 }, { "epoch": 1.0518202300486186, "grad_norm": 0.9902889047417804, "learning_rate": 4.3215668981069155e-05, "loss": 0.1838, "step": 8870 }, { "epoch": 1.0519388118107436, "grad_norm": 1.3464064652896202, "learning_rate": 4.3214024863419656e-05, "loss": 0.2616, "step": 8871 }, { "epoch": 1.0520573935728685, "grad_norm": 1.072765244046311, "learning_rate": 4.321238057785919e-05, "loss": 0.2378, "step": 8872 }, { "epoch": 1.0521759753349935, "grad_norm": 1.1827099644413859, "learning_rate": 4.3210736124402904e-05, "loss": 0.2382, "step": 8873 }, { "epoch": 1.0522945570971185, "grad_norm": 1.5199985647204575, "learning_rate": 4.3209091503065974e-05, "loss": 0.3294, "step": 8874 }, { "epoch": 1.0524131388592435, "grad_norm": 1.784573543790307, "learning_rate": 4.320744671386354e-05, "loss": 0.441, "step": 8875 }, { "epoch": 1.0525317206213685, "grad_norm": 1.4740787580854022, "learning_rate": 4.320580175681078e-05, "loss": 0.3039, "step": 8876 }, { "epoch": 1.0526503023834934, "grad_norm": 0.9469816093936522, "learning_rate": 4.3204156631922854e-05, "loss": 0.2525, "step": 8877 }, { "epoch": 1.0527688841456184, "grad_norm": 0.8960987478180966, "learning_rate": 4.3202511339214925e-05, "loss": 0.1686, "step": 8878 }, { "epoch": 1.0528874659077434, "grad_norm": 1.0265006529984584, "learning_rate": 4.320086587870216e-05, "loss": 0.1584, "step": 8879 }, { "epoch": 1.0530060476698684, "grad_norm": 1.1098375818949238, "learning_rate": 4.319922025039973e-05, "loss": 0.2569, "step": 8880 }, { "epoch": 1.0531246294319934, "grad_norm": 1.1059638779722991, "learning_rate": 4.319757445432281e-05, "loss": 0.2536, "step": 8881 }, { "epoch": 1.0532432111941183, "grad_norm": 1.720020472988096, "learning_rate": 4.319592849048657e-05, "loss": 0.3193, "step": 8882 }, { "epoch": 1.0533617929562433, "grad_norm": 0.8792292719247136, "learning_rate": 4.319428235890618e-05, "loss": 0.1824, "step": 8883 }, { "epoch": 1.0534803747183683, "grad_norm": 0.836266542622384, "learning_rate": 4.319263605959683e-05, "loss": 0.1576, "step": 8884 }, { "epoch": 1.0535989564804933, "grad_norm": 1.111317085103575, "learning_rate": 4.319098959257366e-05, "loss": 0.281, "step": 8885 }, { "epoch": 1.0537175382426183, "grad_norm": 1.181459652873555, "learning_rate": 4.318934295785189e-05, "loss": 0.2215, "step": 8886 }, { "epoch": 1.0538361200047432, "grad_norm": 1.1938403572142715, "learning_rate": 4.318769615544668e-05, "loss": 0.2211, "step": 8887 }, { "epoch": 1.0539547017668682, "grad_norm": 1.1544891816711473, "learning_rate": 4.318604918537321e-05, "loss": 0.1942, "step": 8888 }, { "epoch": 1.0540732835289932, "grad_norm": 1.054571048726214, "learning_rate": 4.318440204764668e-05, "loss": 0.2406, "step": 8889 }, { "epoch": 1.0541918652911182, "grad_norm": 1.1860359193342804, "learning_rate": 4.3182754742282256e-05, "loss": 0.2804, "step": 8890 }, { "epoch": 1.0543104470532432, "grad_norm": 1.2779103613385185, "learning_rate": 4.318110726929513e-05, "loss": 0.2807, "step": 8891 }, { "epoch": 1.0544290288153682, "grad_norm": 0.9972226531094392, "learning_rate": 4.3179459628700485e-05, "loss": 0.1994, "step": 8892 }, { "epoch": 1.0545476105774931, "grad_norm": 1.4029352478770503, "learning_rate": 4.317781182051353e-05, "loss": 0.2824, "step": 8893 }, { "epoch": 1.0546661923396181, "grad_norm": 1.4149659892357525, "learning_rate": 4.317616384474942e-05, "loss": 0.2862, "step": 8894 }, { "epoch": 1.054784774101743, "grad_norm": 1.2544310738692328, "learning_rate": 4.317451570142338e-05, "loss": 0.2375, "step": 8895 }, { "epoch": 1.054903355863868, "grad_norm": 1.1436078162441063, "learning_rate": 4.31728673905506e-05, "loss": 0.2226, "step": 8896 }, { "epoch": 1.055021937625993, "grad_norm": 1.4329395612525961, "learning_rate": 4.3171218912146254e-05, "loss": 0.3414, "step": 8897 }, { "epoch": 1.055140519388118, "grad_norm": 1.259362573078985, "learning_rate": 4.316957026622556e-05, "loss": 0.2341, "step": 8898 }, { "epoch": 1.055259101150243, "grad_norm": 1.2978788058913768, "learning_rate": 4.31679214528037e-05, "loss": 0.2884, "step": 8899 }, { "epoch": 1.055377682912368, "grad_norm": 1.0885381186198704, "learning_rate": 4.3166272471895896e-05, "loss": 0.2235, "step": 8900 }, { "epoch": 1.055496264674493, "grad_norm": 1.316034831765526, "learning_rate": 4.3164623323517336e-05, "loss": 0.3118, "step": 8901 }, { "epoch": 1.055614846436618, "grad_norm": 1.4534453300777241, "learning_rate": 4.316297400768321e-05, "loss": 0.2875, "step": 8902 }, { "epoch": 1.055733428198743, "grad_norm": 1.2913230911341091, "learning_rate": 4.316132452440875e-05, "loss": 0.2314, "step": 8903 }, { "epoch": 1.055852009960868, "grad_norm": 1.0389501436781414, "learning_rate": 4.315967487370915e-05, "loss": 0.1906, "step": 8904 }, { "epoch": 1.055970591722993, "grad_norm": 1.2412164804531642, "learning_rate": 4.3158025055599606e-05, "loss": 0.2636, "step": 8905 }, { "epoch": 1.0560891734851179, "grad_norm": 1.4873595366288763, "learning_rate": 4.315637507009535e-05, "loss": 0.3961, "step": 8906 }, { "epoch": 1.056207755247243, "grad_norm": 1.123406053570726, "learning_rate": 4.3154724917211566e-05, "loss": 0.2129, "step": 8907 }, { "epoch": 1.056326337009368, "grad_norm": 1.5617426636062754, "learning_rate": 4.3153074596963486e-05, "loss": 0.3703, "step": 8908 }, { "epoch": 1.056444918771493, "grad_norm": 1.438374245803892, "learning_rate": 4.315142410936632e-05, "loss": 0.3027, "step": 8909 }, { "epoch": 1.056563500533618, "grad_norm": 1.1159150963963393, "learning_rate": 4.3149773454435295e-05, "loss": 0.1945, "step": 8910 }, { "epoch": 1.056682082295743, "grad_norm": 1.343367968291009, "learning_rate": 4.31481226321856e-05, "loss": 0.3615, "step": 8911 }, { "epoch": 1.056800664057868, "grad_norm": 1.1729068056748642, "learning_rate": 4.314647164263247e-05, "loss": 0.2827, "step": 8912 }, { "epoch": 1.056919245819993, "grad_norm": 1.6603533184132033, "learning_rate": 4.3144820485791124e-05, "loss": 0.339, "step": 8913 }, { "epoch": 1.057037827582118, "grad_norm": 0.9710939359691912, "learning_rate": 4.314316916167679e-05, "loss": 0.2192, "step": 8914 }, { "epoch": 1.057156409344243, "grad_norm": 1.0978209726910404, "learning_rate": 4.3141517670304686e-05, "loss": 0.2285, "step": 8915 }, { "epoch": 1.057274991106368, "grad_norm": 1.0412556571995824, "learning_rate": 4.3139866011690034e-05, "loss": 0.2728, "step": 8916 }, { "epoch": 1.057393572868493, "grad_norm": 1.2514926894578777, "learning_rate": 4.313821418584806e-05, "loss": 0.3034, "step": 8917 }, { "epoch": 1.0575121546306179, "grad_norm": 1.079763254399404, "learning_rate": 4.3136562192793995e-05, "loss": 0.2283, "step": 8918 }, { "epoch": 1.0576307363927429, "grad_norm": 1.1644936700450295, "learning_rate": 4.313491003254306e-05, "loss": 0.2714, "step": 8919 }, { "epoch": 1.0577493181548678, "grad_norm": 1.4955168034119999, "learning_rate": 4.31332577051105e-05, "loss": 0.2799, "step": 8920 }, { "epoch": 1.0578678999169928, "grad_norm": 1.4482151049037677, "learning_rate": 4.313160521051154e-05, "loss": 0.3154, "step": 8921 }, { "epoch": 1.0579864816791178, "grad_norm": 0.9561902265401493, "learning_rate": 4.312995254876141e-05, "loss": 0.2089, "step": 8922 }, { "epoch": 1.0581050634412428, "grad_norm": 1.2550428432264324, "learning_rate": 4.312829971987536e-05, "loss": 0.2357, "step": 8923 }, { "epoch": 1.0582236452033678, "grad_norm": 1.1274592034914908, "learning_rate": 4.312664672386861e-05, "loss": 0.2621, "step": 8924 }, { "epoch": 1.0583422269654927, "grad_norm": 1.0554154600257084, "learning_rate": 4.31249935607564e-05, "loss": 0.2087, "step": 8925 }, { "epoch": 1.0584608087276177, "grad_norm": 1.3625757716577287, "learning_rate": 4.312334023055399e-05, "loss": 0.3026, "step": 8926 }, { "epoch": 1.0585793904897427, "grad_norm": 1.296293967206312, "learning_rate": 4.31216867332766e-05, "loss": 0.2705, "step": 8927 }, { "epoch": 1.0586979722518677, "grad_norm": 1.2736016172555966, "learning_rate": 4.3120033068939474e-05, "loss": 0.3028, "step": 8928 }, { "epoch": 1.0588165540139927, "grad_norm": 1.1639038129477708, "learning_rate": 4.311837923755787e-05, "loss": 0.255, "step": 8929 }, { "epoch": 1.0589351357761176, "grad_norm": 1.0060205868278775, "learning_rate": 4.311672523914703e-05, "loss": 0.214, "step": 8930 }, { "epoch": 1.0590537175382426, "grad_norm": 0.8776638959645615, "learning_rate": 4.311507107372219e-05, "loss": 0.2223, "step": 8931 }, { "epoch": 1.0591722993003676, "grad_norm": 1.0564195305153439, "learning_rate": 4.311341674129862e-05, "loss": 0.2338, "step": 8932 }, { "epoch": 1.0592908810624926, "grad_norm": 1.1491375042693968, "learning_rate": 4.311176224189155e-05, "loss": 0.204, "step": 8933 }, { "epoch": 1.0594094628246176, "grad_norm": 1.720202087651263, "learning_rate": 4.311010757551625e-05, "loss": 0.4948, "step": 8934 }, { "epoch": 1.0595280445867425, "grad_norm": 1.0621428950579845, "learning_rate": 4.3108452742187966e-05, "loss": 0.2183, "step": 8935 }, { "epoch": 1.0596466263488675, "grad_norm": 1.1162439123203791, "learning_rate": 4.310679774192195e-05, "loss": 0.2368, "step": 8936 }, { "epoch": 1.0597652081109925, "grad_norm": 0.7739099401719993, "learning_rate": 4.310514257473347e-05, "loss": 0.1811, "step": 8937 }, { "epoch": 1.0598837898731175, "grad_norm": 1.066524205596646, "learning_rate": 4.310348724063776e-05, "loss": 0.2337, "step": 8938 }, { "epoch": 1.0600023716352425, "grad_norm": 1.4653809793886634, "learning_rate": 4.3101831739650114e-05, "loss": 0.3493, "step": 8939 }, { "epoch": 1.0601209533973674, "grad_norm": 1.5810304447391754, "learning_rate": 4.310017607178578e-05, "loss": 0.3119, "step": 8940 }, { "epoch": 1.0602395351594924, "grad_norm": 1.1318451697779535, "learning_rate": 4.309852023706e-05, "loss": 0.2284, "step": 8941 }, { "epoch": 1.0603581169216174, "grad_norm": 0.9160378736782959, "learning_rate": 4.3096864235488074e-05, "loss": 0.2057, "step": 8942 }, { "epoch": 1.0604766986837424, "grad_norm": 1.15808341029762, "learning_rate": 4.309520806708525e-05, "loss": 0.3078, "step": 8943 }, { "epoch": 1.0605952804458674, "grad_norm": 1.3413889035162123, "learning_rate": 4.3093551731866786e-05, "loss": 0.286, "step": 8944 }, { "epoch": 1.0607138622079924, "grad_norm": 0.7965623356903778, "learning_rate": 4.3091895229847973e-05, "loss": 0.1911, "step": 8945 }, { "epoch": 1.0608324439701173, "grad_norm": 1.3946552556219765, "learning_rate": 4.309023856104407e-05, "loss": 0.357, "step": 8946 }, { "epoch": 1.0609510257322423, "grad_norm": 1.1183816706160334, "learning_rate": 4.3088581725470344e-05, "loss": 0.2413, "step": 8947 }, { "epoch": 1.0610696074943673, "grad_norm": 1.3686165939414234, "learning_rate": 4.308692472314209e-05, "loss": 0.2337, "step": 8948 }, { "epoch": 1.0611881892564923, "grad_norm": 0.9217715080278231, "learning_rate": 4.308526755407456e-05, "loss": 0.2478, "step": 8949 }, { "epoch": 1.0613067710186173, "grad_norm": 1.3718073212395634, "learning_rate": 4.308361021828304e-05, "loss": 0.2803, "step": 8950 }, { "epoch": 1.0614253527807422, "grad_norm": 1.229293830563055, "learning_rate": 4.3081952715782814e-05, "loss": 0.2515, "step": 8951 }, { "epoch": 1.0615439345428672, "grad_norm": 0.9984651829924758, "learning_rate": 4.308029504658915e-05, "loss": 0.287, "step": 8952 }, { "epoch": 1.0616625163049922, "grad_norm": 1.119880996453459, "learning_rate": 4.307863721071734e-05, "loss": 0.2202, "step": 8953 }, { "epoch": 1.0617810980671172, "grad_norm": 1.2831647967365851, "learning_rate": 4.307697920818268e-05, "loss": 0.221, "step": 8954 }, { "epoch": 1.0618996798292422, "grad_norm": 1.1470940780362604, "learning_rate": 4.307532103900042e-05, "loss": 0.2104, "step": 8955 }, { "epoch": 1.0620182615913671, "grad_norm": 1.1047220487826643, "learning_rate": 4.307366270318587e-05, "loss": 0.1999, "step": 8956 }, { "epoch": 1.0621368433534921, "grad_norm": 1.1045817095076218, "learning_rate": 4.307200420075431e-05, "loss": 0.2959, "step": 8957 }, { "epoch": 1.0622554251156173, "grad_norm": 1.2946283968846024, "learning_rate": 4.3070345531721046e-05, "loss": 0.26, "step": 8958 }, { "epoch": 1.0623740068777423, "grad_norm": 1.0736141605270482, "learning_rate": 4.3068686696101344e-05, "loss": 0.2717, "step": 8959 }, { "epoch": 1.0624925886398673, "grad_norm": 0.8447187597186173, "learning_rate": 4.306702769391051e-05, "loss": 0.1633, "step": 8960 }, { "epoch": 1.0626111704019923, "grad_norm": 1.2851040672241327, "learning_rate": 4.306536852516384e-05, "loss": 0.2848, "step": 8961 }, { "epoch": 1.0627297521641172, "grad_norm": 1.3266376397554454, "learning_rate": 4.306370918987662e-05, "loss": 0.2524, "step": 8962 }, { "epoch": 1.0628483339262422, "grad_norm": 1.3851189973140525, "learning_rate": 4.306204968806415e-05, "loss": 0.2685, "step": 8963 }, { "epoch": 1.0629669156883672, "grad_norm": 1.517439237185437, "learning_rate": 4.306039001974174e-05, "loss": 0.3562, "step": 8964 }, { "epoch": 1.0630854974504922, "grad_norm": 1.036291773794552, "learning_rate": 4.305873018492468e-05, "loss": 0.2132, "step": 8965 }, { "epoch": 1.0632040792126172, "grad_norm": 1.761000048706805, "learning_rate": 4.305707018362827e-05, "loss": 0.2972, "step": 8966 }, { "epoch": 1.0633226609747422, "grad_norm": 1.009252824133555, "learning_rate": 4.3055410015867816e-05, "loss": 0.1978, "step": 8967 }, { "epoch": 1.0634412427368671, "grad_norm": 1.150959082696007, "learning_rate": 4.3053749681658626e-05, "loss": 0.3112, "step": 8968 }, { "epoch": 1.0635598244989921, "grad_norm": 1.3942882394484684, "learning_rate": 4.3052089181016e-05, "loss": 0.3407, "step": 8969 }, { "epoch": 1.063678406261117, "grad_norm": 1.3836221604775842, "learning_rate": 4.3050428513955246e-05, "loss": 0.2642, "step": 8970 }, { "epoch": 1.063796988023242, "grad_norm": 1.0041272823612306, "learning_rate": 4.304876768049168e-05, "loss": 0.1541, "step": 8971 }, { "epoch": 1.063915569785367, "grad_norm": 1.518318407834466, "learning_rate": 4.30471066806406e-05, "loss": 0.2788, "step": 8972 }, { "epoch": 1.064034151547492, "grad_norm": 0.979473770560935, "learning_rate": 4.3045445514417335e-05, "loss": 0.1912, "step": 8973 }, { "epoch": 1.064152733309617, "grad_norm": 1.2893948876148587, "learning_rate": 4.304378418183719e-05, "loss": 0.3314, "step": 8974 }, { "epoch": 1.064271315071742, "grad_norm": 1.2384475259864083, "learning_rate": 4.304212268291549e-05, "loss": 0.2289, "step": 8975 }, { "epoch": 1.064389896833867, "grad_norm": 1.281637207632504, "learning_rate": 4.304046101766753e-05, "loss": 0.3511, "step": 8976 }, { "epoch": 1.064508478595992, "grad_norm": 1.1446901988744174, "learning_rate": 4.3038799186108645e-05, "loss": 0.261, "step": 8977 }, { "epoch": 1.064627060358117, "grad_norm": 0.9719391537309511, "learning_rate": 4.303713718825416e-05, "loss": 0.2018, "step": 8978 }, { "epoch": 1.064745642120242, "grad_norm": 1.3742015684465356, "learning_rate": 4.3035475024119375e-05, "loss": 0.2756, "step": 8979 }, { "epoch": 1.064864223882367, "grad_norm": 1.3608032787627595, "learning_rate": 4.3033812693719646e-05, "loss": 0.2581, "step": 8980 }, { "epoch": 1.0649828056444919, "grad_norm": 1.2074513873318233, "learning_rate": 4.303215019707026e-05, "loss": 0.2456, "step": 8981 }, { "epoch": 1.0651013874066169, "grad_norm": 1.3830806370005657, "learning_rate": 4.303048753418657e-05, "loss": 0.2876, "step": 8982 }, { "epoch": 1.0652199691687418, "grad_norm": 1.2143586582734758, "learning_rate": 4.302882470508389e-05, "loss": 0.269, "step": 8983 }, { "epoch": 1.0653385509308668, "grad_norm": 1.2168797711966175, "learning_rate": 4.302716170977756e-05, "loss": 0.2994, "step": 8984 }, { "epoch": 1.0654571326929918, "grad_norm": 1.1437484886528926, "learning_rate": 4.302549854828291e-05, "loss": 0.273, "step": 8985 }, { "epoch": 1.0655757144551168, "grad_norm": 0.9078382308545851, "learning_rate": 4.302383522061526e-05, "loss": 0.1879, "step": 8986 }, { "epoch": 1.0656942962172418, "grad_norm": 0.8795419409066834, "learning_rate": 4.302217172678995e-05, "loss": 0.1623, "step": 8987 }, { "epoch": 1.0658128779793667, "grad_norm": 1.355713394107112, "learning_rate": 4.302050806682233e-05, "loss": 0.3057, "step": 8988 }, { "epoch": 1.0659314597414917, "grad_norm": 1.0960067785588234, "learning_rate": 4.301884424072771e-05, "loss": 0.2258, "step": 8989 }, { "epoch": 1.0660500415036167, "grad_norm": 1.32699973326967, "learning_rate": 4.301718024852145e-05, "loss": 0.2873, "step": 8990 }, { "epoch": 1.0661686232657417, "grad_norm": 1.092241496650955, "learning_rate": 4.3015516090218874e-05, "loss": 0.2718, "step": 8991 }, { "epoch": 1.0662872050278667, "grad_norm": 1.1706829987355307, "learning_rate": 4.3013851765835336e-05, "loss": 0.2231, "step": 8992 }, { "epoch": 1.0664057867899916, "grad_norm": 0.9454342360156471, "learning_rate": 4.301218727538618e-05, "loss": 0.2196, "step": 8993 }, { "epoch": 1.0665243685521166, "grad_norm": 1.1967164930367047, "learning_rate": 4.301052261888674e-05, "loss": 0.1941, "step": 8994 }, { "epoch": 1.0666429503142416, "grad_norm": 1.2763947562754276, "learning_rate": 4.300885779635237e-05, "loss": 0.3452, "step": 8995 }, { "epoch": 1.0667615320763666, "grad_norm": 1.1591706203728867, "learning_rate": 4.300719280779841e-05, "loss": 0.2402, "step": 8996 }, { "epoch": 1.0668801138384916, "grad_norm": 1.8672851178921273, "learning_rate": 4.300552765324022e-05, "loss": 0.3687, "step": 8997 }, { "epoch": 1.0669986956006166, "grad_norm": 1.3462986581049743, "learning_rate": 4.300386233269315e-05, "loss": 0.3041, "step": 8998 }, { "epoch": 1.0671172773627415, "grad_norm": 1.2106621143018468, "learning_rate": 4.300219684617253e-05, "loss": 0.244, "step": 8999 }, { "epoch": 1.0672358591248665, "grad_norm": 1.4677575036285666, "learning_rate": 4.300053119369374e-05, "loss": 0.3419, "step": 9000 }, { "epoch": 1.0673544408869915, "grad_norm": 0.9462512514032652, "learning_rate": 4.299886537527213e-05, "loss": 0.1823, "step": 9001 }, { "epoch": 1.0674730226491165, "grad_norm": 1.3269085245164685, "learning_rate": 4.299719939092305e-05, "loss": 0.2497, "step": 9002 }, { "epoch": 1.0675916044112415, "grad_norm": 1.339687899300843, "learning_rate": 4.2995533240661856e-05, "loss": 0.2999, "step": 9003 }, { "epoch": 1.0677101861733664, "grad_norm": 1.0578807432046256, "learning_rate": 4.2993866924503924e-05, "loss": 0.1824, "step": 9004 }, { "epoch": 1.0678287679354914, "grad_norm": 1.1039892393448962, "learning_rate": 4.29922004424646e-05, "loss": 0.2371, "step": 9005 }, { "epoch": 1.0679473496976164, "grad_norm": 1.1791841363515745, "learning_rate": 4.299053379455925e-05, "loss": 0.272, "step": 9006 }, { "epoch": 1.0680659314597416, "grad_norm": 1.2211231411830847, "learning_rate": 4.298886698080323e-05, "loss": 0.2399, "step": 9007 }, { "epoch": 1.0681845132218664, "grad_norm": 1.3593767532217618, "learning_rate": 4.2987200001211935e-05, "loss": 0.3127, "step": 9008 }, { "epoch": 1.0683030949839916, "grad_norm": 1.1826771488184598, "learning_rate": 4.2985532855800705e-05, "loss": 0.3167, "step": 9009 }, { "epoch": 1.0684216767461165, "grad_norm": 1.2477171845014305, "learning_rate": 4.298386554458491e-05, "loss": 0.2417, "step": 9010 }, { "epoch": 1.0685402585082415, "grad_norm": 1.065942137851997, "learning_rate": 4.298219806757994e-05, "loss": 0.281, "step": 9011 }, { "epoch": 1.0686588402703665, "grad_norm": 1.2075585359525516, "learning_rate": 4.2980530424801146e-05, "loss": 0.2582, "step": 9012 }, { "epoch": 1.0687774220324915, "grad_norm": 0.9711309583236689, "learning_rate": 4.2978862616263915e-05, "loss": 0.2643, "step": 9013 }, { "epoch": 1.0688960037946165, "grad_norm": 1.7254556682709705, "learning_rate": 4.2977194641983614e-05, "loss": 0.475, "step": 9014 }, { "epoch": 1.0690145855567414, "grad_norm": 1.0788452736813126, "learning_rate": 4.2975526501975624e-05, "loss": 0.2638, "step": 9015 }, { "epoch": 1.0691331673188664, "grad_norm": 1.0433752002193102, "learning_rate": 4.297385819625533e-05, "loss": 0.2589, "step": 9016 }, { "epoch": 1.0692517490809914, "grad_norm": 1.2275920345196518, "learning_rate": 4.2972189724838095e-05, "loss": 0.2631, "step": 9017 }, { "epoch": 1.0693703308431164, "grad_norm": 1.146696780834424, "learning_rate": 4.2970521087739314e-05, "loss": 0.2788, "step": 9018 }, { "epoch": 1.0694889126052414, "grad_norm": 1.1909728252723029, "learning_rate": 4.296885228497437e-05, "loss": 0.3278, "step": 9019 }, { "epoch": 1.0696074943673664, "grad_norm": 1.262996716676593, "learning_rate": 4.296718331655863e-05, "loss": 0.2283, "step": 9020 }, { "epoch": 1.0697260761294913, "grad_norm": 1.2604578148124501, "learning_rate": 4.2965514182507505e-05, "loss": 0.2741, "step": 9021 }, { "epoch": 1.0698446578916163, "grad_norm": 1.6119046725692376, "learning_rate": 4.296384488283637e-05, "loss": 0.4329, "step": 9022 }, { "epoch": 1.0699632396537413, "grad_norm": 1.4748898571930285, "learning_rate": 4.296217541756061e-05, "loss": 0.2737, "step": 9023 }, { "epoch": 1.0700818214158663, "grad_norm": 0.9677703894960874, "learning_rate": 4.2960505786695616e-05, "loss": 0.1868, "step": 9024 }, { "epoch": 1.0702004031779913, "grad_norm": 0.9897652041647286, "learning_rate": 4.295883599025679e-05, "loss": 0.2071, "step": 9025 }, { "epoch": 1.0703189849401162, "grad_norm": 1.4089377201804612, "learning_rate": 4.295716602825951e-05, "loss": 0.2608, "step": 9026 }, { "epoch": 1.0704375667022412, "grad_norm": 1.3841278894862825, "learning_rate": 4.295549590071919e-05, "loss": 0.3057, "step": 9027 }, { "epoch": 1.0705561484643662, "grad_norm": 1.0201634744279666, "learning_rate": 4.295382560765121e-05, "loss": 0.1894, "step": 9028 }, { "epoch": 1.0706747302264912, "grad_norm": 1.2110874357248926, "learning_rate": 4.295215514907097e-05, "loss": 0.2433, "step": 9029 }, { "epoch": 1.0707933119886162, "grad_norm": 1.337181480885989, "learning_rate": 4.2950484524993885e-05, "loss": 0.309, "step": 9030 }, { "epoch": 1.0709118937507411, "grad_norm": 1.420344963179303, "learning_rate": 4.294881373543533e-05, "loss": 0.3151, "step": 9031 }, { "epoch": 1.0710304755128661, "grad_norm": 1.3460807564752804, "learning_rate": 4.2947142780410746e-05, "loss": 0.28, "step": 9032 }, { "epoch": 1.071149057274991, "grad_norm": 1.4409974752364296, "learning_rate": 4.29454716599355e-05, "loss": 0.2758, "step": 9033 }, { "epoch": 1.071267639037116, "grad_norm": 1.0416911648245097, "learning_rate": 4.2943800374025e-05, "loss": 0.2346, "step": 9034 }, { "epoch": 1.071386220799241, "grad_norm": 1.318664041998203, "learning_rate": 4.294212892269468e-05, "loss": 0.2875, "step": 9035 }, { "epoch": 1.071504802561366, "grad_norm": 1.1107026645148796, "learning_rate": 4.294045730595993e-05, "loss": 0.1974, "step": 9036 }, { "epoch": 1.071623384323491, "grad_norm": 1.4585842315934443, "learning_rate": 4.2938785523836164e-05, "loss": 0.3108, "step": 9037 }, { "epoch": 1.071741966085616, "grad_norm": 1.0238285458899163, "learning_rate": 4.2937113576338796e-05, "loss": 0.2193, "step": 9038 }, { "epoch": 1.071860547847741, "grad_norm": 1.2088399274242076, "learning_rate": 4.293544146348323e-05, "loss": 0.2737, "step": 9039 }, { "epoch": 1.071979129609866, "grad_norm": 0.970123567436288, "learning_rate": 4.2933769185284894e-05, "loss": 0.2311, "step": 9040 }, { "epoch": 1.072097711371991, "grad_norm": 1.277235530249087, "learning_rate": 4.293209674175919e-05, "loss": 0.2831, "step": 9041 }, { "epoch": 1.072216293134116, "grad_norm": 1.2274878147646704, "learning_rate": 4.293042413292156e-05, "loss": 0.2733, "step": 9042 }, { "epoch": 1.072334874896241, "grad_norm": 0.9828680729303578, "learning_rate": 4.292875135878739e-05, "loss": 0.2253, "step": 9043 }, { "epoch": 1.072453456658366, "grad_norm": 1.2091115868605609, "learning_rate": 4.292707841937212e-05, "loss": 0.2135, "step": 9044 }, { "epoch": 1.0725720384204909, "grad_norm": 0.9443778743518986, "learning_rate": 4.292540531469118e-05, "loss": 0.1519, "step": 9045 }, { "epoch": 1.0726906201826159, "grad_norm": 1.0735978982149512, "learning_rate": 4.292373204475998e-05, "loss": 0.226, "step": 9046 }, { "epoch": 1.0728092019447408, "grad_norm": 1.0355469928970396, "learning_rate": 4.2922058609593944e-05, "loss": 0.2394, "step": 9047 }, { "epoch": 1.0729277837068658, "grad_norm": 1.466589834932478, "learning_rate": 4.292038500920851e-05, "loss": 0.3803, "step": 9048 }, { "epoch": 1.0730463654689908, "grad_norm": 1.266371288808464, "learning_rate": 4.2918711243619106e-05, "loss": 0.2768, "step": 9049 }, { "epoch": 1.0731649472311158, "grad_norm": 1.0677907120441679, "learning_rate": 4.291703731284116e-05, "loss": 0.2027, "step": 9050 }, { "epoch": 1.0732835289932408, "grad_norm": 0.9783059391947715, "learning_rate": 4.291536321689009e-05, "loss": 0.2359, "step": 9051 }, { "epoch": 1.0734021107553657, "grad_norm": 1.2089250305357704, "learning_rate": 4.291368895578134e-05, "loss": 0.2077, "step": 9052 }, { "epoch": 1.0735206925174907, "grad_norm": 1.189631386197382, "learning_rate": 4.2912014529530355e-05, "loss": 0.2602, "step": 9053 }, { "epoch": 1.0736392742796157, "grad_norm": 1.159821925012147, "learning_rate": 4.291033993815255e-05, "loss": 0.2753, "step": 9054 }, { "epoch": 1.0737578560417407, "grad_norm": 1.3166078600238478, "learning_rate": 4.290866518166339e-05, "loss": 0.2946, "step": 9055 }, { "epoch": 1.0738764378038659, "grad_norm": 1.2401311408483102, "learning_rate": 4.290699026007828e-05, "loss": 0.3629, "step": 9056 }, { "epoch": 1.0739950195659906, "grad_norm": 1.3273181340018039, "learning_rate": 4.2905315173412694e-05, "loss": 0.3066, "step": 9057 }, { "epoch": 1.0741136013281158, "grad_norm": 0.9131526394426734, "learning_rate": 4.290363992168205e-05, "loss": 0.164, "step": 9058 }, { "epoch": 1.0742321830902408, "grad_norm": 1.1052317584105726, "learning_rate": 4.29019645049018e-05, "loss": 0.206, "step": 9059 }, { "epoch": 1.0743507648523658, "grad_norm": 1.3106412631445592, "learning_rate": 4.29002889230874e-05, "loss": 0.2342, "step": 9060 }, { "epoch": 1.0744693466144908, "grad_norm": 1.2825405808535364, "learning_rate": 4.289861317625427e-05, "loss": 0.2902, "step": 9061 }, { "epoch": 1.0745879283766158, "grad_norm": 1.0182269493619722, "learning_rate": 4.289693726441789e-05, "loss": 0.219, "step": 9062 }, { "epoch": 1.0747065101387407, "grad_norm": 1.4575696016292132, "learning_rate": 4.28952611875937e-05, "loss": 0.3334, "step": 9063 }, { "epoch": 1.0748250919008657, "grad_norm": 0.9844982821095701, "learning_rate": 4.289358494579714e-05, "loss": 0.1773, "step": 9064 }, { "epoch": 1.0749436736629907, "grad_norm": 1.1053084135359166, "learning_rate": 4.289190853904366e-05, "loss": 0.253, "step": 9065 }, { "epoch": 1.0750622554251157, "grad_norm": 1.036669505544453, "learning_rate": 4.289023196734873e-05, "loss": 0.2321, "step": 9066 }, { "epoch": 1.0751808371872407, "grad_norm": 1.4857114172216324, "learning_rate": 4.28885552307278e-05, "loss": 0.3441, "step": 9067 }, { "epoch": 1.0752994189493656, "grad_norm": 1.1418084859174489, "learning_rate": 4.288687832919633e-05, "loss": 0.2302, "step": 9068 }, { "epoch": 1.0754180007114906, "grad_norm": 0.848256405668171, "learning_rate": 4.288520126276977e-05, "loss": 0.1786, "step": 9069 }, { "epoch": 1.0755365824736156, "grad_norm": 1.106485565416263, "learning_rate": 4.28835240314636e-05, "loss": 0.2467, "step": 9070 }, { "epoch": 1.0756551642357406, "grad_norm": 0.8534562842799192, "learning_rate": 4.288184663529325e-05, "loss": 0.1805, "step": 9071 }, { "epoch": 1.0757737459978656, "grad_norm": 0.986074766616734, "learning_rate": 4.288016907427421e-05, "loss": 0.1752, "step": 9072 }, { "epoch": 1.0758923277599906, "grad_norm": 1.5563381240086647, "learning_rate": 4.287849134842194e-05, "loss": 0.2918, "step": 9073 }, { "epoch": 1.0760109095221155, "grad_norm": 1.2825886543748068, "learning_rate": 4.2876813457751904e-05, "loss": 0.3083, "step": 9074 }, { "epoch": 1.0761294912842405, "grad_norm": 1.2646774288141014, "learning_rate": 4.287513540227956e-05, "loss": 0.2603, "step": 9075 }, { "epoch": 1.0762480730463655, "grad_norm": 1.5849857832828784, "learning_rate": 4.2873457182020405e-05, "loss": 0.309, "step": 9076 }, { "epoch": 1.0763666548084905, "grad_norm": 1.1444843842234906, "learning_rate": 4.287177879698988e-05, "loss": 0.2511, "step": 9077 }, { "epoch": 1.0764852365706155, "grad_norm": 1.015343810650589, "learning_rate": 4.287010024720347e-05, "loss": 0.1957, "step": 9078 }, { "epoch": 1.0766038183327404, "grad_norm": 1.2639319711716797, "learning_rate": 4.286842153267666e-05, "loss": 0.2812, "step": 9079 }, { "epoch": 1.0767224000948654, "grad_norm": 0.9139499707734923, "learning_rate": 4.286674265342491e-05, "loss": 0.2612, "step": 9080 }, { "epoch": 1.0768409818569904, "grad_norm": 1.3629518507191536, "learning_rate": 4.2865063609463695e-05, "loss": 0.2624, "step": 9081 }, { "epoch": 1.0769595636191154, "grad_norm": 1.0527014830219137, "learning_rate": 4.2863384400808506e-05, "loss": 0.1867, "step": 9082 }, { "epoch": 1.0770781453812404, "grad_norm": 1.4530877707706533, "learning_rate": 4.286170502747482e-05, "loss": 0.2993, "step": 9083 }, { "epoch": 1.0771967271433653, "grad_norm": 1.2894894112699467, "learning_rate": 4.2860025489478115e-05, "loss": 0.2551, "step": 9084 }, { "epoch": 1.0773153089054903, "grad_norm": 1.2307716213749689, "learning_rate": 4.2858345786833874e-05, "loss": 0.2915, "step": 9085 }, { "epoch": 1.0774338906676153, "grad_norm": 1.50179134210579, "learning_rate": 4.285666591955759e-05, "loss": 0.3032, "step": 9086 }, { "epoch": 1.0775524724297403, "grad_norm": 1.114447449720133, "learning_rate": 4.2854985887664746e-05, "loss": 0.24, "step": 9087 }, { "epoch": 1.0776710541918653, "grad_norm": 0.9870700067468474, "learning_rate": 4.285330569117082e-05, "loss": 0.1597, "step": 9088 }, { "epoch": 1.0777896359539902, "grad_norm": 1.0596746168299818, "learning_rate": 4.285162533009131e-05, "loss": 0.2242, "step": 9089 }, { "epoch": 1.0779082177161152, "grad_norm": 0.8406095922616023, "learning_rate": 4.284994480444171e-05, "loss": 0.1847, "step": 9090 }, { "epoch": 1.0780267994782402, "grad_norm": 1.0345510340878588, "learning_rate": 4.28482641142375e-05, "loss": 0.2179, "step": 9091 }, { "epoch": 1.0781453812403652, "grad_norm": 0.975461620638216, "learning_rate": 4.2846583259494186e-05, "loss": 0.2105, "step": 9092 }, { "epoch": 1.0782639630024902, "grad_norm": 1.1048821905712536, "learning_rate": 4.2844902240227264e-05, "loss": 0.2554, "step": 9093 }, { "epoch": 1.0783825447646151, "grad_norm": 1.0167333207986267, "learning_rate": 4.284322105645222e-05, "loss": 0.2359, "step": 9094 }, { "epoch": 1.0785011265267401, "grad_norm": 1.2860619685496621, "learning_rate": 4.284153970818456e-05, "loss": 0.2611, "step": 9095 }, { "epoch": 1.078619708288865, "grad_norm": 1.1089967922582202, "learning_rate": 4.283985819543979e-05, "loss": 0.2274, "step": 9096 }, { "epoch": 1.07873829005099, "grad_norm": 1.2213558658432468, "learning_rate": 4.28381765182334e-05, "loss": 0.219, "step": 9097 }, { "epoch": 1.078856871813115, "grad_norm": 1.201846758472491, "learning_rate": 4.283649467658089e-05, "loss": 0.3088, "step": 9098 }, { "epoch": 1.07897545357524, "grad_norm": 1.1980682166810548, "learning_rate": 4.2834812670497774e-05, "loss": 0.2464, "step": 9099 }, { "epoch": 1.079094035337365, "grad_norm": 1.1537468838766292, "learning_rate": 4.283313049999956e-05, "loss": 0.2556, "step": 9100 }, { "epoch": 1.07921261709949, "grad_norm": 1.2103706594429784, "learning_rate": 4.2831448165101754e-05, "loss": 0.2641, "step": 9101 }, { "epoch": 1.079331198861615, "grad_norm": 1.7141584244553743, "learning_rate": 4.2829765665819856e-05, "loss": 0.3217, "step": 9102 }, { "epoch": 1.07944978062374, "grad_norm": 1.4181461952332843, "learning_rate": 4.2828083002169384e-05, "loss": 0.3241, "step": 9103 }, { "epoch": 1.079568362385865, "grad_norm": 1.6922053572592974, "learning_rate": 4.282640017416585e-05, "loss": 0.3536, "step": 9104 }, { "epoch": 1.07968694414799, "grad_norm": 0.9957961595399599, "learning_rate": 4.282471718182476e-05, "loss": 0.1937, "step": 9105 }, { "epoch": 1.079805525910115, "grad_norm": 1.1275301425599134, "learning_rate": 4.282303402516165e-05, "loss": 0.2115, "step": 9106 }, { "epoch": 1.0799241076722401, "grad_norm": 1.5309215131145724, "learning_rate": 4.282135070419201e-05, "loss": 0.3131, "step": 9107 }, { "epoch": 1.0800426894343649, "grad_norm": 1.2793855889529975, "learning_rate": 4.281966721893137e-05, "loss": 0.2787, "step": 9108 }, { "epoch": 1.08016127119649, "grad_norm": 1.05056530809754, "learning_rate": 4.281798356939526e-05, "loss": 0.2119, "step": 9109 }, { "epoch": 1.080279852958615, "grad_norm": 0.951603772277236, "learning_rate": 4.281629975559918e-05, "loss": 0.1715, "step": 9110 }, { "epoch": 1.08039843472074, "grad_norm": 0.9866128667801015, "learning_rate": 4.281461577755868e-05, "loss": 0.2061, "step": 9111 }, { "epoch": 1.080517016482865, "grad_norm": 1.099851354773601, "learning_rate": 4.281293163528925e-05, "loss": 0.1831, "step": 9112 }, { "epoch": 1.08063559824499, "grad_norm": 1.1523562247462882, "learning_rate": 4.281124732880644e-05, "loss": 0.2845, "step": 9113 }, { "epoch": 1.080754180007115, "grad_norm": 1.200690538203935, "learning_rate": 4.280956285812577e-05, "loss": 0.2529, "step": 9114 }, { "epoch": 1.08087276176924, "grad_norm": 1.2545414315480166, "learning_rate": 4.2807878223262774e-05, "loss": 0.3057, "step": 9115 }, { "epoch": 1.080991343531365, "grad_norm": 1.0644210265753857, "learning_rate": 4.280619342423298e-05, "loss": 0.2379, "step": 9116 }, { "epoch": 1.08110992529349, "grad_norm": 1.5232123307371377, "learning_rate": 4.280450846105192e-05, "loss": 0.3437, "step": 9117 }, { "epoch": 1.081228507055615, "grad_norm": 1.3497817162933794, "learning_rate": 4.280282333373512e-05, "loss": 0.2693, "step": 9118 }, { "epoch": 1.0813470888177399, "grad_norm": 1.223590859907483, "learning_rate": 4.280113804229812e-05, "loss": 0.2421, "step": 9119 }, { "epoch": 1.0814656705798649, "grad_norm": 0.9718899217488182, "learning_rate": 4.279945258675646e-05, "loss": 0.201, "step": 9120 }, { "epoch": 1.0815842523419898, "grad_norm": 1.0675611368400064, "learning_rate": 4.279776696712567e-05, "loss": 0.2179, "step": 9121 }, { "epoch": 1.0817028341041148, "grad_norm": 1.3146545463121166, "learning_rate": 4.2796081183421295e-05, "loss": 0.2807, "step": 9122 }, { "epoch": 1.0818214158662398, "grad_norm": 1.06222449513102, "learning_rate": 4.2794395235658875e-05, "loss": 0.2194, "step": 9123 }, { "epoch": 1.0819399976283648, "grad_norm": 1.1032776745367123, "learning_rate": 4.279270912385395e-05, "loss": 0.1914, "step": 9124 }, { "epoch": 1.0820585793904898, "grad_norm": 0.9790355291698437, "learning_rate": 4.279102284802207e-05, "loss": 0.1856, "step": 9125 }, { "epoch": 1.0821771611526148, "grad_norm": 1.3815266053179054, "learning_rate": 4.278933640817877e-05, "loss": 0.2608, "step": 9126 }, { "epoch": 1.0822957429147397, "grad_norm": 1.124652971956732, "learning_rate": 4.278764980433961e-05, "loss": 0.1799, "step": 9127 }, { "epoch": 1.0824143246768647, "grad_norm": 1.3162363543550377, "learning_rate": 4.278596303652013e-05, "loss": 0.2467, "step": 9128 }, { "epoch": 1.0825329064389897, "grad_norm": 1.1470482453271884, "learning_rate": 4.278427610473588e-05, "loss": 0.2879, "step": 9129 }, { "epoch": 1.0826514882011147, "grad_norm": 1.258312303289879, "learning_rate": 4.278258900900241e-05, "loss": 0.287, "step": 9130 }, { "epoch": 1.0827700699632397, "grad_norm": 1.1689072241485048, "learning_rate": 4.278090174933528e-05, "loss": 0.2393, "step": 9131 }, { "epoch": 1.0828886517253646, "grad_norm": 1.0902874096881485, "learning_rate": 4.277921432575004e-05, "loss": 0.2568, "step": 9132 }, { "epoch": 1.0830072334874896, "grad_norm": 1.6724578178500933, "learning_rate": 4.277752673826224e-05, "loss": 0.3572, "step": 9133 }, { "epoch": 1.0831258152496146, "grad_norm": 0.9578315286110548, "learning_rate": 4.2775838986887454e-05, "loss": 0.1935, "step": 9134 }, { "epoch": 1.0832443970117396, "grad_norm": 1.5675319808445998, "learning_rate": 4.277415107164122e-05, "loss": 0.317, "step": 9135 }, { "epoch": 1.0833629787738646, "grad_norm": 0.9029388678156982, "learning_rate": 4.277246299253911e-05, "loss": 0.1845, "step": 9136 }, { "epoch": 1.0834815605359895, "grad_norm": 1.432714886806861, "learning_rate": 4.27707747495967e-05, "loss": 0.2867, "step": 9137 }, { "epoch": 1.0836001422981145, "grad_norm": 1.3860162177963657, "learning_rate": 4.276908634282952e-05, "loss": 0.2932, "step": 9138 }, { "epoch": 1.0837187240602395, "grad_norm": 1.4439714118398406, "learning_rate": 4.2767397772253164e-05, "loss": 0.277, "step": 9139 }, { "epoch": 1.0838373058223645, "grad_norm": 1.3531109777878096, "learning_rate": 4.2765709037883183e-05, "loss": 0.3361, "step": 9140 }, { "epoch": 1.0839558875844895, "grad_norm": 1.4539372963791206, "learning_rate": 4.276402013973516e-05, "loss": 0.2965, "step": 9141 }, { "epoch": 1.0840744693466144, "grad_norm": 1.1794682083605723, "learning_rate": 4.276233107782465e-05, "loss": 0.2737, "step": 9142 }, { "epoch": 1.0841930511087394, "grad_norm": 1.1602231114919903, "learning_rate": 4.2760641852167225e-05, "loss": 0.2407, "step": 9143 }, { "epoch": 1.0843116328708644, "grad_norm": 1.023415265184284, "learning_rate": 4.275895246277847e-05, "loss": 0.1972, "step": 9144 }, { "epoch": 1.0844302146329894, "grad_norm": 1.024978479089089, "learning_rate": 4.275726290967394e-05, "loss": 0.24, "step": 9145 }, { "epoch": 1.0845487963951144, "grad_norm": 1.3752313401616316, "learning_rate": 4.2755573192869226e-05, "loss": 0.3268, "step": 9146 }, { "epoch": 1.0846673781572393, "grad_norm": 1.1315551526965504, "learning_rate": 4.275388331237991e-05, "loss": 0.274, "step": 9147 }, { "epoch": 1.0847859599193643, "grad_norm": 1.3658292891205541, "learning_rate": 4.275219326822154e-05, "loss": 0.3034, "step": 9148 }, { "epoch": 1.0849045416814893, "grad_norm": 0.9785929654468707, "learning_rate": 4.275050306040974e-05, "loss": 0.2411, "step": 9149 }, { "epoch": 1.0850231234436143, "grad_norm": 1.3315396216297213, "learning_rate": 4.274881268896005e-05, "loss": 0.3065, "step": 9150 }, { "epoch": 1.0851417052057393, "grad_norm": 0.9102992321442495, "learning_rate": 4.274712215388809e-05, "loss": 0.147, "step": 9151 }, { "epoch": 1.0852602869678643, "grad_norm": 1.1409938041952026, "learning_rate": 4.2745431455209415e-05, "loss": 0.2508, "step": 9152 }, { "epoch": 1.0853788687299892, "grad_norm": 1.2035567030437635, "learning_rate": 4.2743740592939616e-05, "loss": 0.2943, "step": 9153 }, { "epoch": 1.0854974504921142, "grad_norm": 0.9963882076772004, "learning_rate": 4.27420495670943e-05, "loss": 0.2472, "step": 9154 }, { "epoch": 1.0856160322542392, "grad_norm": 1.6850783138322156, "learning_rate": 4.274035837768904e-05, "loss": 0.3729, "step": 9155 }, { "epoch": 1.0857346140163644, "grad_norm": 1.4619231760322848, "learning_rate": 4.2738667024739435e-05, "loss": 0.2795, "step": 9156 }, { "epoch": 1.0858531957784892, "grad_norm": 1.4228647173252573, "learning_rate": 4.273697550826107e-05, "loss": 0.2669, "step": 9157 }, { "epoch": 1.0859717775406144, "grad_norm": 1.2624370539535106, "learning_rate": 4.273528382826953e-05, "loss": 0.2582, "step": 9158 }, { "epoch": 1.0860903593027393, "grad_norm": 0.9940680320561808, "learning_rate": 4.273359198478044e-05, "loss": 0.2037, "step": 9159 }, { "epoch": 1.0862089410648643, "grad_norm": 1.1891623404083551, "learning_rate": 4.2731899977809366e-05, "loss": 0.2766, "step": 9160 }, { "epoch": 1.0863275228269893, "grad_norm": 1.3533342152120247, "learning_rate": 4.273020780737192e-05, "loss": 0.2513, "step": 9161 }, { "epoch": 1.0864461045891143, "grad_norm": 1.4396197859206152, "learning_rate": 4.27285154734837e-05, "loss": 0.2331, "step": 9162 }, { "epoch": 1.0865646863512393, "grad_norm": 1.3325042244407195, "learning_rate": 4.272682297616032e-05, "loss": 0.2743, "step": 9163 }, { "epoch": 1.0866832681133642, "grad_norm": 1.228057402075941, "learning_rate": 4.2725130315417354e-05, "loss": 0.2842, "step": 9164 }, { "epoch": 1.0868018498754892, "grad_norm": 1.3663128711474655, "learning_rate": 4.272343749127043e-05, "loss": 0.2357, "step": 9165 }, { "epoch": 1.0869204316376142, "grad_norm": 1.2592922710360221, "learning_rate": 4.2721744503735144e-05, "loss": 0.2487, "step": 9166 }, { "epoch": 1.0870390133997392, "grad_norm": 0.8347615286178848, "learning_rate": 4.2720051352827106e-05, "loss": 0.1447, "step": 9167 }, { "epoch": 1.0871575951618642, "grad_norm": 1.2128423519082276, "learning_rate": 4.2718358038561926e-05, "loss": 0.2305, "step": 9168 }, { "epoch": 1.0872761769239891, "grad_norm": 1.046602381197557, "learning_rate": 4.271666456095521e-05, "loss": 0.225, "step": 9169 }, { "epoch": 1.0873947586861141, "grad_norm": 1.2569836006634818, "learning_rate": 4.2714970920022574e-05, "loss": 0.2962, "step": 9170 }, { "epoch": 1.087513340448239, "grad_norm": 0.9337378634987525, "learning_rate": 4.2713277115779626e-05, "loss": 0.22, "step": 9171 }, { "epoch": 1.087631922210364, "grad_norm": 1.1662354634181071, "learning_rate": 4.2711583148241994e-05, "loss": 0.2707, "step": 9172 }, { "epoch": 1.087750503972489, "grad_norm": 2.0172091037622746, "learning_rate": 4.2709889017425276e-05, "loss": 0.4478, "step": 9173 }, { "epoch": 1.087869085734614, "grad_norm": 1.3518027956778271, "learning_rate": 4.27081947233451e-05, "loss": 0.2468, "step": 9174 }, { "epoch": 1.087987667496739, "grad_norm": 1.241243656519273, "learning_rate": 4.270650026601708e-05, "loss": 0.2238, "step": 9175 }, { "epoch": 1.088106249258864, "grad_norm": 1.4276651035296595, "learning_rate": 4.2704805645456845e-05, "loss": 0.2564, "step": 9176 }, { "epoch": 1.088224831020989, "grad_norm": 1.325456462086742, "learning_rate": 4.270311086168002e-05, "loss": 0.3167, "step": 9177 }, { "epoch": 1.088343412783114, "grad_norm": 1.2902247078436213, "learning_rate": 4.270141591470221e-05, "loss": 0.2358, "step": 9178 }, { "epoch": 1.088461994545239, "grad_norm": 1.2492777980281433, "learning_rate": 4.269972080453906e-05, "loss": 0.3023, "step": 9179 }, { "epoch": 1.088580576307364, "grad_norm": 1.2660422587450932, "learning_rate": 4.269802553120619e-05, "loss": 0.3272, "step": 9180 }, { "epoch": 1.088699158069489, "grad_norm": 1.3105136237045314, "learning_rate": 4.2696330094719224e-05, "loss": 0.2296, "step": 9181 }, { "epoch": 1.088817739831614, "grad_norm": 1.0491650788946492, "learning_rate": 4.2694634495093796e-05, "loss": 0.2497, "step": 9182 }, { "epoch": 1.0889363215937389, "grad_norm": 0.9287014668920095, "learning_rate": 4.2692938732345536e-05, "loss": 0.2148, "step": 9183 }, { "epoch": 1.0890549033558639, "grad_norm": 0.907189967760282, "learning_rate": 4.269124280649007e-05, "loss": 0.1785, "step": 9184 }, { "epoch": 1.0891734851179888, "grad_norm": 0.9734860544620869, "learning_rate": 4.268954671754305e-05, "loss": 0.201, "step": 9185 }, { "epoch": 1.0892920668801138, "grad_norm": 1.338161799297769, "learning_rate": 4.26878504655201e-05, "loss": 0.3328, "step": 9186 }, { "epoch": 1.0894106486422388, "grad_norm": 1.0606684645354016, "learning_rate": 4.2686154050436854e-05, "loss": 0.239, "step": 9187 }, { "epoch": 1.0895292304043638, "grad_norm": 1.1197765737542993, "learning_rate": 4.2684457472308956e-05, "loss": 0.2608, "step": 9188 }, { "epoch": 1.0896478121664888, "grad_norm": 1.1839268066559638, "learning_rate": 4.268276073115206e-05, "loss": 0.2588, "step": 9189 }, { "epoch": 1.0897663939286137, "grad_norm": 1.0694378280198684, "learning_rate": 4.268106382698178e-05, "loss": 0.2596, "step": 9190 }, { "epoch": 1.0898849756907387, "grad_norm": 1.1414435999241115, "learning_rate": 4.267936675981378e-05, "loss": 0.2449, "step": 9191 }, { "epoch": 1.0900035574528637, "grad_norm": 0.9573981697396776, "learning_rate": 4.267766952966369e-05, "loss": 0.22, "step": 9192 }, { "epoch": 1.0901221392149887, "grad_norm": 1.3568428763112232, "learning_rate": 4.267597213654717e-05, "loss": 0.3183, "step": 9193 }, { "epoch": 1.0902407209771137, "grad_norm": 1.3316362067110235, "learning_rate": 4.2674274580479866e-05, "loss": 0.3866, "step": 9194 }, { "epoch": 1.0903593027392386, "grad_norm": 1.4468905974318667, "learning_rate": 4.2672576861477424e-05, "loss": 0.2767, "step": 9195 }, { "epoch": 1.0904778845013636, "grad_norm": 1.009169263608157, "learning_rate": 4.2670878979555485e-05, "loss": 0.2077, "step": 9196 }, { "epoch": 1.0905964662634886, "grad_norm": 1.5731328503957709, "learning_rate": 4.266918093472971e-05, "loss": 0.3541, "step": 9197 }, { "epoch": 1.0907150480256136, "grad_norm": 1.1164722787619714, "learning_rate": 4.266748272701577e-05, "loss": 0.2489, "step": 9198 }, { "epoch": 1.0908336297877386, "grad_norm": 1.1480336569784453, "learning_rate": 4.266578435642929e-05, "loss": 0.2491, "step": 9199 }, { "epoch": 1.0909522115498635, "grad_norm": 0.8443236756973576, "learning_rate": 4.2664085822985956e-05, "loss": 0.1959, "step": 9200 }, { "epoch": 1.0910707933119885, "grad_norm": 1.4577428611434744, "learning_rate": 4.26623871267014e-05, "loss": 0.3376, "step": 9201 }, { "epoch": 1.0911893750741135, "grad_norm": 1.2416899346552959, "learning_rate": 4.2660688267591305e-05, "loss": 0.2388, "step": 9202 }, { "epoch": 1.0913079568362385, "grad_norm": 1.2904374202958961, "learning_rate": 4.265898924567131e-05, "loss": 0.2398, "step": 9203 }, { "epoch": 1.0914265385983635, "grad_norm": 1.0849135622709687, "learning_rate": 4.265729006095709e-05, "loss": 0.2205, "step": 9204 }, { "epoch": 1.0915451203604887, "grad_norm": 1.1317579512181228, "learning_rate": 4.265559071346431e-05, "loss": 0.2069, "step": 9205 }, { "epoch": 1.0916637021226134, "grad_norm": 1.2168133551357452, "learning_rate": 4.265389120320864e-05, "loss": 0.2235, "step": 9206 }, { "epoch": 1.0917822838847386, "grad_norm": 1.5190023256565974, "learning_rate": 4.2652191530205735e-05, "loss": 0.4071, "step": 9207 }, { "epoch": 1.0919008656468634, "grad_norm": 1.1790934394785428, "learning_rate": 4.265049169447127e-05, "loss": 0.2389, "step": 9208 }, { "epoch": 1.0920194474089886, "grad_norm": 0.8175285527868961, "learning_rate": 4.2648791696020924e-05, "loss": 0.1622, "step": 9209 }, { "epoch": 1.0921380291711136, "grad_norm": 1.1053471140418476, "learning_rate": 4.2647091534870355e-05, "loss": 0.2419, "step": 9210 }, { "epoch": 1.0922566109332386, "grad_norm": 1.1381055310431798, "learning_rate": 4.264539121103525e-05, "loss": 0.24, "step": 9211 }, { "epoch": 1.0923751926953635, "grad_norm": 1.3492833534512376, "learning_rate": 4.264369072453126e-05, "loss": 0.3022, "step": 9212 }, { "epoch": 1.0924937744574885, "grad_norm": 1.3436732399569105, "learning_rate": 4.2641990075374096e-05, "loss": 0.2961, "step": 9213 }, { "epoch": 1.0926123562196135, "grad_norm": 1.3048377282071555, "learning_rate": 4.26402892635794e-05, "loss": 0.3426, "step": 9214 }, { "epoch": 1.0927309379817385, "grad_norm": 1.3778724835090845, "learning_rate": 4.263858828916288e-05, "loss": 0.3846, "step": 9215 }, { "epoch": 1.0928495197438635, "grad_norm": 1.387870575135757, "learning_rate": 4.2636887152140205e-05, "loss": 0.2875, "step": 9216 }, { "epoch": 1.0929681015059884, "grad_norm": 1.1941143423445184, "learning_rate": 4.263518585252706e-05, "loss": 0.2805, "step": 9217 }, { "epoch": 1.0930866832681134, "grad_norm": 1.1993962512969953, "learning_rate": 4.2633484390339115e-05, "loss": 0.2647, "step": 9218 }, { "epoch": 1.0932052650302384, "grad_norm": 1.44899802330021, "learning_rate": 4.263178276559208e-05, "loss": 0.3078, "step": 9219 }, { "epoch": 1.0933238467923634, "grad_norm": 1.1603161817205814, "learning_rate": 4.263008097830162e-05, "loss": 0.258, "step": 9220 }, { "epoch": 1.0934424285544884, "grad_norm": 0.7747780431231321, "learning_rate": 4.2628379028483444e-05, "loss": 0.1485, "step": 9221 }, { "epoch": 1.0935610103166133, "grad_norm": 1.135129841060286, "learning_rate": 4.262667691615323e-05, "loss": 0.2348, "step": 9222 }, { "epoch": 1.0936795920787383, "grad_norm": 1.7300863176428136, "learning_rate": 4.262497464132666e-05, "loss": 0.3484, "step": 9223 }, { "epoch": 1.0937981738408633, "grad_norm": 1.1776284904295375, "learning_rate": 4.262327220401945e-05, "loss": 0.212, "step": 9224 }, { "epoch": 1.0939167556029883, "grad_norm": 1.0105541776136007, "learning_rate": 4.262156960424727e-05, "loss": 0.254, "step": 9225 }, { "epoch": 1.0940353373651133, "grad_norm": 1.1437405045016575, "learning_rate": 4.261986684202583e-05, "loss": 0.2024, "step": 9226 }, { "epoch": 1.0941539191272383, "grad_norm": 1.7185291081138585, "learning_rate": 4.2618163917370824e-05, "loss": 0.3264, "step": 9227 }, { "epoch": 1.0942725008893632, "grad_norm": 1.2394068408409242, "learning_rate": 4.261646083029795e-05, "loss": 0.2373, "step": 9228 }, { "epoch": 1.0943910826514882, "grad_norm": 1.5932971881120734, "learning_rate": 4.261475758082292e-05, "loss": 0.3077, "step": 9229 }, { "epoch": 1.0945096644136132, "grad_norm": 0.9455972669532549, "learning_rate": 4.261305416896142e-05, "loss": 0.2038, "step": 9230 }, { "epoch": 1.0946282461757382, "grad_norm": 1.2390059917903147, "learning_rate": 4.261135059472915e-05, "loss": 0.2463, "step": 9231 }, { "epoch": 1.0947468279378632, "grad_norm": 1.0405097254882079, "learning_rate": 4.260964685814184e-05, "loss": 0.1948, "step": 9232 }, { "epoch": 1.0948654096999881, "grad_norm": 1.1895912385411036, "learning_rate": 4.260794295921516e-05, "loss": 0.2732, "step": 9233 }, { "epoch": 1.0949839914621131, "grad_norm": 1.6060138076384047, "learning_rate": 4.260623889796486e-05, "loss": 0.3613, "step": 9234 }, { "epoch": 1.095102573224238, "grad_norm": 1.0005079505915218, "learning_rate": 4.260453467440662e-05, "loss": 0.2031, "step": 9235 }, { "epoch": 1.095221154986363, "grad_norm": 0.9418734985927831, "learning_rate": 4.260283028855615e-05, "loss": 0.2036, "step": 9236 }, { "epoch": 1.095339736748488, "grad_norm": 1.081796061344973, "learning_rate": 4.2601125740429185e-05, "loss": 0.2159, "step": 9237 }, { "epoch": 1.095458318510613, "grad_norm": 1.6082870112706082, "learning_rate": 4.259942103004141e-05, "loss": 0.2494, "step": 9238 }, { "epoch": 1.095576900272738, "grad_norm": 1.0747976483561006, "learning_rate": 4.259771615740856e-05, "loss": 0.2067, "step": 9239 }, { "epoch": 1.095695482034863, "grad_norm": 1.0649857995394778, "learning_rate": 4.259601112254635e-05, "loss": 0.317, "step": 9240 }, { "epoch": 1.095814063796988, "grad_norm": 1.1169876828508543, "learning_rate": 4.2594305925470495e-05, "loss": 0.252, "step": 9241 }, { "epoch": 1.095932645559113, "grad_norm": 1.5995477954645707, "learning_rate": 4.259260056619671e-05, "loss": 0.3153, "step": 9242 }, { "epoch": 1.096051227321238, "grad_norm": 1.845271639597029, "learning_rate": 4.259089504474073e-05, "loss": 0.4786, "step": 9243 }, { "epoch": 1.096169809083363, "grad_norm": 1.2015727728066514, "learning_rate": 4.2589189361118266e-05, "loss": 0.2989, "step": 9244 }, { "epoch": 1.096288390845488, "grad_norm": 1.0870535652718698, "learning_rate": 4.258748351534504e-05, "loss": 0.2191, "step": 9245 }, { "epoch": 1.0964069726076129, "grad_norm": 1.0206711736917058, "learning_rate": 4.258577750743678e-05, "loss": 0.2153, "step": 9246 }, { "epoch": 1.0965255543697379, "grad_norm": 1.4982287131448218, "learning_rate": 4.2584071337409224e-05, "loss": 0.3037, "step": 9247 }, { "epoch": 1.0966441361318628, "grad_norm": 1.2135255609443034, "learning_rate": 4.25823650052781e-05, "loss": 0.2948, "step": 9248 }, { "epoch": 1.0967627178939878, "grad_norm": 1.0505610530285765, "learning_rate": 4.258065851105913e-05, "loss": 0.208, "step": 9249 }, { "epoch": 1.0968812996561128, "grad_norm": 1.207656906988747, "learning_rate": 4.257895185476803e-05, "loss": 0.2315, "step": 9250 }, { "epoch": 1.0969998814182378, "grad_norm": 1.07808251993154, "learning_rate": 4.257724503642056e-05, "loss": 0.2068, "step": 9251 }, { "epoch": 1.0971184631803628, "grad_norm": 1.014163606801111, "learning_rate": 4.257553805603245e-05, "loss": 0.2029, "step": 9252 }, { "epoch": 1.0972370449424877, "grad_norm": 1.6482281269095507, "learning_rate": 4.257383091361943e-05, "loss": 0.4504, "step": 9253 }, { "epoch": 1.0973556267046127, "grad_norm": 0.8389435966004057, "learning_rate": 4.257212360919724e-05, "loss": 0.185, "step": 9254 }, { "epoch": 1.0974742084667377, "grad_norm": 1.306398236415953, "learning_rate": 4.257041614278161e-05, "loss": 0.2544, "step": 9255 }, { "epoch": 1.097592790228863, "grad_norm": 1.422092395725231, "learning_rate": 4.25687085143883e-05, "loss": 0.3106, "step": 9256 }, { "epoch": 1.0977113719909877, "grad_norm": 1.1284047401973236, "learning_rate": 4.256700072403304e-05, "loss": 0.2933, "step": 9257 }, { "epoch": 1.0978299537531129, "grad_norm": 1.9658526117436346, "learning_rate": 4.256529277173157e-05, "loss": 0.3745, "step": 9258 }, { "epoch": 1.0979485355152379, "grad_norm": 1.2658310956504497, "learning_rate": 4.2563584657499645e-05, "loss": 0.2285, "step": 9259 }, { "epoch": 1.0980671172773628, "grad_norm": 1.2102194512398154, "learning_rate": 4.256187638135301e-05, "loss": 0.2746, "step": 9260 }, { "epoch": 1.0981856990394878, "grad_norm": 1.3414506716796777, "learning_rate": 4.2560167943307406e-05, "loss": 0.2977, "step": 9261 }, { "epoch": 1.0983042808016128, "grad_norm": 1.0481854078930073, "learning_rate": 4.255845934337859e-05, "loss": 0.2249, "step": 9262 }, { "epoch": 1.0984228625637378, "grad_norm": 1.300661663056757, "learning_rate": 4.2556750581582304e-05, "loss": 0.2312, "step": 9263 }, { "epoch": 1.0985414443258628, "grad_norm": 1.1118936067513359, "learning_rate": 4.255504165793431e-05, "loss": 0.2392, "step": 9264 }, { "epoch": 1.0986600260879877, "grad_norm": 1.5436207924174938, "learning_rate": 4.2553332572450364e-05, "loss": 0.2995, "step": 9265 }, { "epoch": 1.0987786078501127, "grad_norm": 0.9481634240913727, "learning_rate": 4.2551623325146205e-05, "loss": 0.2188, "step": 9266 }, { "epoch": 1.0988971896122377, "grad_norm": 1.3985926245759273, "learning_rate": 4.254991391603762e-05, "loss": 0.2845, "step": 9267 }, { "epoch": 1.0990157713743627, "grad_norm": 0.9849354756045638, "learning_rate": 4.2548204345140344e-05, "loss": 0.219, "step": 9268 }, { "epoch": 1.0991343531364877, "grad_norm": 1.1724496177206996, "learning_rate": 4.254649461247013e-05, "loss": 0.2375, "step": 9269 }, { "epoch": 1.0992529348986126, "grad_norm": 1.324494958360901, "learning_rate": 4.254478471804276e-05, "loss": 0.271, "step": 9270 }, { "epoch": 1.0993715166607376, "grad_norm": 1.478864038368792, "learning_rate": 4.2543074661874e-05, "loss": 0.3473, "step": 9271 }, { "epoch": 1.0994900984228626, "grad_norm": 1.0925536690567057, "learning_rate": 4.25413644439796e-05, "loss": 0.1966, "step": 9272 }, { "epoch": 1.0996086801849876, "grad_norm": 1.2416507817736837, "learning_rate": 4.253965406437532e-05, "loss": 0.2354, "step": 9273 }, { "epoch": 1.0997272619471126, "grad_norm": 1.158745199801717, "learning_rate": 4.253794352307695e-05, "loss": 0.2514, "step": 9274 }, { "epoch": 1.0998458437092375, "grad_norm": 0.9640739236464869, "learning_rate": 4.253623282010024e-05, "loss": 0.1944, "step": 9275 }, { "epoch": 1.0999644254713625, "grad_norm": 1.304531495557837, "learning_rate": 4.253452195546097e-05, "loss": 0.2977, "step": 9276 }, { "epoch": 1.1000830072334875, "grad_norm": 1.3722428705246934, "learning_rate": 4.253281092917492e-05, "loss": 0.2273, "step": 9277 }, { "epoch": 1.1002015889956125, "grad_norm": 1.5070009801935171, "learning_rate": 4.253109974125784e-05, "loss": 0.2608, "step": 9278 }, { "epoch": 1.1003201707577375, "grad_norm": 1.1194295565612786, "learning_rate": 4.252938839172552e-05, "loss": 0.2368, "step": 9279 }, { "epoch": 1.1004387525198625, "grad_norm": 0.8767194532975879, "learning_rate": 4.252767688059374e-05, "loss": 0.1911, "step": 9280 }, { "epoch": 1.1005573342819874, "grad_norm": 1.608366223269653, "learning_rate": 4.252596520787826e-05, "loss": 0.3964, "step": 9281 }, { "epoch": 1.1006759160441124, "grad_norm": 1.2353274023448717, "learning_rate": 4.252425337359489e-05, "loss": 0.2736, "step": 9282 }, { "epoch": 1.1007944978062374, "grad_norm": 1.1803574404904464, "learning_rate": 4.2522541377759385e-05, "loss": 0.2129, "step": 9283 }, { "epoch": 1.1009130795683624, "grad_norm": 1.6385121351753162, "learning_rate": 4.252082922038754e-05, "loss": 0.3201, "step": 9284 }, { "epoch": 1.1010316613304874, "grad_norm": 1.008114358409775, "learning_rate": 4.251911690149513e-05, "loss": 0.2495, "step": 9285 }, { "epoch": 1.1011502430926123, "grad_norm": 0.9658146832001766, "learning_rate": 4.251740442109794e-05, "loss": 0.2042, "step": 9286 }, { "epoch": 1.1012688248547373, "grad_norm": 1.046071983152849, "learning_rate": 4.251569177921178e-05, "loss": 0.2538, "step": 9287 }, { "epoch": 1.1013874066168623, "grad_norm": 1.3460545568560764, "learning_rate": 4.251397897585241e-05, "loss": 0.2636, "step": 9288 }, { "epoch": 1.1015059883789873, "grad_norm": 0.9859513809783174, "learning_rate": 4.251226601103563e-05, "loss": 0.2045, "step": 9289 }, { "epoch": 1.1016245701411123, "grad_norm": 1.2161621367087014, "learning_rate": 4.251055288477723e-05, "loss": 0.2516, "step": 9290 }, { "epoch": 1.1017431519032372, "grad_norm": 0.9868836912484316, "learning_rate": 4.250883959709301e-05, "loss": 0.2032, "step": 9291 }, { "epoch": 1.1018617336653622, "grad_norm": 1.4434264549451872, "learning_rate": 4.250712614799876e-05, "loss": 0.2946, "step": 9292 }, { "epoch": 1.1019803154274872, "grad_norm": 1.4270436182454667, "learning_rate": 4.250541253751028e-05, "loss": 0.2856, "step": 9293 }, { "epoch": 1.1020988971896122, "grad_norm": 1.6870539921653178, "learning_rate": 4.250369876564335e-05, "loss": 0.3867, "step": 9294 }, { "epoch": 1.1022174789517372, "grad_norm": 1.4011801443581566, "learning_rate": 4.2501984832413786e-05, "loss": 0.2266, "step": 9295 }, { "epoch": 1.1023360607138621, "grad_norm": 1.1675747330713295, "learning_rate": 4.250027073783739e-05, "loss": 0.254, "step": 9296 }, { "epoch": 1.1024546424759871, "grad_norm": 1.4292703642100115, "learning_rate": 4.2498556481929966e-05, "loss": 0.2699, "step": 9297 }, { "epoch": 1.102573224238112, "grad_norm": 1.2193584701203546, "learning_rate": 4.249684206470729e-05, "loss": 0.275, "step": 9298 }, { "epoch": 1.102691806000237, "grad_norm": 1.3126048209807395, "learning_rate": 4.24951274861852e-05, "loss": 0.3379, "step": 9299 }, { "epoch": 1.102810387762362, "grad_norm": 1.5592181336519266, "learning_rate": 4.249341274637948e-05, "loss": 0.3044, "step": 9300 }, { "epoch": 1.102928969524487, "grad_norm": 1.3367268275100888, "learning_rate": 4.249169784530595e-05, "loss": 0.2421, "step": 9301 }, { "epoch": 1.103047551286612, "grad_norm": 1.5044796789621735, "learning_rate": 4.248998278298042e-05, "loss": 0.2968, "step": 9302 }, { "epoch": 1.103166133048737, "grad_norm": 1.3621607552184096, "learning_rate": 4.2488267559418696e-05, "loss": 0.2765, "step": 9303 }, { "epoch": 1.103284714810862, "grad_norm": 1.0552376575642626, "learning_rate": 4.248655217463658e-05, "loss": 0.2265, "step": 9304 }, { "epoch": 1.1034032965729872, "grad_norm": 1.6813935415320185, "learning_rate": 4.2484836628649905e-05, "loss": 0.3181, "step": 9305 }, { "epoch": 1.103521878335112, "grad_norm": 1.2767808830664802, "learning_rate": 4.248312092147447e-05, "loss": 0.29, "step": 9306 }, { "epoch": 1.1036404600972372, "grad_norm": 1.399857059821546, "learning_rate": 4.248140505312611e-05, "loss": 0.2794, "step": 9307 }, { "epoch": 1.1037590418593621, "grad_norm": 1.2093920531586615, "learning_rate": 4.247968902362063e-05, "loss": 0.2454, "step": 9308 }, { "epoch": 1.1038776236214871, "grad_norm": 1.3994969088574207, "learning_rate": 4.2477972832973846e-05, "loss": 0.3146, "step": 9309 }, { "epoch": 1.103996205383612, "grad_norm": 1.4788199275818366, "learning_rate": 4.247625648120159e-05, "loss": 0.3718, "step": 9310 }, { "epoch": 1.104114787145737, "grad_norm": 0.9641998105988202, "learning_rate": 4.2474539968319684e-05, "loss": 0.2068, "step": 9311 }, { "epoch": 1.104233368907862, "grad_norm": 1.1436147867511381, "learning_rate": 4.247282329434394e-05, "loss": 0.2217, "step": 9312 }, { "epoch": 1.104351950669987, "grad_norm": 1.4741519451135268, "learning_rate": 4.24711064592902e-05, "loss": 0.4207, "step": 9313 }, { "epoch": 1.104470532432112, "grad_norm": 1.0264992321106334, "learning_rate": 4.246938946317427e-05, "loss": 0.2454, "step": 9314 }, { "epoch": 1.104589114194237, "grad_norm": 0.8517851214075344, "learning_rate": 4.2467672306012e-05, "loss": 0.1638, "step": 9315 }, { "epoch": 1.104707695956362, "grad_norm": 0.917733094422889, "learning_rate": 4.246595498781922e-05, "loss": 0.2032, "step": 9316 }, { "epoch": 1.104826277718487, "grad_norm": 1.3223196503888859, "learning_rate": 4.2464237508611735e-05, "loss": 0.2828, "step": 9317 }, { "epoch": 1.104944859480612, "grad_norm": 1.264934546104584, "learning_rate": 4.246251986840541e-05, "loss": 0.2937, "step": 9318 }, { "epoch": 1.105063441242737, "grad_norm": 1.0605045406961018, "learning_rate": 4.2460802067216066e-05, "loss": 0.2254, "step": 9319 }, { "epoch": 1.105182023004862, "grad_norm": 1.1910642578594683, "learning_rate": 4.2459084105059534e-05, "loss": 0.2063, "step": 9320 }, { "epoch": 1.1053006047669869, "grad_norm": 0.7763314191221506, "learning_rate": 4.245736598195165e-05, "loss": 0.214, "step": 9321 }, { "epoch": 1.1054191865291119, "grad_norm": 1.0913437753766362, "learning_rate": 4.2455647697908266e-05, "loss": 0.2314, "step": 9322 }, { "epoch": 1.1055377682912368, "grad_norm": 1.1599151168793158, "learning_rate": 4.2453929252945224e-05, "loss": 0.2661, "step": 9323 }, { "epoch": 1.1056563500533618, "grad_norm": 0.9895800269070656, "learning_rate": 4.2452210647078346e-05, "loss": 0.1995, "step": 9324 }, { "epoch": 1.1057749318154868, "grad_norm": 1.6174268668353318, "learning_rate": 4.245049188032349e-05, "loss": 0.4101, "step": 9325 }, { "epoch": 1.1058935135776118, "grad_norm": 1.0487360735263698, "learning_rate": 4.2448772952696495e-05, "loss": 0.2091, "step": 9326 }, { "epoch": 1.1060120953397368, "grad_norm": 1.333670053505605, "learning_rate": 4.2447053864213215e-05, "loss": 0.3632, "step": 9327 }, { "epoch": 1.1061306771018617, "grad_norm": 1.2898698433917748, "learning_rate": 4.24453346148895e-05, "loss": 0.2301, "step": 9328 }, { "epoch": 1.1062492588639867, "grad_norm": 0.9058129632799933, "learning_rate": 4.244361520474118e-05, "loss": 0.2089, "step": 9329 }, { "epoch": 1.1063678406261117, "grad_norm": 1.2382270100198876, "learning_rate": 4.2441895633784126e-05, "loss": 0.2847, "step": 9330 }, { "epoch": 1.1064864223882367, "grad_norm": 1.2288051950548058, "learning_rate": 4.244017590203418e-05, "loss": 0.2881, "step": 9331 }, { "epoch": 1.1066050041503617, "grad_norm": 1.0930599724047654, "learning_rate": 4.2438456009507196e-05, "loss": 0.2054, "step": 9332 }, { "epoch": 1.1067235859124867, "grad_norm": 0.8781065280117154, "learning_rate": 4.2436735956219034e-05, "loss": 0.1791, "step": 9333 }, { "epoch": 1.1068421676746116, "grad_norm": 1.066966617252039, "learning_rate": 4.243501574218555e-05, "loss": 0.2307, "step": 9334 }, { "epoch": 1.1069607494367366, "grad_norm": 1.1921986389372716, "learning_rate": 4.243329536742261e-05, "loss": 0.2581, "step": 9335 }, { "epoch": 1.1070793311988616, "grad_norm": 1.2531371679081817, "learning_rate": 4.2431574831946055e-05, "loss": 0.2805, "step": 9336 }, { "epoch": 1.1071979129609866, "grad_norm": 1.4622659554263169, "learning_rate": 4.242985413577175e-05, "loss": 0.2714, "step": 9337 }, { "epoch": 1.1073164947231116, "grad_norm": 0.9395156927588587, "learning_rate": 4.2428133278915574e-05, "loss": 0.2438, "step": 9338 }, { "epoch": 1.1074350764852365, "grad_norm": 1.2496546659695043, "learning_rate": 4.2426412261393375e-05, "loss": 0.2962, "step": 9339 }, { "epoch": 1.1075536582473615, "grad_norm": 1.3173542123018438, "learning_rate": 4.242469108322104e-05, "loss": 0.2726, "step": 9340 }, { "epoch": 1.1076722400094865, "grad_norm": 0.8307401541361101, "learning_rate": 4.2422969744414396e-05, "loss": 0.2089, "step": 9341 }, { "epoch": 1.1077908217716115, "grad_norm": 1.2067841137441755, "learning_rate": 4.2421248244989356e-05, "loss": 0.3581, "step": 9342 }, { "epoch": 1.1079094035337365, "grad_norm": 1.2759106507290412, "learning_rate": 4.241952658496176e-05, "loss": 0.2579, "step": 9343 }, { "epoch": 1.1080279852958614, "grad_norm": 0.976026287081038, "learning_rate": 4.241780476434749e-05, "loss": 0.1961, "step": 9344 }, { "epoch": 1.1081465670579864, "grad_norm": 1.3154586542919822, "learning_rate": 4.241608278316243e-05, "loss": 0.3101, "step": 9345 }, { "epoch": 1.1082651488201114, "grad_norm": 1.308331170537835, "learning_rate": 4.241436064142244e-05, "loss": 0.2776, "step": 9346 }, { "epoch": 1.1083837305822364, "grad_norm": 1.2661938045211834, "learning_rate": 4.241263833914339e-05, "loss": 0.2554, "step": 9347 }, { "epoch": 1.1085023123443614, "grad_norm": 1.3989914547762192, "learning_rate": 4.241091587634117e-05, "loss": 0.3095, "step": 9348 }, { "epoch": 1.1086208941064863, "grad_norm": 0.8965737987075469, "learning_rate": 4.240919325303167e-05, "loss": 0.1786, "step": 9349 }, { "epoch": 1.1087394758686113, "grad_norm": 0.9400241904867619, "learning_rate": 4.2407470469230735e-05, "loss": 0.2369, "step": 9350 }, { "epoch": 1.1088580576307363, "grad_norm": 1.306578829516904, "learning_rate": 4.240574752495429e-05, "loss": 0.2902, "step": 9351 }, { "epoch": 1.1089766393928613, "grad_norm": 1.1881484947589362, "learning_rate": 4.240402442021818e-05, "loss": 0.2438, "step": 9352 }, { "epoch": 1.1090952211549863, "grad_norm": 1.0134823822588304, "learning_rate": 4.240230115503832e-05, "loss": 0.1934, "step": 9353 }, { "epoch": 1.1092138029171112, "grad_norm": 1.205425685088804, "learning_rate": 4.240057772943057e-05, "loss": 0.2863, "step": 9354 }, { "epoch": 1.1093323846792362, "grad_norm": 1.0698543787884205, "learning_rate": 4.239885414341084e-05, "loss": 0.257, "step": 9355 }, { "epoch": 1.1094509664413614, "grad_norm": 1.3221395074864668, "learning_rate": 4.239713039699501e-05, "loss": 0.3091, "step": 9356 }, { "epoch": 1.1095695482034862, "grad_norm": 1.2481239201762337, "learning_rate": 4.2395406490198973e-05, "loss": 0.2768, "step": 9357 }, { "epoch": 1.1096881299656114, "grad_norm": 1.74169538114162, "learning_rate": 4.239368242303862e-05, "loss": 0.4179, "step": 9358 }, { "epoch": 1.1098067117277364, "grad_norm": 1.3180577683217374, "learning_rate": 4.2391958195529846e-05, "loss": 0.2779, "step": 9359 }, { "epoch": 1.1099252934898614, "grad_norm": 1.2701918132334962, "learning_rate": 4.239023380768854e-05, "loss": 0.2635, "step": 9360 }, { "epoch": 1.1100438752519863, "grad_norm": 1.3631838812835353, "learning_rate": 4.2388509259530604e-05, "loss": 0.3254, "step": 9361 }, { "epoch": 1.1101624570141113, "grad_norm": 0.7972353374308723, "learning_rate": 4.238678455107194e-05, "loss": 0.163, "step": 9362 }, { "epoch": 1.1102810387762363, "grad_norm": 1.0371460946691855, "learning_rate": 4.2385059682328444e-05, "loss": 0.1939, "step": 9363 }, { "epoch": 1.1103996205383613, "grad_norm": 1.1915398286047387, "learning_rate": 4.238333465331601e-05, "loss": 0.2173, "step": 9364 }, { "epoch": 1.1105182023004863, "grad_norm": 1.1284254492101695, "learning_rate": 4.238160946405055e-05, "loss": 0.2083, "step": 9365 }, { "epoch": 1.1106367840626112, "grad_norm": 1.0585046521096821, "learning_rate": 4.2379884114547965e-05, "loss": 0.2446, "step": 9366 }, { "epoch": 1.1107553658247362, "grad_norm": 1.0933002639459048, "learning_rate": 4.237815860482416e-05, "loss": 0.2608, "step": 9367 }, { "epoch": 1.1108739475868612, "grad_norm": 0.9340681428361943, "learning_rate": 4.237643293489505e-05, "loss": 0.184, "step": 9368 }, { "epoch": 1.1109925293489862, "grad_norm": 0.9209828699727749, "learning_rate": 4.237470710477653e-05, "loss": 0.1983, "step": 9369 }, { "epoch": 1.1111111111111112, "grad_norm": 1.2018983308457902, "learning_rate": 4.237298111448452e-05, "loss": 0.2774, "step": 9370 }, { "epoch": 1.1112296928732361, "grad_norm": 1.3194538894159258, "learning_rate": 4.237125496403492e-05, "loss": 0.2944, "step": 9371 }, { "epoch": 1.1113482746353611, "grad_norm": 1.3496461288269717, "learning_rate": 4.236952865344367e-05, "loss": 0.2635, "step": 9372 }, { "epoch": 1.111466856397486, "grad_norm": 1.1640917505115014, "learning_rate": 4.2367802182726653e-05, "loss": 0.2499, "step": 9373 }, { "epoch": 1.111585438159611, "grad_norm": 1.2814509491174613, "learning_rate": 4.23660755518998e-05, "loss": 0.328, "step": 9374 }, { "epoch": 1.111704019921736, "grad_norm": 0.8983563021354333, "learning_rate": 4.236434876097902e-05, "loss": 0.2128, "step": 9375 }, { "epoch": 1.111822601683861, "grad_norm": 1.0511252212535487, "learning_rate": 4.236262180998025e-05, "loss": 0.1914, "step": 9376 }, { "epoch": 1.111941183445986, "grad_norm": 1.0179478862264009, "learning_rate": 4.2360894698919384e-05, "loss": 0.2648, "step": 9377 }, { "epoch": 1.112059765208111, "grad_norm": 1.305092932579881, "learning_rate": 4.235916742781236e-05, "loss": 0.2481, "step": 9378 }, { "epoch": 1.112178346970236, "grad_norm": 1.6380154858166616, "learning_rate": 4.2357439996675116e-05, "loss": 0.3442, "step": 9379 }, { "epoch": 1.112296928732361, "grad_norm": 0.9438072231157584, "learning_rate": 4.235571240552355e-05, "loss": 0.2474, "step": 9380 }, { "epoch": 1.112415510494486, "grad_norm": 1.45313262963439, "learning_rate": 4.235398465437359e-05, "loss": 0.2867, "step": 9381 }, { "epoch": 1.112534092256611, "grad_norm": 1.698131573426022, "learning_rate": 4.235225674324118e-05, "loss": 0.4831, "step": 9382 }, { "epoch": 1.112652674018736, "grad_norm": 1.2655280682618903, "learning_rate": 4.235052867214225e-05, "loss": 0.3239, "step": 9383 }, { "epoch": 1.112771255780861, "grad_norm": 1.142053069965225, "learning_rate": 4.234880044109271e-05, "loss": 0.2545, "step": 9384 }, { "epoch": 1.1128898375429859, "grad_norm": 1.0063932051429743, "learning_rate": 4.234707205010851e-05, "loss": 0.236, "step": 9385 }, { "epoch": 1.1130084193051109, "grad_norm": 0.9415770148740454, "learning_rate": 4.234534349920558e-05, "loss": 0.1664, "step": 9386 }, { "epoch": 1.1131270010672358, "grad_norm": 1.4801871318378694, "learning_rate": 4.234361478839985e-05, "loss": 0.27, "step": 9387 }, { "epoch": 1.1132455828293608, "grad_norm": 1.3533804243634349, "learning_rate": 4.2341885917707256e-05, "loss": 0.3012, "step": 9388 }, { "epoch": 1.1133641645914858, "grad_norm": 1.1878874764927498, "learning_rate": 4.234015688714374e-05, "loss": 0.2827, "step": 9389 }, { "epoch": 1.1134827463536108, "grad_norm": 1.104991891328091, "learning_rate": 4.2338427696725245e-05, "loss": 0.2021, "step": 9390 }, { "epoch": 1.1136013281157358, "grad_norm": 1.0568907081559218, "learning_rate": 4.233669834646772e-05, "loss": 0.2498, "step": 9391 }, { "epoch": 1.1137199098778607, "grad_norm": 1.1869196093944383, "learning_rate": 4.233496883638708e-05, "loss": 0.2458, "step": 9392 }, { "epoch": 1.1138384916399857, "grad_norm": 0.9460801052239469, "learning_rate": 4.2333239166499284e-05, "loss": 0.1794, "step": 9393 }, { "epoch": 1.1139570734021107, "grad_norm": 1.11204845178699, "learning_rate": 4.2331509336820284e-05, "loss": 0.1973, "step": 9394 }, { "epoch": 1.1140756551642357, "grad_norm": 1.1705044798617903, "learning_rate": 4.232977934736602e-05, "loss": 0.2412, "step": 9395 }, { "epoch": 1.1141942369263607, "grad_norm": 1.4712884444335375, "learning_rate": 4.232804919815245e-05, "loss": 0.3128, "step": 9396 }, { "epoch": 1.1143128186884856, "grad_norm": 0.8508198771548285, "learning_rate": 4.232631888919551e-05, "loss": 0.1653, "step": 9397 }, { "epoch": 1.1144314004506106, "grad_norm": 1.3948802280579062, "learning_rate": 4.2324588420511145e-05, "loss": 0.3025, "step": 9398 }, { "epoch": 1.1145499822127356, "grad_norm": 1.2557174325111164, "learning_rate": 4.232285779211533e-05, "loss": 0.241, "step": 9399 }, { "epoch": 1.1146685639748606, "grad_norm": 1.5892370728618912, "learning_rate": 4.2321127004024014e-05, "loss": 0.3075, "step": 9400 }, { "epoch": 1.1147871457369856, "grad_norm": 1.0022969959599117, "learning_rate": 4.2319396056253134e-05, "loss": 0.1913, "step": 9401 }, { "epoch": 1.1149057274991105, "grad_norm": 1.2818853799490868, "learning_rate": 4.231766494881867e-05, "loss": 0.2249, "step": 9402 }, { "epoch": 1.1150243092612355, "grad_norm": 1.2097298036876527, "learning_rate": 4.231593368173658e-05, "loss": 0.2342, "step": 9403 }, { "epoch": 1.1151428910233605, "grad_norm": 0.9926001076374248, "learning_rate": 4.23142022550228e-05, "loss": 0.2513, "step": 9404 }, { "epoch": 1.1152614727854857, "grad_norm": 1.009162547971583, "learning_rate": 4.2312470668693314e-05, "loss": 0.2196, "step": 9405 }, { "epoch": 1.1153800545476105, "grad_norm": 1.07226202427023, "learning_rate": 4.231073892276408e-05, "loss": 0.2315, "step": 9406 }, { "epoch": 1.1154986363097357, "grad_norm": 1.2399523483983215, "learning_rate": 4.2309007017251056e-05, "loss": 0.2621, "step": 9407 }, { "epoch": 1.1156172180718607, "grad_norm": 1.4588646823172582, "learning_rate": 4.230727495217021e-05, "loss": 0.3572, "step": 9408 }, { "epoch": 1.1157357998339856, "grad_norm": 1.35815004987775, "learning_rate": 4.2305542727537515e-05, "loss": 0.3358, "step": 9409 }, { "epoch": 1.1158543815961106, "grad_norm": 0.9866226145159706, "learning_rate": 4.230381034336894e-05, "loss": 0.2286, "step": 9410 }, { "epoch": 1.1159729633582356, "grad_norm": 0.7855986223441359, "learning_rate": 4.2302077799680465e-05, "loss": 0.1689, "step": 9411 }, { "epoch": 1.1160915451203606, "grad_norm": 1.2162999648820363, "learning_rate": 4.230034509648804e-05, "loss": 0.2547, "step": 9412 }, { "epoch": 1.1162101268824856, "grad_norm": 1.0287737232251362, "learning_rate": 4.229861223380764e-05, "loss": 0.2105, "step": 9413 }, { "epoch": 1.1163287086446105, "grad_norm": 1.298022986063039, "learning_rate": 4.229687921165525e-05, "loss": 0.236, "step": 9414 }, { "epoch": 1.1164472904067355, "grad_norm": 1.265667465140052, "learning_rate": 4.2295146030046854e-05, "loss": 0.3069, "step": 9415 }, { "epoch": 1.1165658721688605, "grad_norm": 1.4455172004118615, "learning_rate": 4.2293412688998416e-05, "loss": 0.3132, "step": 9416 }, { "epoch": 1.1166844539309855, "grad_norm": 1.3317260469665473, "learning_rate": 4.229167918852592e-05, "loss": 0.3028, "step": 9417 }, { "epoch": 1.1168030356931105, "grad_norm": 0.956842399620342, "learning_rate": 4.2289945528645344e-05, "loss": 0.203, "step": 9418 }, { "epoch": 1.1169216174552354, "grad_norm": 1.134431042887857, "learning_rate": 4.2288211709372674e-05, "loss": 0.2265, "step": 9419 }, { "epoch": 1.1170401992173604, "grad_norm": 1.7916629101204904, "learning_rate": 4.228647773072389e-05, "loss": 0.4385, "step": 9420 }, { "epoch": 1.1171587809794854, "grad_norm": 1.4361868079613795, "learning_rate": 4.228474359271498e-05, "loss": 0.2951, "step": 9421 }, { "epoch": 1.1172773627416104, "grad_norm": 1.0682058176687264, "learning_rate": 4.228300929536193e-05, "loss": 0.2653, "step": 9422 }, { "epoch": 1.1173959445037354, "grad_norm": 1.0706852418572903, "learning_rate": 4.2281274838680735e-05, "loss": 0.1983, "step": 9423 }, { "epoch": 1.1175145262658603, "grad_norm": 0.9550560463854861, "learning_rate": 4.227954022268738e-05, "loss": 0.1798, "step": 9424 }, { "epoch": 1.1176331080279853, "grad_norm": 1.4683541252510492, "learning_rate": 4.227780544739783e-05, "loss": 0.2582, "step": 9425 }, { "epoch": 1.1177516897901103, "grad_norm": 1.5650915054174988, "learning_rate": 4.227607051282813e-05, "loss": 0.4009, "step": 9426 }, { "epoch": 1.1178702715522353, "grad_norm": 1.037122860398587, "learning_rate": 4.2274335418994224e-05, "loss": 0.2252, "step": 9427 }, { "epoch": 1.1179888533143603, "grad_norm": 1.1212525627206786, "learning_rate": 4.2272600165912144e-05, "loss": 0.2361, "step": 9428 }, { "epoch": 1.1181074350764852, "grad_norm": 1.6344693417473175, "learning_rate": 4.227086475359786e-05, "loss": 0.4027, "step": 9429 }, { "epoch": 1.1182260168386102, "grad_norm": 0.941061109470346, "learning_rate": 4.226912918206739e-05, "loss": 0.2002, "step": 9430 }, { "epoch": 1.1183445986007352, "grad_norm": 1.035921419363779, "learning_rate": 4.2267393451336724e-05, "loss": 0.2161, "step": 9431 }, { "epoch": 1.1184631803628602, "grad_norm": 1.0542905322076106, "learning_rate": 4.226565756142186e-05, "loss": 0.1981, "step": 9432 }, { "epoch": 1.1185817621249852, "grad_norm": 1.1377264948338868, "learning_rate": 4.226392151233881e-05, "loss": 0.2324, "step": 9433 }, { "epoch": 1.1187003438871101, "grad_norm": 1.122719189518195, "learning_rate": 4.2262185304103574e-05, "loss": 0.2762, "step": 9434 }, { "epoch": 1.1188189256492351, "grad_norm": 1.0785445076213456, "learning_rate": 4.2260448936732155e-05, "loss": 0.2352, "step": 9435 }, { "epoch": 1.11893750741136, "grad_norm": 0.8194734162702694, "learning_rate": 4.225871241024056e-05, "loss": 0.2154, "step": 9436 }, { "epoch": 1.119056089173485, "grad_norm": 1.1417342467989287, "learning_rate": 4.2256975724644815e-05, "loss": 0.2885, "step": 9437 }, { "epoch": 1.11917467093561, "grad_norm": 1.255196960447618, "learning_rate": 4.22552388799609e-05, "loss": 0.2737, "step": 9438 }, { "epoch": 1.119293252697735, "grad_norm": 0.9707589884965621, "learning_rate": 4.225350187620485e-05, "loss": 0.2033, "step": 9439 }, { "epoch": 1.11941183445986, "grad_norm": 1.2235711176442867, "learning_rate": 4.225176471339267e-05, "loss": 0.251, "step": 9440 }, { "epoch": 1.119530416221985, "grad_norm": 1.2315301315296676, "learning_rate": 4.225002739154038e-05, "loss": 0.2634, "step": 9441 }, { "epoch": 1.11964899798411, "grad_norm": 1.1656298272407657, "learning_rate": 4.2248289910663976e-05, "loss": 0.222, "step": 9442 }, { "epoch": 1.119767579746235, "grad_norm": 0.9635057965986775, "learning_rate": 4.2246552270779504e-05, "loss": 0.1756, "step": 9443 }, { "epoch": 1.11988616150836, "grad_norm": 1.0789148210403412, "learning_rate": 4.224481447190296e-05, "loss": 0.2206, "step": 9444 }, { "epoch": 1.120004743270485, "grad_norm": 1.8488426018393302, "learning_rate": 4.224307651405038e-05, "loss": 0.5383, "step": 9445 }, { "epoch": 1.12012332503261, "grad_norm": 1.2793495416222123, "learning_rate": 4.224133839723778e-05, "loss": 0.2208, "step": 9446 }, { "epoch": 1.120241906794735, "grad_norm": 1.470932349421977, "learning_rate": 4.223960012148118e-05, "loss": 0.3492, "step": 9447 }, { "epoch": 1.1203604885568599, "grad_norm": 1.565275508030098, "learning_rate": 4.223786168679661e-05, "loss": 0.3517, "step": 9448 }, { "epoch": 1.1204790703189849, "grad_norm": 0.9734316245000455, "learning_rate": 4.223612309320009e-05, "loss": 0.2507, "step": 9449 }, { "epoch": 1.1205976520811098, "grad_norm": 1.400912080661782, "learning_rate": 4.223438434070765e-05, "loss": 0.3433, "step": 9450 }, { "epoch": 1.1207162338432348, "grad_norm": 1.8926519354009306, "learning_rate": 4.223264542933532e-05, "loss": 0.4143, "step": 9451 }, { "epoch": 1.1208348156053598, "grad_norm": 0.9864296708791939, "learning_rate": 4.2230906359099136e-05, "loss": 0.2065, "step": 9452 }, { "epoch": 1.1209533973674848, "grad_norm": 1.101689237497081, "learning_rate": 4.222916713001512e-05, "loss": 0.281, "step": 9453 }, { "epoch": 1.1210719791296098, "grad_norm": 1.0995415390116752, "learning_rate": 4.222742774209932e-05, "loss": 0.2245, "step": 9454 }, { "epoch": 1.1211905608917347, "grad_norm": 1.1280014544469223, "learning_rate": 4.2225688195367755e-05, "loss": 0.2517, "step": 9455 }, { "epoch": 1.12130914265386, "grad_norm": 0.8191708169134806, "learning_rate": 4.2223948489836475e-05, "loss": 0.1978, "step": 9456 }, { "epoch": 1.1214277244159847, "grad_norm": 1.2390824850476863, "learning_rate": 4.22222086255215e-05, "loss": 0.2462, "step": 9457 }, { "epoch": 1.12154630617811, "grad_norm": 1.3340081564607897, "learning_rate": 4.222046860243889e-05, "loss": 0.2695, "step": 9458 }, { "epoch": 1.121664887940235, "grad_norm": 0.8523200799988407, "learning_rate": 4.221872842060467e-05, "loss": 0.1478, "step": 9459 }, { "epoch": 1.1217834697023599, "grad_norm": 1.5991545322615488, "learning_rate": 4.221698808003489e-05, "loss": 0.3828, "step": 9460 }, { "epoch": 1.1219020514644849, "grad_norm": 0.847421306682847, "learning_rate": 4.22152475807456e-05, "loss": 0.1735, "step": 9461 }, { "epoch": 1.1220206332266098, "grad_norm": 1.1153896131730445, "learning_rate": 4.2213506922752836e-05, "loss": 0.2368, "step": 9462 }, { "epoch": 1.1221392149887348, "grad_norm": 1.4103354425365464, "learning_rate": 4.2211766106072637e-05, "loss": 0.3396, "step": 9463 }, { "epoch": 1.1222577967508598, "grad_norm": 1.6252600445479943, "learning_rate": 4.221002513072106e-05, "loss": 0.3657, "step": 9464 }, { "epoch": 1.1223763785129848, "grad_norm": 1.4062158536725813, "learning_rate": 4.220828399671417e-05, "loss": 0.2796, "step": 9465 }, { "epoch": 1.1224949602751098, "grad_norm": 1.3783722370947131, "learning_rate": 4.2206542704068e-05, "loss": 0.3252, "step": 9466 }, { "epoch": 1.1226135420372347, "grad_norm": 1.0755219536313942, "learning_rate": 4.220480125279861e-05, "loss": 0.2281, "step": 9467 }, { "epoch": 1.1227321237993597, "grad_norm": 1.0249358945202947, "learning_rate": 4.220305964292204e-05, "loss": 0.2502, "step": 9468 }, { "epoch": 1.1228507055614847, "grad_norm": 1.014782925424238, "learning_rate": 4.2201317874454363e-05, "loss": 0.2281, "step": 9469 }, { "epoch": 1.1229692873236097, "grad_norm": 1.1291828665297095, "learning_rate": 4.2199575947411617e-05, "loss": 0.1808, "step": 9470 }, { "epoch": 1.1230878690857347, "grad_norm": 1.1030525076530564, "learning_rate": 4.2197833861809886e-05, "loss": 0.2661, "step": 9471 }, { "epoch": 1.1232064508478596, "grad_norm": 0.9824430815731175, "learning_rate": 4.219609161766521e-05, "loss": 0.1645, "step": 9472 }, { "epoch": 1.1233250326099846, "grad_norm": 1.3746952793411726, "learning_rate": 4.2194349214993656e-05, "loss": 0.2386, "step": 9473 }, { "epoch": 1.1234436143721096, "grad_norm": 1.3378276795758457, "learning_rate": 4.219260665381129e-05, "loss": 0.3062, "step": 9474 }, { "epoch": 1.1235621961342346, "grad_norm": 1.0161395882345192, "learning_rate": 4.2190863934134174e-05, "loss": 0.2225, "step": 9475 }, { "epoch": 1.1236807778963596, "grad_norm": 1.291029472157644, "learning_rate": 4.218912105597838e-05, "loss": 0.2624, "step": 9476 }, { "epoch": 1.1237993596584845, "grad_norm": 1.457849123181044, "learning_rate": 4.218737801935995e-05, "loss": 0.2934, "step": 9477 }, { "epoch": 1.1239179414206095, "grad_norm": 0.8935444942793107, "learning_rate": 4.218563482429498e-05, "loss": 0.1693, "step": 9478 }, { "epoch": 1.1240365231827345, "grad_norm": 0.9377205719367073, "learning_rate": 4.2183891470799536e-05, "loss": 0.1835, "step": 9479 }, { "epoch": 1.1241551049448595, "grad_norm": 1.181045941812752, "learning_rate": 4.2182147958889685e-05, "loss": 0.2492, "step": 9480 }, { "epoch": 1.1242736867069845, "grad_norm": 0.9478662631935663, "learning_rate": 4.2180404288581494e-05, "loss": 0.2, "step": 9481 }, { "epoch": 1.1243922684691094, "grad_norm": 1.023618891006282, "learning_rate": 4.217866045989105e-05, "loss": 0.2594, "step": 9482 }, { "epoch": 1.1245108502312344, "grad_norm": 1.38338608803272, "learning_rate": 4.2176916472834414e-05, "loss": 0.296, "step": 9483 }, { "epoch": 1.1246294319933594, "grad_norm": 0.9512473885866779, "learning_rate": 4.217517232742768e-05, "loss": 0.2053, "step": 9484 }, { "epoch": 1.1247480137554844, "grad_norm": 1.5949051944152366, "learning_rate": 4.217342802368691e-05, "loss": 0.3626, "step": 9485 }, { "epoch": 1.1248665955176094, "grad_norm": 1.1557801894731636, "learning_rate": 4.2171683561628206e-05, "loss": 0.233, "step": 9486 }, { "epoch": 1.1249851772797343, "grad_norm": 1.0951945761196924, "learning_rate": 4.216993894126763e-05, "loss": 0.2461, "step": 9487 }, { "epoch": 1.1251037590418593, "grad_norm": 1.2276149908987795, "learning_rate": 4.2168194162621263e-05, "loss": 0.2507, "step": 9488 }, { "epoch": 1.1252223408039843, "grad_norm": 1.0878350625309503, "learning_rate": 4.2166449225705214e-05, "loss": 0.1807, "step": 9489 }, { "epoch": 1.1253409225661093, "grad_norm": 1.2118505474902643, "learning_rate": 4.216470413053555e-05, "loss": 0.2719, "step": 9490 }, { "epoch": 1.1254595043282343, "grad_norm": 1.1973040154396364, "learning_rate": 4.2162958877128354e-05, "loss": 0.2641, "step": 9491 }, { "epoch": 1.1255780860903593, "grad_norm": 1.0506003237249484, "learning_rate": 4.216121346549973e-05, "loss": 0.2018, "step": 9492 }, { "epoch": 1.1256966678524842, "grad_norm": 1.9136858208945668, "learning_rate": 4.215946789566576e-05, "loss": 0.4221, "step": 9493 }, { "epoch": 1.1258152496146092, "grad_norm": 1.1709987133461066, "learning_rate": 4.215772216764254e-05, "loss": 0.2251, "step": 9494 }, { "epoch": 1.1259338313767342, "grad_norm": 1.145604640747553, "learning_rate": 4.215597628144616e-05, "loss": 0.2274, "step": 9495 }, { "epoch": 1.1260524131388592, "grad_norm": 1.0202132638577668, "learning_rate": 4.215423023709272e-05, "loss": 0.2514, "step": 9496 }, { "epoch": 1.1261709949009842, "grad_norm": 1.0252527934689764, "learning_rate": 4.215248403459832e-05, "loss": 0.2196, "step": 9497 }, { "epoch": 1.1262895766631091, "grad_norm": 1.0970065037496002, "learning_rate": 4.215073767397904e-05, "loss": 0.2066, "step": 9498 }, { "epoch": 1.1264081584252341, "grad_norm": 1.277479675639411, "learning_rate": 4.2148991155250996e-05, "loss": 0.2971, "step": 9499 }, { "epoch": 1.126526740187359, "grad_norm": 0.9927236121404561, "learning_rate": 4.214724447843028e-05, "loss": 0.2231, "step": 9500 }, { "epoch": 1.126645321949484, "grad_norm": 1.3171319767368792, "learning_rate": 4.2145497643532994e-05, "loss": 0.2953, "step": 9501 }, { "epoch": 1.126763903711609, "grad_norm": 1.2263969195848192, "learning_rate": 4.2143750650575254e-05, "loss": 0.2591, "step": 9502 }, { "epoch": 1.1268824854737343, "grad_norm": 1.364307485943857, "learning_rate": 4.2142003499573147e-05, "loss": 0.2547, "step": 9503 }, { "epoch": 1.127001067235859, "grad_norm": 1.9876024658814844, "learning_rate": 4.214025619054279e-05, "loss": 0.3253, "step": 9504 }, { "epoch": 1.1271196489979842, "grad_norm": 1.1343553708268583, "learning_rate": 4.213850872350029e-05, "loss": 0.2207, "step": 9505 }, { "epoch": 1.127238230760109, "grad_norm": 0.9945084851093255, "learning_rate": 4.2136761098461766e-05, "loss": 0.1504, "step": 9506 }, { "epoch": 1.1273568125222342, "grad_norm": 1.1092792721881386, "learning_rate": 4.213501331544331e-05, "loss": 0.2354, "step": 9507 }, { "epoch": 1.127475394284359, "grad_norm": 1.3410752683953866, "learning_rate": 4.213326537446104e-05, "loss": 0.2537, "step": 9508 }, { "epoch": 1.1275939760464841, "grad_norm": 1.1883038857120036, "learning_rate": 4.213151727553108e-05, "loss": 0.2682, "step": 9509 }, { "epoch": 1.1277125578086091, "grad_norm": 1.470514039271162, "learning_rate": 4.212976901866954e-05, "loss": 0.3706, "step": 9510 }, { "epoch": 1.127831139570734, "grad_norm": 1.2652989776827253, "learning_rate": 4.212802060389253e-05, "loss": 0.2781, "step": 9511 }, { "epoch": 1.127949721332859, "grad_norm": 1.6635125300242328, "learning_rate": 4.212627203121617e-05, "loss": 0.4596, "step": 9512 }, { "epoch": 1.128068303094984, "grad_norm": 1.0303967704026114, "learning_rate": 4.212452330065659e-05, "loss": 0.1951, "step": 9513 }, { "epoch": 1.128186884857109, "grad_norm": 0.9587794202929916, "learning_rate": 4.2122774412229914e-05, "loss": 0.144, "step": 9514 }, { "epoch": 1.128305466619234, "grad_norm": 1.285648178940054, "learning_rate": 4.2121025365952244e-05, "loss": 0.2962, "step": 9515 }, { "epoch": 1.128424048381359, "grad_norm": 0.9857441434565866, "learning_rate": 4.2119276161839715e-05, "loss": 0.2392, "step": 9516 }, { "epoch": 1.128542630143484, "grad_norm": 1.1940985457581383, "learning_rate": 4.2117526799908455e-05, "loss": 0.3145, "step": 9517 }, { "epoch": 1.128661211905609, "grad_norm": 1.188513840834025, "learning_rate": 4.211577728017459e-05, "loss": 0.243, "step": 9518 }, { "epoch": 1.128779793667734, "grad_norm": 1.3144092783553007, "learning_rate": 4.211402760265425e-05, "loss": 0.3017, "step": 9519 }, { "epoch": 1.128898375429859, "grad_norm": 0.974110437345182, "learning_rate": 4.211227776736355e-05, "loss": 0.2307, "step": 9520 }, { "epoch": 1.129016957191984, "grad_norm": 0.978076044607706, "learning_rate": 4.211052777431865e-05, "loss": 0.2176, "step": 9521 }, { "epoch": 1.129135538954109, "grad_norm": 1.042448048296959, "learning_rate": 4.2108777623535657e-05, "loss": 0.233, "step": 9522 }, { "epoch": 1.1292541207162339, "grad_norm": 1.278477695545076, "learning_rate": 4.210702731503071e-05, "loss": 0.2548, "step": 9523 }, { "epoch": 1.1293727024783589, "grad_norm": 1.1975634102002457, "learning_rate": 4.2105276848819956e-05, "loss": 0.2135, "step": 9524 }, { "epoch": 1.1294912842404838, "grad_norm": 0.98552305655481, "learning_rate": 4.210352622491953e-05, "loss": 0.229, "step": 9525 }, { "epoch": 1.1296098660026088, "grad_norm": 1.0491441536666648, "learning_rate": 4.210177544334555e-05, "loss": 0.2579, "step": 9526 }, { "epoch": 1.1297284477647338, "grad_norm": 1.1252154564018293, "learning_rate": 4.2100024504114185e-05, "loss": 0.213, "step": 9527 }, { "epoch": 1.1298470295268588, "grad_norm": 0.8729775627013676, "learning_rate": 4.2098273407241565e-05, "loss": 0.1629, "step": 9528 }, { "epoch": 1.1299656112889838, "grad_norm": 0.9234447564164187, "learning_rate": 4.2096522152743824e-05, "loss": 0.1628, "step": 9529 }, { "epoch": 1.1300841930511087, "grad_norm": 1.2140585616289477, "learning_rate": 4.209477074063711e-05, "loss": 0.1852, "step": 9530 }, { "epoch": 1.1302027748132337, "grad_norm": 1.015256998014857, "learning_rate": 4.209301917093759e-05, "loss": 0.2104, "step": 9531 }, { "epoch": 1.1303213565753587, "grad_norm": 0.9750203100611816, "learning_rate": 4.2091267443661375e-05, "loss": 0.2307, "step": 9532 }, { "epoch": 1.1304399383374837, "grad_norm": 1.188611217910767, "learning_rate": 4.2089515558824646e-05, "loss": 0.2329, "step": 9533 }, { "epoch": 1.1305585200996087, "grad_norm": 1.224883317964741, "learning_rate": 4.2087763516443536e-05, "loss": 0.2325, "step": 9534 }, { "epoch": 1.1306771018617336, "grad_norm": 1.5112535019283295, "learning_rate": 4.20860113165342e-05, "loss": 0.341, "step": 9535 }, { "epoch": 1.1307956836238586, "grad_norm": 1.2058590230305346, "learning_rate": 4.2084258959112785e-05, "loss": 0.2811, "step": 9536 }, { "epoch": 1.1309142653859836, "grad_norm": 1.3351531074481564, "learning_rate": 4.208250644419546e-05, "loss": 0.2721, "step": 9537 }, { "epoch": 1.1310328471481086, "grad_norm": 1.6231952693205658, "learning_rate": 4.208075377179837e-05, "loss": 0.3286, "step": 9538 }, { "epoch": 1.1311514289102336, "grad_norm": 1.0566508518965614, "learning_rate": 4.2079000941937685e-05, "loss": 0.234, "step": 9539 }, { "epoch": 1.1312700106723586, "grad_norm": 1.089411180702893, "learning_rate": 4.207724795462955e-05, "loss": 0.2016, "step": 9540 }, { "epoch": 1.1313885924344835, "grad_norm": 0.8923797106022836, "learning_rate": 4.207549480989013e-05, "loss": 0.195, "step": 9541 }, { "epoch": 1.1315071741966085, "grad_norm": 1.401385264675306, "learning_rate": 4.2073741507735586e-05, "loss": 0.3539, "step": 9542 }, { "epoch": 1.1316257559587335, "grad_norm": 1.147712283664233, "learning_rate": 4.207198804818209e-05, "loss": 0.2629, "step": 9543 }, { "epoch": 1.1317443377208585, "grad_norm": 1.3836353840532343, "learning_rate": 4.2070234431245794e-05, "loss": 0.3229, "step": 9544 }, { "epoch": 1.1318629194829835, "grad_norm": 1.3996515972963874, "learning_rate": 4.206848065694286e-05, "loss": 0.2966, "step": 9545 }, { "epoch": 1.1319815012451084, "grad_norm": 1.253022189614683, "learning_rate": 4.206672672528947e-05, "loss": 0.3101, "step": 9546 }, { "epoch": 1.1321000830072334, "grad_norm": 0.8757351680813305, "learning_rate": 4.20649726363018e-05, "loss": 0.2038, "step": 9547 }, { "epoch": 1.1322186647693584, "grad_norm": 1.0727111538596061, "learning_rate": 4.2063218389995995e-05, "loss": 0.2097, "step": 9548 }, { "epoch": 1.1323372465314834, "grad_norm": 1.183969955631657, "learning_rate": 4.206146398638825e-05, "loss": 0.2514, "step": 9549 }, { "epoch": 1.1324558282936084, "grad_norm": 1.561851846783253, "learning_rate": 4.205970942549472e-05, "loss": 0.3829, "step": 9550 }, { "epoch": 1.1325744100557333, "grad_norm": 0.8617201147556757, "learning_rate": 4.205795470733159e-05, "loss": 0.2066, "step": 9551 }, { "epoch": 1.1326929918178583, "grad_norm": 1.1490108560894308, "learning_rate": 4.2056199831915045e-05, "loss": 0.2834, "step": 9552 }, { "epoch": 1.1328115735799833, "grad_norm": 1.0567016518899273, "learning_rate": 4.2054444799261253e-05, "loss": 0.2268, "step": 9553 }, { "epoch": 1.1329301553421085, "grad_norm": 1.3169108425541456, "learning_rate": 4.205268960938638e-05, "loss": 0.2654, "step": 9554 }, { "epoch": 1.1330487371042333, "grad_norm": 1.0324119472935256, "learning_rate": 4.205093426230663e-05, "loss": 0.2512, "step": 9555 }, { "epoch": 1.1331673188663585, "grad_norm": 1.018042423918306, "learning_rate": 4.2049178758038174e-05, "loss": 0.1959, "step": 9556 }, { "epoch": 1.1332859006284832, "grad_norm": 1.1911158289418267, "learning_rate": 4.204742309659719e-05, "loss": 0.264, "step": 9557 }, { "epoch": 1.1334044823906084, "grad_norm": 0.902694393814431, "learning_rate": 4.204566727799988e-05, "loss": 0.2082, "step": 9558 }, { "epoch": 1.1335230641527334, "grad_norm": 1.1052615841407465, "learning_rate": 4.204391130226242e-05, "loss": 0.2447, "step": 9559 }, { "epoch": 1.1336416459148584, "grad_norm": 1.1963124685820308, "learning_rate": 4.2042155169401e-05, "loss": 0.2321, "step": 9560 }, { "epoch": 1.1337602276769834, "grad_norm": 1.116310676071142, "learning_rate": 4.2040398879431794e-05, "loss": 0.2254, "step": 9561 }, { "epoch": 1.1338788094391083, "grad_norm": 1.0201071065009046, "learning_rate": 4.203864243237102e-05, "loss": 0.2199, "step": 9562 }, { "epoch": 1.1339973912012333, "grad_norm": 1.1849076342217482, "learning_rate": 4.2036885828234856e-05, "loss": 0.2684, "step": 9563 }, { "epoch": 1.1341159729633583, "grad_norm": 1.238528657583238, "learning_rate": 4.203512906703949e-05, "loss": 0.242, "step": 9564 }, { "epoch": 1.1342345547254833, "grad_norm": 1.0563521716306172, "learning_rate": 4.203337214880113e-05, "loss": 0.21, "step": 9565 }, { "epoch": 1.1343531364876083, "grad_norm": 1.1532044032164726, "learning_rate": 4.2031615073535965e-05, "loss": 0.2708, "step": 9566 }, { "epoch": 1.1344717182497333, "grad_norm": 1.1309636208754372, "learning_rate": 4.202985784126019e-05, "loss": 0.2449, "step": 9567 }, { "epoch": 1.1345903000118582, "grad_norm": 1.4646997201424854, "learning_rate": 4.202810045199001e-05, "loss": 0.3174, "step": 9568 }, { "epoch": 1.1347088817739832, "grad_norm": 1.4811087011274067, "learning_rate": 4.202634290574163e-05, "loss": 0.2856, "step": 9569 }, { "epoch": 1.1348274635361082, "grad_norm": 1.6831835754356492, "learning_rate": 4.2024585202531246e-05, "loss": 0.3862, "step": 9570 }, { "epoch": 1.1349460452982332, "grad_norm": 1.2042969711393754, "learning_rate": 4.202282734237506e-05, "loss": 0.3111, "step": 9571 }, { "epoch": 1.1350646270603582, "grad_norm": 1.262085680311482, "learning_rate": 4.202106932528929e-05, "loss": 0.2455, "step": 9572 }, { "epoch": 1.1351832088224831, "grad_norm": 1.5966754916798365, "learning_rate": 4.2019311151290116e-05, "loss": 0.3011, "step": 9573 }, { "epoch": 1.1353017905846081, "grad_norm": 1.1659659255569643, "learning_rate": 4.201755282039378e-05, "loss": 0.2475, "step": 9574 }, { "epoch": 1.135420372346733, "grad_norm": 1.1686446104522077, "learning_rate": 4.201579433261647e-05, "loss": 0.2568, "step": 9575 }, { "epoch": 1.135538954108858, "grad_norm": 0.7386834513813855, "learning_rate": 4.201403568797441e-05, "loss": 0.1647, "step": 9576 }, { "epoch": 1.135657535870983, "grad_norm": 0.8415000485117524, "learning_rate": 4.201227688648379e-05, "loss": 0.1841, "step": 9577 }, { "epoch": 1.135776117633108, "grad_norm": 0.9381977190076284, "learning_rate": 4.201051792816085e-05, "loss": 0.1888, "step": 9578 }, { "epoch": 1.135894699395233, "grad_norm": 1.1067299226740845, "learning_rate": 4.2008758813021784e-05, "loss": 0.2386, "step": 9579 }, { "epoch": 1.136013281157358, "grad_norm": 1.1754939672333016, "learning_rate": 4.200699954108283e-05, "loss": 0.2551, "step": 9580 }, { "epoch": 1.136131862919483, "grad_norm": 1.307308090706873, "learning_rate": 4.2005240112360186e-05, "loss": 0.2559, "step": 9581 }, { "epoch": 1.136250444681608, "grad_norm": 1.120992503095006, "learning_rate": 4.200348052687009e-05, "loss": 0.2629, "step": 9582 }, { "epoch": 1.136369026443733, "grad_norm": 0.9921956894919534, "learning_rate": 4.200172078462875e-05, "loss": 0.2325, "step": 9583 }, { "epoch": 1.136487608205858, "grad_norm": 1.0673488629815806, "learning_rate": 4.199996088565239e-05, "loss": 0.2488, "step": 9584 }, { "epoch": 1.136606189967983, "grad_norm": 1.2691425838262738, "learning_rate": 4.1998200829957245e-05, "loss": 0.2912, "step": 9585 }, { "epoch": 1.1367247717301079, "grad_norm": 1.0965999845803793, "learning_rate": 4.199644061755953e-05, "loss": 0.2255, "step": 9586 }, { "epoch": 1.1368433534922329, "grad_norm": 1.567321678194128, "learning_rate": 4.1994680248475473e-05, "loss": 0.3782, "step": 9587 }, { "epoch": 1.1369619352543578, "grad_norm": 1.7361132592874282, "learning_rate": 4.199291972272131e-05, "loss": 0.3626, "step": 9588 }, { "epoch": 1.1370805170164828, "grad_norm": 1.1186819039045572, "learning_rate": 4.199115904031326e-05, "loss": 0.2095, "step": 9589 }, { "epoch": 1.1371990987786078, "grad_norm": 1.197814395753835, "learning_rate": 4.198939820126756e-05, "loss": 0.1897, "step": 9590 }, { "epoch": 1.1373176805407328, "grad_norm": 0.9863672280123341, "learning_rate": 4.198763720560044e-05, "loss": 0.1915, "step": 9591 }, { "epoch": 1.1374362623028578, "grad_norm": 1.1899704416817065, "learning_rate": 4.1985876053328146e-05, "loss": 0.2432, "step": 9592 }, { "epoch": 1.1375548440649828, "grad_norm": 1.2791669673584152, "learning_rate": 4.1984114744466895e-05, "loss": 0.2516, "step": 9593 }, { "epoch": 1.1376734258271077, "grad_norm": 1.5714901633926353, "learning_rate": 4.198235327903294e-05, "loss": 0.4119, "step": 9594 }, { "epoch": 1.1377920075892327, "grad_norm": 1.3336019214432762, "learning_rate": 4.19805916570425e-05, "loss": 0.2824, "step": 9595 }, { "epoch": 1.1379105893513577, "grad_norm": 0.8608551177534084, "learning_rate": 4.197882987851185e-05, "loss": 0.1853, "step": 9596 }, { "epoch": 1.1380291711134827, "grad_norm": 1.050869385855372, "learning_rate": 4.197706794345719e-05, "loss": 0.2448, "step": 9597 }, { "epoch": 1.1381477528756077, "grad_norm": 1.4875774953492171, "learning_rate": 4.1975305851894786e-05, "loss": 0.3488, "step": 9598 }, { "epoch": 1.1382663346377326, "grad_norm": 1.5861479695328355, "learning_rate": 4.197354360384088e-05, "loss": 0.3372, "step": 9599 }, { "epoch": 1.1383849163998576, "grad_norm": 1.7164875957372139, "learning_rate": 4.197178119931172e-05, "loss": 0.334, "step": 9600 }, { "epoch": 1.1385034981619826, "grad_norm": 1.0260462479222048, "learning_rate": 4.197001863832355e-05, "loss": 0.2566, "step": 9601 }, { "epoch": 1.1386220799241076, "grad_norm": 1.210948758665711, "learning_rate": 4.1968255920892614e-05, "loss": 0.2843, "step": 9602 }, { "epoch": 1.1387406616862328, "grad_norm": 1.4568212438871426, "learning_rate": 4.196649304703516e-05, "loss": 0.2899, "step": 9603 }, { "epoch": 1.1388592434483575, "grad_norm": 1.0234970520188253, "learning_rate": 4.196473001676746e-05, "loss": 0.2393, "step": 9604 }, { "epoch": 1.1389778252104827, "grad_norm": 1.0343728187165242, "learning_rate": 4.1962966830105744e-05, "loss": 0.1754, "step": 9605 }, { "epoch": 1.1390964069726075, "grad_norm": 1.0023079223415905, "learning_rate": 4.196120348706628e-05, "loss": 0.1937, "step": 9606 }, { "epoch": 1.1392149887347327, "grad_norm": 1.661691120208774, "learning_rate": 4.195943998766531e-05, "loss": 0.3357, "step": 9607 }, { "epoch": 1.1393335704968575, "grad_norm": 1.0633490580018665, "learning_rate": 4.1957676331919104e-05, "loss": 0.2522, "step": 9608 }, { "epoch": 1.1394521522589827, "grad_norm": 0.803769495005734, "learning_rate": 4.195591251984393e-05, "loss": 0.2054, "step": 9609 }, { "epoch": 1.1395707340211076, "grad_norm": 1.3841534324338196, "learning_rate": 4.195414855145602e-05, "loss": 0.2764, "step": 9610 }, { "epoch": 1.1396893157832326, "grad_norm": 1.5709376501502126, "learning_rate": 4.195238442677166e-05, "loss": 0.3666, "step": 9611 }, { "epoch": 1.1398078975453576, "grad_norm": 0.9138681078302203, "learning_rate": 4.19506201458071e-05, "loss": 0.2376, "step": 9612 }, { "epoch": 1.1399264793074826, "grad_norm": 1.1423654455587167, "learning_rate": 4.1948855708578606e-05, "loss": 0.2838, "step": 9613 }, { "epoch": 1.1400450610696076, "grad_norm": 0.9160321897640372, "learning_rate": 4.1947091115102446e-05, "loss": 0.2102, "step": 9614 }, { "epoch": 1.1401636428317325, "grad_norm": 1.7556103785568193, "learning_rate": 4.194532636539489e-05, "loss": 0.4117, "step": 9615 }, { "epoch": 1.1402822245938575, "grad_norm": 1.593385660312138, "learning_rate": 4.1943561459472205e-05, "loss": 0.4248, "step": 9616 }, { "epoch": 1.1404008063559825, "grad_norm": 1.057610929153699, "learning_rate": 4.1941796397350665e-05, "loss": 0.2348, "step": 9617 }, { "epoch": 1.1405193881181075, "grad_norm": 1.5464695195585707, "learning_rate": 4.194003117904653e-05, "loss": 0.3333, "step": 9618 }, { "epoch": 1.1406379698802325, "grad_norm": 1.2185727957403742, "learning_rate": 4.193826580457609e-05, "loss": 0.2235, "step": 9619 }, { "epoch": 1.1407565516423575, "grad_norm": 1.1896955917140066, "learning_rate": 4.19365002739556e-05, "loss": 0.2704, "step": 9620 }, { "epoch": 1.1408751334044824, "grad_norm": 1.0562558401923945, "learning_rate": 4.193473458720135e-05, "loss": 0.229, "step": 9621 }, { "epoch": 1.1409937151666074, "grad_norm": 1.0066642658022935, "learning_rate": 4.193296874432961e-05, "loss": 0.2498, "step": 9622 }, { "epoch": 1.1411122969287324, "grad_norm": 1.3317183331496318, "learning_rate": 4.1931202745356665e-05, "loss": 0.353, "step": 9623 }, { "epoch": 1.1412308786908574, "grad_norm": 1.1823374380379026, "learning_rate": 4.192943659029881e-05, "loss": 0.2581, "step": 9624 }, { "epoch": 1.1413494604529824, "grad_norm": 1.028652549460109, "learning_rate": 4.1927670279172294e-05, "loss": 0.2389, "step": 9625 }, { "epoch": 1.1414680422151073, "grad_norm": 0.9556883308619061, "learning_rate": 4.1925903811993416e-05, "loss": 0.1688, "step": 9626 }, { "epoch": 1.1415866239772323, "grad_norm": 1.3017711349919292, "learning_rate": 4.192413718877846e-05, "loss": 0.1886, "step": 9627 }, { "epoch": 1.1417052057393573, "grad_norm": 0.978280168234373, "learning_rate": 4.192237040954372e-05, "loss": 0.2512, "step": 9628 }, { "epoch": 1.1418237875014823, "grad_norm": 1.0990891187615237, "learning_rate": 4.1920603474305465e-05, "loss": 0.1985, "step": 9629 }, { "epoch": 1.1419423692636073, "grad_norm": 1.3484275075951457, "learning_rate": 4.191883638308001e-05, "loss": 0.314, "step": 9630 }, { "epoch": 1.1420609510257322, "grad_norm": 1.6838172421743567, "learning_rate": 4.191706913588362e-05, "loss": 0.316, "step": 9631 }, { "epoch": 1.1421795327878572, "grad_norm": 1.1578423336095374, "learning_rate": 4.1915301732732605e-05, "loss": 0.2099, "step": 9632 }, { "epoch": 1.1422981145499822, "grad_norm": 1.0051581195829182, "learning_rate": 4.191353417364324e-05, "loss": 0.2551, "step": 9633 }, { "epoch": 1.1424166963121072, "grad_norm": 1.18736384805768, "learning_rate": 4.191176645863184e-05, "loss": 0.2448, "step": 9634 }, { "epoch": 1.1425352780742322, "grad_norm": 1.2802547233398902, "learning_rate": 4.190999858771469e-05, "loss": 0.3054, "step": 9635 }, { "epoch": 1.1426538598363571, "grad_norm": 1.1671891164888442, "learning_rate": 4.1908230560908086e-05, "loss": 0.2145, "step": 9636 }, { "epoch": 1.1427724415984821, "grad_norm": 0.9044852976777734, "learning_rate": 4.190646237822833e-05, "loss": 0.1591, "step": 9637 }, { "epoch": 1.142891023360607, "grad_norm": 0.9705924900045289, "learning_rate": 4.190469403969173e-05, "loss": 0.2228, "step": 9638 }, { "epoch": 1.143009605122732, "grad_norm": 1.3543573290259971, "learning_rate": 4.190292554531458e-05, "loss": 0.2791, "step": 9639 }, { "epoch": 1.143128186884857, "grad_norm": 1.06463583370425, "learning_rate": 4.190115689511318e-05, "loss": 0.2715, "step": 9640 }, { "epoch": 1.143246768646982, "grad_norm": 1.6372946834206963, "learning_rate": 4.189938808910384e-05, "loss": 0.3417, "step": 9641 }, { "epoch": 1.143365350409107, "grad_norm": 0.9512463580407413, "learning_rate": 4.189761912730286e-05, "loss": 0.2276, "step": 9642 }, { "epoch": 1.143483932171232, "grad_norm": 0.8910832626881221, "learning_rate": 4.1895850009726564e-05, "loss": 0.1938, "step": 9643 }, { "epoch": 1.143602513933357, "grad_norm": 1.202146728787868, "learning_rate": 4.189408073639124e-05, "loss": 0.225, "step": 9644 }, { "epoch": 1.143721095695482, "grad_norm": 0.997064125410387, "learning_rate": 4.189231130731321e-05, "loss": 0.2005, "step": 9645 }, { "epoch": 1.143839677457607, "grad_norm": 1.6341589767831413, "learning_rate": 4.189054172250879e-05, "loss": 0.3481, "step": 9646 }, { "epoch": 1.143958259219732, "grad_norm": 0.9985844789141011, "learning_rate": 4.188877198199429e-05, "loss": 0.2219, "step": 9647 }, { "epoch": 1.144076840981857, "grad_norm": 1.5725267493933235, "learning_rate": 4.188700208578601e-05, "loss": 0.3693, "step": 9648 }, { "epoch": 1.144195422743982, "grad_norm": 1.4405483176444003, "learning_rate": 4.188523203390029e-05, "loss": 0.3217, "step": 9649 }, { "epoch": 1.1443140045061069, "grad_norm": 1.1204798776371678, "learning_rate": 4.188346182635343e-05, "loss": 0.2644, "step": 9650 }, { "epoch": 1.1444325862682319, "grad_norm": 0.8976956338231642, "learning_rate": 4.188169146316177e-05, "loss": 0.1998, "step": 9651 }, { "epoch": 1.1445511680303568, "grad_norm": 0.9660082546175005, "learning_rate": 4.1879920944341597e-05, "loss": 0.2122, "step": 9652 }, { "epoch": 1.1446697497924818, "grad_norm": 1.4408541033882343, "learning_rate": 4.187815026990926e-05, "loss": 0.2839, "step": 9653 }, { "epoch": 1.144788331554607, "grad_norm": 0.8218421461902522, "learning_rate": 4.187637943988108e-05, "loss": 0.1677, "step": 9654 }, { "epoch": 1.1449069133167318, "grad_norm": 1.0757035127994463, "learning_rate": 4.187460845427337e-05, "loss": 0.2009, "step": 9655 }, { "epoch": 1.145025495078857, "grad_norm": 1.511063346253126, "learning_rate": 4.187283731310247e-05, "loss": 0.3588, "step": 9656 }, { "epoch": 1.1451440768409817, "grad_norm": 1.3304812910171966, "learning_rate": 4.1871066016384694e-05, "loss": 0.2471, "step": 9657 }, { "epoch": 1.145262658603107, "grad_norm": 1.0134322941260123, "learning_rate": 4.1869294564136383e-05, "loss": 0.2195, "step": 9658 }, { "epoch": 1.145381240365232, "grad_norm": 1.0809127651312571, "learning_rate": 4.1867522956373854e-05, "loss": 0.1924, "step": 9659 }, { "epoch": 1.145499822127357, "grad_norm": 1.2802743484357586, "learning_rate": 4.186575119311346e-05, "loss": 0.2419, "step": 9660 }, { "epoch": 1.1456184038894819, "grad_norm": 1.274586313360551, "learning_rate": 4.186397927437151e-05, "loss": 0.2481, "step": 9661 }, { "epoch": 1.1457369856516069, "grad_norm": 1.2764285283071424, "learning_rate": 4.186220720016436e-05, "loss": 0.2441, "step": 9662 }, { "epoch": 1.1458555674137318, "grad_norm": 1.1555743912083654, "learning_rate": 4.186043497050833e-05, "loss": 0.2376, "step": 9663 }, { "epoch": 1.1459741491758568, "grad_norm": 1.572899136117067, "learning_rate": 4.185866258541977e-05, "loss": 0.3171, "step": 9664 }, { "epoch": 1.1460927309379818, "grad_norm": 1.1180596797838855, "learning_rate": 4.185689004491502e-05, "loss": 0.2574, "step": 9665 }, { "epoch": 1.1462113127001068, "grad_norm": 1.1042516220423861, "learning_rate": 4.185511734901041e-05, "loss": 0.2113, "step": 9666 }, { "epoch": 1.1463298944622318, "grad_norm": 0.9760002736847903, "learning_rate": 4.185334449772228e-05, "loss": 0.2148, "step": 9667 }, { "epoch": 1.1464484762243567, "grad_norm": 1.0250787578744802, "learning_rate": 4.185157149106699e-05, "loss": 0.2324, "step": 9668 }, { "epoch": 1.1465670579864817, "grad_norm": 1.0226348762612674, "learning_rate": 4.184979832906088e-05, "loss": 0.2139, "step": 9669 }, { "epoch": 1.1466856397486067, "grad_norm": 1.241533754088076, "learning_rate": 4.184802501172028e-05, "loss": 0.2974, "step": 9670 }, { "epoch": 1.1468042215107317, "grad_norm": 1.6700638512684611, "learning_rate": 4.184625153906155e-05, "loss": 0.3145, "step": 9671 }, { "epoch": 1.1469228032728567, "grad_norm": 1.1324410861247851, "learning_rate": 4.184447791110104e-05, "loss": 0.253, "step": 9672 }, { "epoch": 1.1470413850349817, "grad_norm": 1.1366312943049606, "learning_rate": 4.1842704127855106e-05, "loss": 0.2645, "step": 9673 }, { "epoch": 1.1471599667971066, "grad_norm": 0.9839740839698634, "learning_rate": 4.18409301893401e-05, "loss": 0.1953, "step": 9674 }, { "epoch": 1.1472785485592316, "grad_norm": 1.0306888482476495, "learning_rate": 4.183915609557236e-05, "loss": 0.1884, "step": 9675 }, { "epoch": 1.1473971303213566, "grad_norm": 0.9410340946536296, "learning_rate": 4.1837381846568246e-05, "loss": 0.242, "step": 9676 }, { "epoch": 1.1475157120834816, "grad_norm": 1.1464753188951873, "learning_rate": 4.183560744234413e-05, "loss": 0.2693, "step": 9677 }, { "epoch": 1.1476342938456066, "grad_norm": 1.0778195080349073, "learning_rate": 4.1833832882916346e-05, "loss": 0.2208, "step": 9678 }, { "epoch": 1.1477528756077315, "grad_norm": 1.7559538043195355, "learning_rate": 4.1832058168301266e-05, "loss": 0.2907, "step": 9679 }, { "epoch": 1.1478714573698565, "grad_norm": 1.0785611852367463, "learning_rate": 4.183028329851526e-05, "loss": 0.2656, "step": 9680 }, { "epoch": 1.1479900391319815, "grad_norm": 1.1450716076064014, "learning_rate": 4.182850827357467e-05, "loss": 0.2628, "step": 9681 }, { "epoch": 1.1481086208941065, "grad_norm": 0.9655641781103969, "learning_rate": 4.1826733093495884e-05, "loss": 0.1816, "step": 9682 }, { "epoch": 1.1482272026562315, "grad_norm": 1.7014758720187313, "learning_rate": 4.1824957758295244e-05, "loss": 0.3545, "step": 9683 }, { "epoch": 1.1483457844183564, "grad_norm": 0.9986963102955382, "learning_rate": 4.182318226798913e-05, "loss": 0.225, "step": 9684 }, { "epoch": 1.1484643661804814, "grad_norm": 1.2945107823339637, "learning_rate": 4.1821406622593904e-05, "loss": 0.2757, "step": 9685 }, { "epoch": 1.1485829479426064, "grad_norm": 1.143919673027592, "learning_rate": 4.1819630822125934e-05, "loss": 0.3033, "step": 9686 }, { "epoch": 1.1487015297047314, "grad_norm": 0.8922398897989006, "learning_rate": 4.1817854866601596e-05, "loss": 0.2158, "step": 9687 }, { "epoch": 1.1488201114668564, "grad_norm": 1.1197180930064663, "learning_rate": 4.1816078756037256e-05, "loss": 0.249, "step": 9688 }, { "epoch": 1.1489386932289813, "grad_norm": 1.0191240075722947, "learning_rate": 4.18143024904493e-05, "loss": 0.2065, "step": 9689 }, { "epoch": 1.1490572749911063, "grad_norm": 1.408094052512272, "learning_rate": 4.1812526069854087e-05, "loss": 0.2859, "step": 9690 }, { "epoch": 1.1491758567532313, "grad_norm": 1.115043332026719, "learning_rate": 4.1810749494268e-05, "loss": 0.2125, "step": 9691 }, { "epoch": 1.1492944385153563, "grad_norm": 1.322023439674604, "learning_rate": 4.180897276370742e-05, "loss": 0.2721, "step": 9692 }, { "epoch": 1.1494130202774813, "grad_norm": 1.1035833364794494, "learning_rate": 4.180719587818872e-05, "loss": 0.2306, "step": 9693 }, { "epoch": 1.1495316020396062, "grad_norm": 1.1553647181045057, "learning_rate": 4.180541883772829e-05, "loss": 0.2369, "step": 9694 }, { "epoch": 1.1496501838017312, "grad_norm": 1.3394598211267421, "learning_rate": 4.180364164234251e-05, "loss": 0.2809, "step": 9695 }, { "epoch": 1.1497687655638562, "grad_norm": 1.1603123838940432, "learning_rate": 4.180186429204775e-05, "loss": 0.2481, "step": 9696 }, { "epoch": 1.1498873473259812, "grad_norm": 1.2810664759132562, "learning_rate": 4.1800086786860406e-05, "loss": 0.1886, "step": 9697 }, { "epoch": 1.1500059290881062, "grad_norm": 1.3071092148340149, "learning_rate": 4.179830912679686e-05, "loss": 0.2472, "step": 9698 }, { "epoch": 1.1501245108502312, "grad_norm": 1.241831256126419, "learning_rate": 4.179653131187352e-05, "loss": 0.3243, "step": 9699 }, { "epoch": 1.1502430926123561, "grad_norm": 1.5699625508136668, "learning_rate": 4.179475334210674e-05, "loss": 0.31, "step": 9700 }, { "epoch": 1.1503616743744811, "grad_norm": 1.197611415715529, "learning_rate": 4.179297521751294e-05, "loss": 0.2212, "step": 9701 }, { "epoch": 1.150480256136606, "grad_norm": 1.190163368161288, "learning_rate": 4.17911969381085e-05, "loss": 0.2417, "step": 9702 }, { "epoch": 1.1505988378987313, "grad_norm": 1.102185605154122, "learning_rate": 4.178941850390981e-05, "loss": 0.2305, "step": 9703 }, { "epoch": 1.150717419660856, "grad_norm": 1.500447182652989, "learning_rate": 4.1787639914933276e-05, "loss": 0.2804, "step": 9704 }, { "epoch": 1.1508360014229813, "grad_norm": 1.2376228886375582, "learning_rate": 4.178586117119528e-05, "loss": 0.2378, "step": 9705 }, { "epoch": 1.150954583185106, "grad_norm": 1.3941356109738277, "learning_rate": 4.178408227271224e-05, "loss": 0.2521, "step": 9706 }, { "epoch": 1.1510731649472312, "grad_norm": 0.8628374206930609, "learning_rate": 4.178230321950054e-05, "loss": 0.2044, "step": 9707 }, { "epoch": 1.151191746709356, "grad_norm": 1.291538331549025, "learning_rate": 4.1780524011576585e-05, "loss": 0.2517, "step": 9708 }, { "epoch": 1.1513103284714812, "grad_norm": 1.0365636059719874, "learning_rate": 4.1778744648956767e-05, "loss": 0.213, "step": 9709 }, { "epoch": 1.1514289102336062, "grad_norm": 1.212879415988456, "learning_rate": 4.177696513165751e-05, "loss": 0.2173, "step": 9710 }, { "epoch": 1.1515474919957311, "grad_norm": 1.198726704882082, "learning_rate": 4.17751854596952e-05, "loss": 0.2018, "step": 9711 }, { "epoch": 1.1516660737578561, "grad_norm": 1.2350194253516336, "learning_rate": 4.177340563308625e-05, "loss": 0.2033, "step": 9712 }, { "epoch": 1.151784655519981, "grad_norm": 1.0262618324698836, "learning_rate": 4.177162565184707e-05, "loss": 0.2147, "step": 9713 }, { "epoch": 1.151903237282106, "grad_norm": 1.5794155626842283, "learning_rate": 4.176984551599408e-05, "loss": 0.2982, "step": 9714 }, { "epoch": 1.152021819044231, "grad_norm": 0.8901675161974288, "learning_rate": 4.176806522554366e-05, "loss": 0.2129, "step": 9715 }, { "epoch": 1.152140400806356, "grad_norm": 1.0848323518197425, "learning_rate": 4.1766284780512255e-05, "loss": 0.2593, "step": 9716 }, { "epoch": 1.152258982568481, "grad_norm": 0.9251817762780647, "learning_rate": 4.1764504180916264e-05, "loss": 0.2564, "step": 9717 }, { "epoch": 1.152377564330606, "grad_norm": 1.1773942713653593, "learning_rate": 4.17627234267721e-05, "loss": 0.1827, "step": 9718 }, { "epoch": 1.152496146092731, "grad_norm": 0.9096649744936289, "learning_rate": 4.176094251809618e-05, "loss": 0.1824, "step": 9719 }, { "epoch": 1.152614727854856, "grad_norm": 0.6915340935104606, "learning_rate": 4.1759161454904924e-05, "loss": 0.1727, "step": 9720 }, { "epoch": 1.152733309616981, "grad_norm": 1.1324786579056412, "learning_rate": 4.175738023721475e-05, "loss": 0.2605, "step": 9721 }, { "epoch": 1.152851891379106, "grad_norm": 0.9165939411140239, "learning_rate": 4.175559886504208e-05, "loss": 0.1771, "step": 9722 }, { "epoch": 1.152970473141231, "grad_norm": 1.5248751684269233, "learning_rate": 4.175381733840334e-05, "loss": 0.3501, "step": 9723 }, { "epoch": 1.153089054903356, "grad_norm": 1.2449844756770585, "learning_rate": 4.175203565731493e-05, "loss": 0.3235, "step": 9724 }, { "epoch": 1.1532076366654809, "grad_norm": 1.2546645115452637, "learning_rate": 4.1750253821793314e-05, "loss": 0.3042, "step": 9725 }, { "epoch": 1.1533262184276059, "grad_norm": 1.411971220307702, "learning_rate": 4.17484718318549e-05, "loss": 0.3095, "step": 9726 }, { "epoch": 1.1534448001897308, "grad_norm": 2.0731875539536015, "learning_rate": 4.1746689687516106e-05, "loss": 0.4783, "step": 9727 }, { "epoch": 1.1535633819518558, "grad_norm": 1.119912412113706, "learning_rate": 4.174490738879336e-05, "loss": 0.22, "step": 9728 }, { "epoch": 1.1536819637139808, "grad_norm": 1.2737620732853396, "learning_rate": 4.1743124935703123e-05, "loss": 0.2579, "step": 9729 }, { "epoch": 1.1538005454761058, "grad_norm": 0.9031692459746484, "learning_rate": 4.174134232826179e-05, "loss": 0.2415, "step": 9730 }, { "epoch": 1.1539191272382308, "grad_norm": 1.1292252058815346, "learning_rate": 4.173955956648582e-05, "loss": 0.1949, "step": 9731 }, { "epoch": 1.1540377090003557, "grad_norm": 0.9293089415154345, "learning_rate": 4.173777665039163e-05, "loss": 0.2468, "step": 9732 }, { "epoch": 1.1541562907624807, "grad_norm": 1.0394854986713913, "learning_rate": 4.173599357999567e-05, "loss": 0.2069, "step": 9733 }, { "epoch": 1.1542748725246057, "grad_norm": 0.9306677355959424, "learning_rate": 4.1734210355314375e-05, "loss": 0.2359, "step": 9734 }, { "epoch": 1.1543934542867307, "grad_norm": 1.227113592635986, "learning_rate": 4.173242697636417e-05, "loss": 0.2583, "step": 9735 }, { "epoch": 1.1545120360488557, "grad_norm": 1.0811616302298324, "learning_rate": 4.173064344316151e-05, "loss": 0.2062, "step": 9736 }, { "epoch": 1.1546306178109806, "grad_norm": 1.097442212989027, "learning_rate": 4.172885975572284e-05, "loss": 0.2436, "step": 9737 }, { "epoch": 1.1547491995731056, "grad_norm": 1.1538305820840913, "learning_rate": 4.1727075914064594e-05, "loss": 0.2317, "step": 9738 }, { "epoch": 1.1548677813352306, "grad_norm": 1.2054163815806151, "learning_rate": 4.1725291918203224e-05, "loss": 0.2991, "step": 9739 }, { "epoch": 1.1549863630973556, "grad_norm": 1.1594772665600603, "learning_rate": 4.172350776815517e-05, "loss": 0.2234, "step": 9740 }, { "epoch": 1.1551049448594806, "grad_norm": 1.335084165955372, "learning_rate": 4.172172346393688e-05, "loss": 0.2907, "step": 9741 }, { "epoch": 1.1552235266216055, "grad_norm": 1.2799698569810303, "learning_rate": 4.171993900556481e-05, "loss": 0.284, "step": 9742 }, { "epoch": 1.1553421083837305, "grad_norm": 0.9033953699337584, "learning_rate": 4.17181543930554e-05, "loss": 0.1952, "step": 9743 }, { "epoch": 1.1554606901458555, "grad_norm": 1.0318392144194215, "learning_rate": 4.1716369626425115e-05, "loss": 0.23, "step": 9744 }, { "epoch": 1.1555792719079805, "grad_norm": 1.4668517690740324, "learning_rate": 4.1714584705690394e-05, "loss": 0.2414, "step": 9745 }, { "epoch": 1.1556978536701055, "grad_norm": 1.3069755772806961, "learning_rate": 4.17127996308677e-05, "loss": 0.2536, "step": 9746 }, { "epoch": 1.1558164354322304, "grad_norm": 1.6714885893640758, "learning_rate": 4.171101440197349e-05, "loss": 0.3386, "step": 9747 }, { "epoch": 1.1559350171943554, "grad_norm": 1.3462706176178598, "learning_rate": 4.170922901902422e-05, "loss": 0.2946, "step": 9748 }, { "epoch": 1.1560535989564804, "grad_norm": 1.4802040680786244, "learning_rate": 4.1707443482036344e-05, "loss": 0.4042, "step": 9749 }, { "epoch": 1.1561721807186054, "grad_norm": 0.8312086513375094, "learning_rate": 4.1705657791026334e-05, "loss": 0.1692, "step": 9750 }, { "epoch": 1.1562907624807304, "grad_norm": 1.1998655089222094, "learning_rate": 4.1703871946010644e-05, "loss": 0.2446, "step": 9751 }, { "epoch": 1.1564093442428554, "grad_norm": 0.780388296784744, "learning_rate": 4.170208594700573e-05, "loss": 0.1699, "step": 9752 }, { "epoch": 1.1565279260049803, "grad_norm": 0.9342335149491425, "learning_rate": 4.1700299794028073e-05, "loss": 0.2217, "step": 9753 }, { "epoch": 1.1566465077671055, "grad_norm": 1.0633226550107404, "learning_rate": 4.1698513487094126e-05, "loss": 0.1839, "step": 9754 }, { "epoch": 1.1567650895292303, "grad_norm": 1.1909133663811307, "learning_rate": 4.169672702622037e-05, "loss": 0.2673, "step": 9755 }, { "epoch": 1.1568836712913555, "grad_norm": 1.3012722295630257, "learning_rate": 4.169494041142326e-05, "loss": 0.2886, "step": 9756 }, { "epoch": 1.1570022530534803, "grad_norm": 1.3091384940568283, "learning_rate": 4.169315364271927e-05, "loss": 0.274, "step": 9757 }, { "epoch": 1.1571208348156055, "grad_norm": 0.8991335564339591, "learning_rate": 4.169136672012487e-05, "loss": 0.2264, "step": 9758 }, { "epoch": 1.1572394165777304, "grad_norm": 1.2097664518602953, "learning_rate": 4.168957964365655e-05, "loss": 0.2355, "step": 9759 }, { "epoch": 1.1573579983398554, "grad_norm": 1.0346514373866775, "learning_rate": 4.168779241333076e-05, "loss": 0.231, "step": 9760 }, { "epoch": 1.1574765801019804, "grad_norm": 1.0664691030531535, "learning_rate": 4.1686005029164e-05, "loss": 0.2143, "step": 9761 }, { "epoch": 1.1575951618641054, "grad_norm": 1.0553275029458815, "learning_rate": 4.1684217491172725e-05, "loss": 0.2128, "step": 9762 }, { "epoch": 1.1577137436262304, "grad_norm": 1.0243106800741328, "learning_rate": 4.168242979937343e-05, "loss": 0.1982, "step": 9763 }, { "epoch": 1.1578323253883553, "grad_norm": 1.3418307299101522, "learning_rate": 4.1680641953782586e-05, "loss": 0.2899, "step": 9764 }, { "epoch": 1.1579509071504803, "grad_norm": 0.9839271912034352, "learning_rate": 4.167885395441668e-05, "loss": 0.2123, "step": 9765 }, { "epoch": 1.1580694889126053, "grad_norm": 1.1827584601587167, "learning_rate": 4.167706580129219e-05, "loss": 0.2943, "step": 9766 }, { "epoch": 1.1581880706747303, "grad_norm": 1.4771893247582986, "learning_rate": 4.167527749442561e-05, "loss": 0.3659, "step": 9767 }, { "epoch": 1.1583066524368553, "grad_norm": 1.6592091652561418, "learning_rate": 4.167348903383342e-05, "loss": 0.2699, "step": 9768 }, { "epoch": 1.1584252341989802, "grad_norm": 1.170107441710715, "learning_rate": 4.16717004195321e-05, "loss": 0.2564, "step": 9769 }, { "epoch": 1.1585438159611052, "grad_norm": 1.0123080467567847, "learning_rate": 4.166991165153815e-05, "loss": 0.2002, "step": 9770 }, { "epoch": 1.1586623977232302, "grad_norm": 0.921675071556106, "learning_rate": 4.166812272986806e-05, "loss": 0.1614, "step": 9771 }, { "epoch": 1.1587809794853552, "grad_norm": 1.0787532502577128, "learning_rate": 4.1666333654538315e-05, "loss": 0.2756, "step": 9772 }, { "epoch": 1.1588995612474802, "grad_norm": 0.8226939628998595, "learning_rate": 4.1664544425565413e-05, "loss": 0.1796, "step": 9773 }, { "epoch": 1.1590181430096052, "grad_norm": 1.6073377841583745, "learning_rate": 4.166275504296585e-05, "loss": 0.3575, "step": 9774 }, { "epoch": 1.1591367247717301, "grad_norm": 1.454711590410997, "learning_rate": 4.166096550675611e-05, "loss": 0.3194, "step": 9775 }, { "epoch": 1.1592553065338551, "grad_norm": 1.4885901661737309, "learning_rate": 4.165917581695271e-05, "loss": 0.38, "step": 9776 }, { "epoch": 1.15937388829598, "grad_norm": 1.1313666844815329, "learning_rate": 4.1657385973572125e-05, "loss": 0.2185, "step": 9777 }, { "epoch": 1.159492470058105, "grad_norm": 0.9167553498515507, "learning_rate": 4.165559597663088e-05, "loss": 0.2294, "step": 9778 }, { "epoch": 1.15961105182023, "grad_norm": 1.3199569743343973, "learning_rate": 4.165380582614545e-05, "loss": 0.2633, "step": 9779 }, { "epoch": 1.159729633582355, "grad_norm": 1.2466368348250365, "learning_rate": 4.165201552213237e-05, "loss": 0.2618, "step": 9780 }, { "epoch": 1.15984821534448, "grad_norm": 1.0819060012568134, "learning_rate": 4.1650225064608114e-05, "loss": 0.2655, "step": 9781 }, { "epoch": 1.159966797106605, "grad_norm": 1.048671910140433, "learning_rate": 4.1648434453589205e-05, "loss": 0.219, "step": 9782 }, { "epoch": 1.16008537886873, "grad_norm": 1.2394270012766493, "learning_rate": 4.164664368909215e-05, "loss": 0.2569, "step": 9783 }, { "epoch": 1.160203960630855, "grad_norm": 0.9911566886348759, "learning_rate": 4.164485277113345e-05, "loss": 0.2063, "step": 9784 }, { "epoch": 1.16032254239298, "grad_norm": 0.9551759817701917, "learning_rate": 4.1643061699729616e-05, "loss": 0.2033, "step": 9785 }, { "epoch": 1.160441124155105, "grad_norm": 1.5045113175578044, "learning_rate": 4.164127047489716e-05, "loss": 0.3585, "step": 9786 }, { "epoch": 1.16055970591723, "grad_norm": 1.1165018265564772, "learning_rate": 4.1639479096652615e-05, "loss": 0.2774, "step": 9787 }, { "epoch": 1.1606782876793549, "grad_norm": 0.9124884888775723, "learning_rate": 4.1637687565012454e-05, "loss": 0.231, "step": 9788 }, { "epoch": 1.1607968694414799, "grad_norm": 1.0139559934694458, "learning_rate": 4.1635895879993234e-05, "loss": 0.248, "step": 9789 }, { "epoch": 1.1609154512036048, "grad_norm": 1.4112774099724366, "learning_rate": 4.163410404161144e-05, "loss": 0.2861, "step": 9790 }, { "epoch": 1.1610340329657298, "grad_norm": 1.1131576384905648, "learning_rate": 4.163231204988362e-05, "loss": 0.2568, "step": 9791 }, { "epoch": 1.1611526147278548, "grad_norm": 0.9758611704113783, "learning_rate": 4.163051990482626e-05, "loss": 0.1853, "step": 9792 }, { "epoch": 1.1612711964899798, "grad_norm": 1.285430208001626, "learning_rate": 4.1628727606455916e-05, "loss": 0.211, "step": 9793 }, { "epoch": 1.1613897782521048, "grad_norm": 1.226431189814614, "learning_rate": 4.162693515478909e-05, "loss": 0.2379, "step": 9794 }, { "epoch": 1.1615083600142297, "grad_norm": 1.25437174139283, "learning_rate": 4.1625142549842314e-05, "loss": 0.2994, "step": 9795 }, { "epoch": 1.1616269417763547, "grad_norm": 1.3057988891547039, "learning_rate": 4.1623349791632106e-05, "loss": 0.2627, "step": 9796 }, { "epoch": 1.1617455235384797, "grad_norm": 1.104830886125268, "learning_rate": 4.1621556880174996e-05, "loss": 0.2294, "step": 9797 }, { "epoch": 1.1618641053006047, "grad_norm": 0.8775242047857768, "learning_rate": 4.1619763815487526e-05, "loss": 0.2051, "step": 9798 }, { "epoch": 1.1619826870627297, "grad_norm": 1.0529561409815242, "learning_rate": 4.1617970597586195e-05, "loss": 0.2155, "step": 9799 }, { "epoch": 1.1621012688248546, "grad_norm": 1.644925106954984, "learning_rate": 4.161617722648757e-05, "loss": 0.4443, "step": 9800 }, { "epoch": 1.1622198505869796, "grad_norm": 1.1177934138536756, "learning_rate": 4.161438370220816e-05, "loss": 0.2234, "step": 9801 }, { "epoch": 1.1623384323491046, "grad_norm": 1.088155934293742, "learning_rate": 4.161259002476451e-05, "loss": 0.238, "step": 9802 }, { "epoch": 1.1624570141112298, "grad_norm": 1.1632184199437015, "learning_rate": 4.161079619417314e-05, "loss": 0.2541, "step": 9803 }, { "epoch": 1.1625755958733546, "grad_norm": 1.4332054654630273, "learning_rate": 4.1609002210450614e-05, "loss": 0.2794, "step": 9804 }, { "epoch": 1.1626941776354798, "grad_norm": 1.2202499467622312, "learning_rate": 4.1607208073613444e-05, "loss": 0.2355, "step": 9805 }, { "epoch": 1.1628127593976045, "grad_norm": 1.0039622196510454, "learning_rate": 4.160541378367819e-05, "loss": 0.1935, "step": 9806 }, { "epoch": 1.1629313411597297, "grad_norm": 1.1528751059491416, "learning_rate": 4.160361934066138e-05, "loss": 0.2299, "step": 9807 }, { "epoch": 1.1630499229218545, "grad_norm": 1.3755391092912754, "learning_rate": 4.160182474457955e-05, "loss": 0.2727, "step": 9808 }, { "epoch": 1.1631685046839797, "grad_norm": 1.3194624648364048, "learning_rate": 4.1600029995449275e-05, "loss": 0.3031, "step": 9809 }, { "epoch": 1.1632870864461047, "grad_norm": 1.0407352475986944, "learning_rate": 4.1598235093287066e-05, "loss": 0.2053, "step": 9810 }, { "epoch": 1.1634056682082297, "grad_norm": 1.0585452329867486, "learning_rate": 4.1596440038109486e-05, "loss": 0.1945, "step": 9811 }, { "epoch": 1.1635242499703546, "grad_norm": 1.0840101349385005, "learning_rate": 4.159464482993308e-05, "loss": 0.2667, "step": 9812 }, { "epoch": 1.1636428317324796, "grad_norm": 1.4310247026262102, "learning_rate": 4.15928494687744e-05, "loss": 0.334, "step": 9813 }, { "epoch": 1.1637614134946046, "grad_norm": 1.2410633879844142, "learning_rate": 4.1591053954649995e-05, "loss": 0.2281, "step": 9814 }, { "epoch": 1.1638799952567296, "grad_norm": 0.9290741487012489, "learning_rate": 4.158925828757642e-05, "loss": 0.1955, "step": 9815 }, { "epoch": 1.1639985770188546, "grad_norm": 1.0803453483057235, "learning_rate": 4.1587462467570214e-05, "loss": 0.2263, "step": 9816 }, { "epoch": 1.1641171587809795, "grad_norm": 1.47772207533791, "learning_rate": 4.158566649464796e-05, "loss": 0.2412, "step": 9817 }, { "epoch": 1.1642357405431045, "grad_norm": 1.2927298286013915, "learning_rate": 4.15838703688262e-05, "loss": 0.2552, "step": 9818 }, { "epoch": 1.1643543223052295, "grad_norm": 0.9744732739082427, "learning_rate": 4.158207409012148e-05, "loss": 0.2238, "step": 9819 }, { "epoch": 1.1644729040673545, "grad_norm": 1.122468525704558, "learning_rate": 4.158027765855038e-05, "loss": 0.2521, "step": 9820 }, { "epoch": 1.1645914858294795, "grad_norm": 1.1402595988510433, "learning_rate": 4.157848107412945e-05, "loss": 0.2352, "step": 9821 }, { "epoch": 1.1647100675916044, "grad_norm": 1.2724707505654502, "learning_rate": 4.157668433687525e-05, "loss": 0.265, "step": 9822 }, { "epoch": 1.1648286493537294, "grad_norm": 1.0017155432098488, "learning_rate": 4.1574887446804355e-05, "loss": 0.1616, "step": 9823 }, { "epoch": 1.1649472311158544, "grad_norm": 1.3190684631312177, "learning_rate": 4.1573090403933325e-05, "loss": 0.2589, "step": 9824 }, { "epoch": 1.1650658128779794, "grad_norm": 1.2840647576528115, "learning_rate": 4.1571293208278714e-05, "loss": 0.2458, "step": 9825 }, { "epoch": 1.1651843946401044, "grad_norm": 1.3983914609076906, "learning_rate": 4.156949585985711e-05, "loss": 0.3042, "step": 9826 }, { "epoch": 1.1653029764022294, "grad_norm": 1.6631557381441104, "learning_rate": 4.1567698358685066e-05, "loss": 0.2582, "step": 9827 }, { "epoch": 1.1654215581643543, "grad_norm": 1.0759956898146648, "learning_rate": 4.1565900704779166e-05, "loss": 0.2936, "step": 9828 }, { "epoch": 1.1655401399264793, "grad_norm": 0.9521726567399845, "learning_rate": 4.156410289815597e-05, "loss": 0.2113, "step": 9829 }, { "epoch": 1.1656587216886043, "grad_norm": 1.144028532434178, "learning_rate": 4.1562304938832065e-05, "loss": 0.2072, "step": 9830 }, { "epoch": 1.1657773034507293, "grad_norm": 1.25410391992121, "learning_rate": 4.156050682682401e-05, "loss": 0.2852, "step": 9831 }, { "epoch": 1.1658958852128543, "grad_norm": 1.0774115931922472, "learning_rate": 4.15587085621484e-05, "loss": 0.2632, "step": 9832 }, { "epoch": 1.1660144669749792, "grad_norm": 1.2431147371794047, "learning_rate": 4.155691014482179e-05, "loss": 0.2287, "step": 9833 }, { "epoch": 1.1661330487371042, "grad_norm": 1.2406532194871465, "learning_rate": 4.1555111574860785e-05, "loss": 0.2741, "step": 9834 }, { "epoch": 1.1662516304992292, "grad_norm": 0.9772674313310136, "learning_rate": 4.155331285228194e-05, "loss": 0.2026, "step": 9835 }, { "epoch": 1.1663702122613542, "grad_norm": 0.993123148868634, "learning_rate": 4.1551513977101855e-05, "loss": 0.2521, "step": 9836 }, { "epoch": 1.1664887940234792, "grad_norm": 1.6563210480289874, "learning_rate": 4.154971494933712e-05, "loss": 0.4134, "step": 9837 }, { "epoch": 1.1666073757856041, "grad_norm": 1.150657114019511, "learning_rate": 4.154791576900429e-05, "loss": 0.2637, "step": 9838 }, { "epoch": 1.1667259575477291, "grad_norm": 1.2727136471032243, "learning_rate": 4.1546116436119974e-05, "loss": 0.2992, "step": 9839 }, { "epoch": 1.166844539309854, "grad_norm": 0.9388260427273223, "learning_rate": 4.154431695070076e-05, "loss": 0.1929, "step": 9840 }, { "epoch": 1.166963121071979, "grad_norm": 0.7656858863570207, "learning_rate": 4.154251731276323e-05, "loss": 0.2054, "step": 9841 }, { "epoch": 1.167081702834104, "grad_norm": 1.0439938466513767, "learning_rate": 4.154071752232397e-05, "loss": 0.2419, "step": 9842 }, { "epoch": 1.167200284596229, "grad_norm": 1.1932076156053617, "learning_rate": 4.153891757939958e-05, "loss": 0.2065, "step": 9843 }, { "epoch": 1.167318866358354, "grad_norm": 1.5972977229375684, "learning_rate": 4.153711748400665e-05, "loss": 0.3152, "step": 9844 }, { "epoch": 1.167437448120479, "grad_norm": 0.9843814492084669, "learning_rate": 4.1535317236161786e-05, "loss": 0.1989, "step": 9845 }, { "epoch": 1.167556029882604, "grad_norm": 1.2540010542527213, "learning_rate": 4.153351683588157e-05, "loss": 0.2573, "step": 9846 }, { "epoch": 1.167674611644729, "grad_norm": 1.3198587568886864, "learning_rate": 4.15317162831826e-05, "loss": 0.3202, "step": 9847 }, { "epoch": 1.167793193406854, "grad_norm": 1.027119386475778, "learning_rate": 4.152991557808148e-05, "loss": 0.204, "step": 9848 }, { "epoch": 1.167911775168979, "grad_norm": 1.4814958043091926, "learning_rate": 4.15281147205948e-05, "loss": 0.3037, "step": 9849 }, { "epoch": 1.168030356931104, "grad_norm": 1.0264446863245735, "learning_rate": 4.1526313710739184e-05, "loss": 0.2106, "step": 9850 }, { "epoch": 1.1681489386932289, "grad_norm": 0.8266353023627034, "learning_rate": 4.1524512548531216e-05, "loss": 0.2119, "step": 9851 }, { "epoch": 1.1682675204553539, "grad_norm": 0.9795202024734199, "learning_rate": 4.15227112339875e-05, "loss": 0.1908, "step": 9852 }, { "epoch": 1.1683861022174789, "grad_norm": 1.3482002795640746, "learning_rate": 4.152090976712466e-05, "loss": 0.3398, "step": 9853 }, { "epoch": 1.168504683979604, "grad_norm": 1.0874607370834783, "learning_rate": 4.1519108147959284e-05, "loss": 0.2613, "step": 9854 }, { "epoch": 1.1686232657417288, "grad_norm": 1.0334165623123646, "learning_rate": 4.1517306376507994e-05, "loss": 0.2502, "step": 9855 }, { "epoch": 1.168741847503854, "grad_norm": 1.1147390942817372, "learning_rate": 4.151550445278739e-05, "loss": 0.2406, "step": 9856 }, { "epoch": 1.1688604292659788, "grad_norm": 0.9847574238416811, "learning_rate": 4.151370237681408e-05, "loss": 0.2393, "step": 9857 }, { "epoch": 1.168979011028104, "grad_norm": 1.2351941771138877, "learning_rate": 4.151190014860469e-05, "loss": 0.2272, "step": 9858 }, { "epoch": 1.169097592790229, "grad_norm": 1.2827311339507022, "learning_rate": 4.151009776817583e-05, "loss": 0.2621, "step": 9859 }, { "epoch": 1.169216174552354, "grad_norm": 1.338624658076609, "learning_rate": 4.150829523554412e-05, "loss": 0.2221, "step": 9860 }, { "epoch": 1.169334756314479, "grad_norm": 1.0858753494377686, "learning_rate": 4.150649255072616e-05, "loss": 0.2398, "step": 9861 }, { "epoch": 1.169453338076604, "grad_norm": 1.1636164376677873, "learning_rate": 4.150468971373859e-05, "loss": 0.204, "step": 9862 }, { "epoch": 1.1695719198387289, "grad_norm": 1.40193557302608, "learning_rate": 4.1502886724598015e-05, "loss": 0.299, "step": 9863 }, { "epoch": 1.1696905016008539, "grad_norm": 1.2759804439624638, "learning_rate": 4.1501083583321065e-05, "loss": 0.2481, "step": 9864 }, { "epoch": 1.1698090833629788, "grad_norm": 1.3464461917648929, "learning_rate": 4.149928028992436e-05, "loss": 0.3104, "step": 9865 }, { "epoch": 1.1699276651251038, "grad_norm": 0.9735145839018802, "learning_rate": 4.1497476844424514e-05, "loss": 0.167, "step": 9866 }, { "epoch": 1.1700462468872288, "grad_norm": 1.3373323055609687, "learning_rate": 4.149567324683818e-05, "loss": 0.2528, "step": 9867 }, { "epoch": 1.1701648286493538, "grad_norm": 1.1418564074701067, "learning_rate": 4.149386949718195e-05, "loss": 0.2487, "step": 9868 }, { "epoch": 1.1702834104114788, "grad_norm": 1.1166387221895955, "learning_rate": 4.1492065595472476e-05, "loss": 0.2517, "step": 9869 }, { "epoch": 1.1704019921736037, "grad_norm": 1.6459870477127574, "learning_rate": 4.1490261541726384e-05, "loss": 0.3932, "step": 9870 }, { "epoch": 1.1705205739357287, "grad_norm": 1.3640329752856664, "learning_rate": 4.1488457335960294e-05, "loss": 0.2512, "step": 9871 }, { "epoch": 1.1706391556978537, "grad_norm": 0.8366678951735907, "learning_rate": 4.1486652978190855e-05, "loss": 0.1671, "step": 9872 }, { "epoch": 1.1707577374599787, "grad_norm": 1.4251123419599485, "learning_rate": 4.148484846843469e-05, "loss": 0.2617, "step": 9873 }, { "epoch": 1.1708763192221037, "grad_norm": 1.3714201235435066, "learning_rate": 4.148304380670843e-05, "loss": 0.3043, "step": 9874 }, { "epoch": 1.1709949009842286, "grad_norm": 0.8941006401503712, "learning_rate": 4.1481238993028734e-05, "loss": 0.2101, "step": 9875 }, { "epoch": 1.1711134827463536, "grad_norm": 0.9609752992345864, "learning_rate": 4.147943402741221e-05, "loss": 0.1801, "step": 9876 }, { "epoch": 1.1712320645084786, "grad_norm": 1.2463443210245537, "learning_rate": 4.147762890987552e-05, "loss": 0.2546, "step": 9877 }, { "epoch": 1.1713506462706036, "grad_norm": 1.4654431682726694, "learning_rate": 4.14758236404353e-05, "loss": 0.2724, "step": 9878 }, { "epoch": 1.1714692280327286, "grad_norm": 1.304400839203449, "learning_rate": 4.147401821910819e-05, "loss": 0.3232, "step": 9879 }, { "epoch": 1.1715878097948536, "grad_norm": 1.3041892830182265, "learning_rate": 4.147221264591083e-05, "loss": 0.3115, "step": 9880 }, { "epoch": 1.1717063915569785, "grad_norm": 0.9615758740407383, "learning_rate": 4.147040692085987e-05, "loss": 0.2444, "step": 9881 }, { "epoch": 1.1718249733191035, "grad_norm": 1.3555152780357733, "learning_rate": 4.1468601043971954e-05, "loss": 0.3392, "step": 9882 }, { "epoch": 1.1719435550812285, "grad_norm": 1.1847817443463542, "learning_rate": 4.146679501526373e-05, "loss": 0.2324, "step": 9883 }, { "epoch": 1.1720621368433535, "grad_norm": 1.1107691152678436, "learning_rate": 4.146498883475186e-05, "loss": 0.2426, "step": 9884 }, { "epoch": 1.1721807186054785, "grad_norm": 1.049560617634572, "learning_rate": 4.146318250245297e-05, "loss": 0.2291, "step": 9885 }, { "epoch": 1.1722993003676034, "grad_norm": 0.9992669082628642, "learning_rate": 4.1461376018383736e-05, "loss": 0.2381, "step": 9886 }, { "epoch": 1.1724178821297284, "grad_norm": 1.0685505397464568, "learning_rate": 4.145956938256079e-05, "loss": 0.2727, "step": 9887 }, { "epoch": 1.1725364638918534, "grad_norm": 1.2198324948515844, "learning_rate": 4.145776259500081e-05, "loss": 0.2798, "step": 9888 }, { "epoch": 1.1726550456539784, "grad_norm": 1.1744359395413069, "learning_rate": 4.1455955655720433e-05, "loss": 0.2951, "step": 9889 }, { "epoch": 1.1727736274161034, "grad_norm": 1.2864017940752666, "learning_rate": 4.1454148564736325e-05, "loss": 0.2742, "step": 9890 }, { "epoch": 1.1728922091782283, "grad_norm": 1.1695769711880684, "learning_rate": 4.1452341322065144e-05, "loss": 0.208, "step": 9891 }, { "epoch": 1.1730107909403533, "grad_norm": 1.1502385274142486, "learning_rate": 4.1450533927723565e-05, "loss": 0.2369, "step": 9892 }, { "epoch": 1.1731293727024783, "grad_norm": 1.185523774907947, "learning_rate": 4.144872638172822e-05, "loss": 0.2686, "step": 9893 }, { "epoch": 1.1732479544646033, "grad_norm": 1.2080063366779088, "learning_rate": 4.144691868409579e-05, "loss": 0.2312, "step": 9894 }, { "epoch": 1.1733665362267283, "grad_norm": 0.7867245553011812, "learning_rate": 4.1445110834842945e-05, "loss": 0.179, "step": 9895 }, { "epoch": 1.1734851179888532, "grad_norm": 1.3281814605842954, "learning_rate": 4.1443302833986345e-05, "loss": 0.2988, "step": 9896 }, { "epoch": 1.1736036997509782, "grad_norm": 1.2534884365291157, "learning_rate": 4.1441494681542655e-05, "loss": 0.3231, "step": 9897 }, { "epoch": 1.1737222815131032, "grad_norm": 1.5221586241226934, "learning_rate": 4.143968637752854e-05, "loss": 0.3021, "step": 9898 }, { "epoch": 1.1738408632752282, "grad_norm": 1.0607087089451566, "learning_rate": 4.1437877921960684e-05, "loss": 0.2052, "step": 9899 }, { "epoch": 1.1739594450373532, "grad_norm": 1.043886961719104, "learning_rate": 4.143606931485574e-05, "loss": 0.2224, "step": 9900 }, { "epoch": 1.1740780267994781, "grad_norm": 1.6441170754799121, "learning_rate": 4.143426055623041e-05, "loss": 0.3937, "step": 9901 }, { "epoch": 1.1741966085616031, "grad_norm": 1.332322003409126, "learning_rate": 4.1432451646101336e-05, "loss": 0.3119, "step": 9902 }, { "epoch": 1.1743151903237283, "grad_norm": 1.1270320230376007, "learning_rate": 4.143064258448521e-05, "loss": 0.2808, "step": 9903 }, { "epoch": 1.174433772085853, "grad_norm": 1.242482093182845, "learning_rate": 4.1428833371398715e-05, "loss": 0.2851, "step": 9904 }, { "epoch": 1.1745523538479783, "grad_norm": 1.03475857944016, "learning_rate": 4.1427024006858514e-05, "loss": 0.2278, "step": 9905 }, { "epoch": 1.174670935610103, "grad_norm": 1.295992646111444, "learning_rate": 4.1425214490881305e-05, "loss": 0.272, "step": 9906 }, { "epoch": 1.1747895173722283, "grad_norm": 1.2126493211612044, "learning_rate": 4.1423404823483756e-05, "loss": 0.2915, "step": 9907 }, { "epoch": 1.174908099134353, "grad_norm": 0.9577608443502641, "learning_rate": 4.142159500468256e-05, "loss": 0.1889, "step": 9908 }, { "epoch": 1.1750266808964782, "grad_norm": 1.070377433805415, "learning_rate": 4.1419785034494385e-05, "loss": 0.249, "step": 9909 }, { "epoch": 1.1751452626586032, "grad_norm": 1.0185744640922725, "learning_rate": 4.141797491293593e-05, "loss": 0.2583, "step": 9910 }, { "epoch": 1.1752638444207282, "grad_norm": 1.3401447537764066, "learning_rate": 4.141616464002388e-05, "loss": 0.2689, "step": 9911 }, { "epoch": 1.1753824261828532, "grad_norm": 1.166643613473188, "learning_rate": 4.141435421577493e-05, "loss": 0.2798, "step": 9912 }, { "epoch": 1.1755010079449781, "grad_norm": 0.8489100320189321, "learning_rate": 4.141254364020576e-05, "loss": 0.1562, "step": 9913 }, { "epoch": 1.1756195897071031, "grad_norm": 1.6571421388439227, "learning_rate": 4.1410732913333056e-05, "loss": 0.4044, "step": 9914 }, { "epoch": 1.175738171469228, "grad_norm": 1.1316985155984705, "learning_rate": 4.140892203517353e-05, "loss": 0.242, "step": 9915 }, { "epoch": 1.175856753231353, "grad_norm": 1.1661889056594879, "learning_rate": 4.1407111005743855e-05, "loss": 0.2853, "step": 9916 }, { "epoch": 1.175975334993478, "grad_norm": 1.1691422034512469, "learning_rate": 4.140529982506074e-05, "loss": 0.2805, "step": 9917 }, { "epoch": 1.176093916755603, "grad_norm": 1.2094653984802082, "learning_rate": 4.140348849314087e-05, "loss": 0.3113, "step": 9918 }, { "epoch": 1.176212498517728, "grad_norm": 1.0417101181432251, "learning_rate": 4.140167701000097e-05, "loss": 0.2401, "step": 9919 }, { "epoch": 1.176331080279853, "grad_norm": 0.8956477035028819, "learning_rate": 4.139986537565771e-05, "loss": 0.2167, "step": 9920 }, { "epoch": 1.176449662041978, "grad_norm": 1.0118632710777948, "learning_rate": 4.13980535901278e-05, "loss": 0.2284, "step": 9921 }, { "epoch": 1.176568243804103, "grad_norm": 1.0105674038917705, "learning_rate": 4.139624165342794e-05, "loss": 0.2221, "step": 9922 }, { "epoch": 1.176686825566228, "grad_norm": 1.6524583559164925, "learning_rate": 4.1394429565574847e-05, "loss": 0.2712, "step": 9923 }, { "epoch": 1.176805407328353, "grad_norm": 1.2149345787799346, "learning_rate": 4.1392617326585215e-05, "loss": 0.2246, "step": 9924 }, { "epoch": 1.176923989090478, "grad_norm": 1.2909319929148482, "learning_rate": 4.139080493647576e-05, "loss": 0.246, "step": 9925 }, { "epoch": 1.1770425708526029, "grad_norm": 1.0137554901265642, "learning_rate": 4.138899239526317e-05, "loss": 0.2327, "step": 9926 }, { "epoch": 1.1771611526147279, "grad_norm": 0.6798392232470067, "learning_rate": 4.138717970296419e-05, "loss": 0.1331, "step": 9927 }, { "epoch": 1.1772797343768528, "grad_norm": 1.0245696709648877, "learning_rate": 4.138536685959549e-05, "loss": 0.2533, "step": 9928 }, { "epoch": 1.1773983161389778, "grad_norm": 0.8418573536443829, "learning_rate": 4.13835538651738e-05, "loss": 0.2054, "step": 9929 }, { "epoch": 1.1775168979011028, "grad_norm": 1.6388329351227398, "learning_rate": 4.138174071971583e-05, "loss": 0.495, "step": 9930 }, { "epoch": 1.1776354796632278, "grad_norm": 0.8757003463885359, "learning_rate": 4.1379927423238315e-05, "loss": 0.1773, "step": 9931 }, { "epoch": 1.1777540614253528, "grad_norm": 1.1489619684296808, "learning_rate": 4.1378113975757945e-05, "loss": 0.2602, "step": 9932 }, { "epoch": 1.1778726431874778, "grad_norm": 1.1702775921045052, "learning_rate": 4.137630037729145e-05, "loss": 0.267, "step": 9933 }, { "epoch": 1.1779912249496027, "grad_norm": 0.9014630821867519, "learning_rate": 4.137448662785555e-05, "loss": 0.176, "step": 9934 }, { "epoch": 1.1781098067117277, "grad_norm": 1.5502465033962605, "learning_rate": 4.1372672727466963e-05, "loss": 0.2839, "step": 9935 }, { "epoch": 1.1782283884738527, "grad_norm": 1.0602085439812772, "learning_rate": 4.137085867614241e-05, "loss": 0.2311, "step": 9936 }, { "epoch": 1.1783469702359777, "grad_norm": 1.009000720919505, "learning_rate": 4.1369044473898614e-05, "loss": 0.1745, "step": 9937 }, { "epoch": 1.1784655519981027, "grad_norm": 0.7878211407300001, "learning_rate": 4.136723012075231e-05, "loss": 0.2336, "step": 9938 }, { "epoch": 1.1785841337602276, "grad_norm": 1.096632040419667, "learning_rate": 4.13654156167202e-05, "loss": 0.2149, "step": 9939 }, { "epoch": 1.1787027155223526, "grad_norm": 1.2029146150017398, "learning_rate": 4.1363600961819035e-05, "loss": 0.2328, "step": 9940 }, { "epoch": 1.1788212972844776, "grad_norm": 1.5155268917453095, "learning_rate": 4.136178615606553e-05, "loss": 0.3418, "step": 9941 }, { "epoch": 1.1789398790466026, "grad_norm": 1.1378860172565022, "learning_rate": 4.1359971199476426e-05, "loss": 0.2228, "step": 9942 }, { "epoch": 1.1790584608087276, "grad_norm": 0.8742737294710003, "learning_rate": 4.135815609206844e-05, "loss": 0.1631, "step": 9943 }, { "epoch": 1.1791770425708525, "grad_norm": 1.2351671383363447, "learning_rate": 4.135634083385833e-05, "loss": 0.2512, "step": 9944 }, { "epoch": 1.1792956243329775, "grad_norm": 1.1057055300556915, "learning_rate": 4.135452542486281e-05, "loss": 0.2318, "step": 9945 }, { "epoch": 1.1794142060951025, "grad_norm": 1.3033374750276467, "learning_rate": 4.135270986509862e-05, "loss": 0.2086, "step": 9946 }, { "epoch": 1.1795327878572275, "grad_norm": 1.4020015000568131, "learning_rate": 4.13508941545825e-05, "loss": 0.3649, "step": 9947 }, { "epoch": 1.1796513696193525, "grad_norm": 1.3336228107245442, "learning_rate": 4.1349078293331176e-05, "loss": 0.2111, "step": 9948 }, { "epoch": 1.1797699513814774, "grad_norm": 1.3476119594423759, "learning_rate": 4.1347262281361406e-05, "loss": 0.2503, "step": 9949 }, { "epoch": 1.1798885331436024, "grad_norm": 1.011779596905743, "learning_rate": 4.134544611868992e-05, "loss": 0.1999, "step": 9950 }, { "epoch": 1.1800071149057274, "grad_norm": 1.1745208847776751, "learning_rate": 4.1343629805333476e-05, "loss": 0.2082, "step": 9951 }, { "epoch": 1.1801256966678526, "grad_norm": 1.1142083367689635, "learning_rate": 4.13418133413088e-05, "loss": 0.2073, "step": 9952 }, { "epoch": 1.1802442784299774, "grad_norm": 1.0875870060473547, "learning_rate": 4.1339996726632645e-05, "loss": 0.2165, "step": 9953 }, { "epoch": 1.1803628601921026, "grad_norm": 0.9972110800563697, "learning_rate": 4.1338179961321754e-05, "loss": 0.2071, "step": 9954 }, { "epoch": 1.1804814419542273, "grad_norm": 1.1627154791961782, "learning_rate": 4.133636304539289e-05, "loss": 0.3033, "step": 9955 }, { "epoch": 1.1806000237163525, "grad_norm": 0.7330426456515045, "learning_rate": 4.133454597886278e-05, "loss": 0.1702, "step": 9956 }, { "epoch": 1.1807186054784773, "grad_norm": 0.9822046096735356, "learning_rate": 4.1332728761748196e-05, "loss": 0.1811, "step": 9957 }, { "epoch": 1.1808371872406025, "grad_norm": 1.1843700820217926, "learning_rate": 4.1330911394065874e-05, "loss": 0.2647, "step": 9958 }, { "epoch": 1.1809557690027275, "grad_norm": 1.000045061574419, "learning_rate": 4.1329093875832574e-05, "loss": 0.2403, "step": 9959 }, { "epoch": 1.1810743507648525, "grad_norm": 1.2402739748959346, "learning_rate": 4.132727620706506e-05, "loss": 0.3181, "step": 9960 }, { "epoch": 1.1811929325269774, "grad_norm": 1.368562143952398, "learning_rate": 4.132545838778008e-05, "loss": 0.3102, "step": 9961 }, { "epoch": 1.1813115142891024, "grad_norm": 0.931065424641166, "learning_rate": 4.132364041799438e-05, "loss": 0.1997, "step": 9962 }, { "epoch": 1.1814300960512274, "grad_norm": 1.1081104477681922, "learning_rate": 4.132182229772475e-05, "loss": 0.2184, "step": 9963 }, { "epoch": 1.1815486778133524, "grad_norm": 1.2246360042289595, "learning_rate": 4.132000402698792e-05, "loss": 0.2452, "step": 9964 }, { "epoch": 1.1816672595754774, "grad_norm": 1.0581212692083337, "learning_rate": 4.1318185605800674e-05, "loss": 0.2098, "step": 9965 }, { "epoch": 1.1817858413376023, "grad_norm": 1.2033934801249544, "learning_rate": 4.131636703417977e-05, "loss": 0.2729, "step": 9966 }, { "epoch": 1.1819044230997273, "grad_norm": 1.0613843690185267, "learning_rate": 4.1314548312141965e-05, "loss": 0.2236, "step": 9967 }, { "epoch": 1.1820230048618523, "grad_norm": 1.1566663203111867, "learning_rate": 4.131272943970403e-05, "loss": 0.2318, "step": 9968 }, { "epoch": 1.1821415866239773, "grad_norm": 1.0669259034299514, "learning_rate": 4.131091041688274e-05, "loss": 0.2135, "step": 9969 }, { "epoch": 1.1822601683861023, "grad_norm": 1.1012305396783593, "learning_rate": 4.130909124369485e-05, "loss": 0.273, "step": 9970 }, { "epoch": 1.1823787501482272, "grad_norm": 1.4773069732204058, "learning_rate": 4.130727192015714e-05, "loss": 0.3403, "step": 9971 }, { "epoch": 1.1824973319103522, "grad_norm": 0.899201688894045, "learning_rate": 4.130545244628639e-05, "loss": 0.2304, "step": 9972 }, { "epoch": 1.1826159136724772, "grad_norm": 1.944712495797384, "learning_rate": 4.130363282209935e-05, "loss": 0.4701, "step": 9973 }, { "epoch": 1.1827344954346022, "grad_norm": 0.8137255167219068, "learning_rate": 4.130181304761281e-05, "loss": 0.1747, "step": 9974 }, { "epoch": 1.1828530771967272, "grad_norm": 1.631956465355469, "learning_rate": 4.129999312284355e-05, "loss": 0.3132, "step": 9975 }, { "epoch": 1.1829716589588521, "grad_norm": 1.0177764839277437, "learning_rate": 4.129817304780834e-05, "loss": 0.2113, "step": 9976 }, { "epoch": 1.1830902407209771, "grad_norm": 0.7716437464896979, "learning_rate": 4.129635282252396e-05, "loss": 0.1778, "step": 9977 }, { "epoch": 1.183208822483102, "grad_norm": 1.2346177430571956, "learning_rate": 4.1294532447007194e-05, "loss": 0.257, "step": 9978 }, { "epoch": 1.183327404245227, "grad_norm": 1.5415806149421716, "learning_rate": 4.129271192127482e-05, "loss": 0.3339, "step": 9979 }, { "epoch": 1.183445986007352, "grad_norm": 1.020769336155493, "learning_rate": 4.129089124534362e-05, "loss": 0.2212, "step": 9980 }, { "epoch": 1.183564567769477, "grad_norm": 1.0864521433861034, "learning_rate": 4.128907041923038e-05, "loss": 0.2162, "step": 9981 }, { "epoch": 1.183683149531602, "grad_norm": 1.2607925093747274, "learning_rate": 4.128724944295188e-05, "loss": 0.3024, "step": 9982 }, { "epoch": 1.183801731293727, "grad_norm": 0.8371038524811966, "learning_rate": 4.128542831652492e-05, "loss": 0.2069, "step": 9983 }, { "epoch": 1.183920313055852, "grad_norm": 1.247981430858475, "learning_rate": 4.128360703996628e-05, "loss": 0.2543, "step": 9984 }, { "epoch": 1.184038894817977, "grad_norm": 1.030058554382611, "learning_rate": 4.128178561329275e-05, "loss": 0.2269, "step": 9985 }, { "epoch": 1.184157476580102, "grad_norm": 1.2731577244632462, "learning_rate": 4.1279964036521126e-05, "loss": 0.305, "step": 9986 }, { "epoch": 1.184276058342227, "grad_norm": 0.9746936237369059, "learning_rate": 4.1278142309668193e-05, "loss": 0.1719, "step": 9987 }, { "epoch": 1.184394640104352, "grad_norm": 1.2081597023803257, "learning_rate": 4.127632043275075e-05, "loss": 0.3007, "step": 9988 }, { "epoch": 1.184513221866477, "grad_norm": 1.1196884408606516, "learning_rate": 4.1274498405785594e-05, "loss": 0.2249, "step": 9989 }, { "epoch": 1.1846318036286019, "grad_norm": 1.1754652987737844, "learning_rate": 4.127267622878952e-05, "loss": 0.2012, "step": 9990 }, { "epoch": 1.1847503853907269, "grad_norm": 1.1389923545073035, "learning_rate": 4.1270853901779326e-05, "loss": 0.2571, "step": 9991 }, { "epoch": 1.1848689671528518, "grad_norm": 1.0346201405897446, "learning_rate": 4.126903142477181e-05, "loss": 0.2623, "step": 9992 }, { "epoch": 1.1849875489149768, "grad_norm": 0.8393651470813581, "learning_rate": 4.1267208797783774e-05, "loss": 0.1585, "step": 9993 }, { "epoch": 1.1851061306771018, "grad_norm": 0.9388010736867392, "learning_rate": 4.126538602083202e-05, "loss": 0.1895, "step": 9994 }, { "epoch": 1.1852247124392268, "grad_norm": 0.969202782116698, "learning_rate": 4.126356309393335e-05, "loss": 0.2397, "step": 9995 }, { "epoch": 1.1853432942013518, "grad_norm": 1.34194856154961, "learning_rate": 4.1261740017104585e-05, "loss": 0.2568, "step": 9996 }, { "epoch": 1.1854618759634767, "grad_norm": 1.0472136527742688, "learning_rate": 4.1259916790362504e-05, "loss": 0.1761, "step": 9997 }, { "epoch": 1.1855804577256017, "grad_norm": 1.0150440806496466, "learning_rate": 4.1258093413723944e-05, "loss": 0.1927, "step": 9998 }, { "epoch": 1.1856990394877267, "grad_norm": 0.8894385428180444, "learning_rate": 4.125626988720569e-05, "loss": 0.2027, "step": 9999 }, { "epoch": 1.1858176212498517, "grad_norm": 0.881098564401981, "learning_rate": 4.125444621082456e-05, "loss": 0.1994, "step": 10000 }, { "epoch": 1.1859362030119767, "grad_norm": 0.9737219072876793, "learning_rate": 4.1252622384597375e-05, "loss": 0.2348, "step": 10001 }, { "epoch": 1.1860547847741016, "grad_norm": 0.8533148673286778, "learning_rate": 4.125079840854094e-05, "loss": 0.2001, "step": 10002 }, { "epoch": 1.1861733665362268, "grad_norm": 1.4647298068381414, "learning_rate": 4.124897428267207e-05, "loss": 0.2837, "step": 10003 }, { "epoch": 1.1862919482983516, "grad_norm": 1.3187824055324755, "learning_rate": 4.124715000700758e-05, "loss": 0.2605, "step": 10004 }, { "epoch": 1.1864105300604768, "grad_norm": 1.7101149000760858, "learning_rate": 4.1245325581564295e-05, "loss": 0.386, "step": 10005 }, { "epoch": 1.1865291118226016, "grad_norm": 1.1252107640982718, "learning_rate": 4.124350100635904e-05, "loss": 0.2499, "step": 10006 }, { "epoch": 1.1866476935847268, "grad_norm": 0.8412698370908729, "learning_rate": 4.124167628140861e-05, "loss": 0.1456, "step": 10007 }, { "epoch": 1.1867662753468518, "grad_norm": 1.1121246103907676, "learning_rate": 4.123985140672984e-05, "loss": 0.2193, "step": 10008 }, { "epoch": 1.1868848571089767, "grad_norm": 1.4925966309436212, "learning_rate": 4.1238026382339564e-05, "loss": 0.2634, "step": 10009 }, { "epoch": 1.1870034388711017, "grad_norm": 1.6233904180461858, "learning_rate": 4.123620120825459e-05, "loss": 0.4017, "step": 10010 }, { "epoch": 1.1871220206332267, "grad_norm": 1.872639664823403, "learning_rate": 4.123437588449176e-05, "loss": 0.3652, "step": 10011 }, { "epoch": 1.1872406023953517, "grad_norm": 1.3672756235604047, "learning_rate": 4.123255041106788e-05, "loss": 0.2782, "step": 10012 }, { "epoch": 1.1873591841574767, "grad_norm": 0.975838306491177, "learning_rate": 4.123072478799981e-05, "loss": 0.1946, "step": 10013 }, { "epoch": 1.1874777659196016, "grad_norm": 0.9565713435940582, "learning_rate": 4.122889901530434e-05, "loss": 0.1999, "step": 10014 }, { "epoch": 1.1875963476817266, "grad_norm": 0.8348810217156815, "learning_rate": 4.122707309299834e-05, "loss": 0.1577, "step": 10015 }, { "epoch": 1.1877149294438516, "grad_norm": 1.2287444585258527, "learning_rate": 4.122524702109861e-05, "loss": 0.3203, "step": 10016 }, { "epoch": 1.1878335112059766, "grad_norm": 1.2112152892478654, "learning_rate": 4.1223420799622e-05, "loss": 0.254, "step": 10017 }, { "epoch": 1.1879520929681016, "grad_norm": 1.0246462970061452, "learning_rate": 4.122159442858535e-05, "loss": 0.1981, "step": 10018 }, { "epoch": 1.1880706747302265, "grad_norm": 1.2763974390949775, "learning_rate": 4.1219767908005494e-05, "loss": 0.2967, "step": 10019 }, { "epoch": 1.1881892564923515, "grad_norm": 1.0191008102146855, "learning_rate": 4.121794123789926e-05, "loss": 0.2076, "step": 10020 }, { "epoch": 1.1883078382544765, "grad_norm": 1.0982502205477862, "learning_rate": 4.12161144182835e-05, "loss": 0.2287, "step": 10021 }, { "epoch": 1.1884264200166015, "grad_norm": 1.2775144964224978, "learning_rate": 4.121428744917506e-05, "loss": 0.2909, "step": 10022 }, { "epoch": 1.1885450017787265, "grad_norm": 1.0537366426450205, "learning_rate": 4.1212460330590755e-05, "loss": 0.2321, "step": 10023 }, { "epoch": 1.1886635835408514, "grad_norm": 0.9737574393698872, "learning_rate": 4.121063306254746e-05, "loss": 0.2385, "step": 10024 }, { "epoch": 1.1887821653029764, "grad_norm": 1.1708321551836631, "learning_rate": 4.1208805645062e-05, "loss": 0.2869, "step": 10025 }, { "epoch": 1.1889007470651014, "grad_norm": 1.1327364750686586, "learning_rate": 4.120697807815124e-05, "loss": 0.2442, "step": 10026 }, { "epoch": 1.1890193288272264, "grad_norm": 1.0383264198009878, "learning_rate": 4.1205150361832004e-05, "loss": 0.2008, "step": 10027 }, { "epoch": 1.1891379105893514, "grad_norm": 1.342640713691856, "learning_rate": 4.1203322496121156e-05, "loss": 0.2899, "step": 10028 }, { "epoch": 1.1892564923514763, "grad_norm": 0.9124764615881577, "learning_rate": 4.120149448103554e-05, "loss": 0.1979, "step": 10029 }, { "epoch": 1.1893750741136013, "grad_norm": 1.1089271492263129, "learning_rate": 4.119966631659202e-05, "loss": 0.259, "step": 10030 }, { "epoch": 1.1894936558757263, "grad_norm": 1.2162980141605995, "learning_rate": 4.1197838002807444e-05, "loss": 0.2527, "step": 10031 }, { "epoch": 1.1896122376378513, "grad_norm": 1.2187457255297345, "learning_rate": 4.1196009539698665e-05, "loss": 0.2635, "step": 10032 }, { "epoch": 1.1897308193999763, "grad_norm": 1.3382176576802678, "learning_rate": 4.1194180927282534e-05, "loss": 0.2451, "step": 10033 }, { "epoch": 1.1898494011621013, "grad_norm": 1.3328680025956914, "learning_rate": 4.119235216557591e-05, "loss": 0.2787, "step": 10034 }, { "epoch": 1.1899679829242262, "grad_norm": 1.7024757450642831, "learning_rate": 4.119052325459566e-05, "loss": 0.3156, "step": 10035 }, { "epoch": 1.1900865646863512, "grad_norm": 1.305684560019124, "learning_rate": 4.118869419435864e-05, "loss": 0.2792, "step": 10036 }, { "epoch": 1.1902051464484762, "grad_norm": 1.4580031137522618, "learning_rate": 4.118686498488171e-05, "loss": 0.3527, "step": 10037 }, { "epoch": 1.1903237282106012, "grad_norm": 1.2809001717622404, "learning_rate": 4.118503562618173e-05, "loss": 0.2849, "step": 10038 }, { "epoch": 1.1904423099727262, "grad_norm": 1.1052808408011252, "learning_rate": 4.118320611827558e-05, "loss": 0.2333, "step": 10039 }, { "epoch": 1.1905608917348511, "grad_norm": 0.9615671160407735, "learning_rate": 4.118137646118011e-05, "loss": 0.1941, "step": 10040 }, { "epoch": 1.1906794734969761, "grad_norm": 0.99486064818346, "learning_rate": 4.1179546654912194e-05, "loss": 0.2225, "step": 10041 }, { "epoch": 1.190798055259101, "grad_norm": 1.5000986941521068, "learning_rate": 4.1177716699488686e-05, "loss": 0.3139, "step": 10042 }, { "epoch": 1.190916637021226, "grad_norm": 0.9148342288496802, "learning_rate": 4.117588659492648e-05, "loss": 0.2072, "step": 10043 }, { "epoch": 1.191035218783351, "grad_norm": 0.7814348508110738, "learning_rate": 4.117405634124244e-05, "loss": 0.1848, "step": 10044 }, { "epoch": 1.191153800545476, "grad_norm": 0.9514781810529025, "learning_rate": 4.117222593845343e-05, "loss": 0.2113, "step": 10045 }, { "epoch": 1.191272382307601, "grad_norm": 1.1813576145761178, "learning_rate": 4.1170395386576324e-05, "loss": 0.2665, "step": 10046 }, { "epoch": 1.191390964069726, "grad_norm": 1.0993275348987124, "learning_rate": 4.1168564685628006e-05, "loss": 0.2255, "step": 10047 }, { "epoch": 1.191509545831851, "grad_norm": 0.8468399693028651, "learning_rate": 4.116673383562535e-05, "loss": 0.1559, "step": 10048 }, { "epoch": 1.191628127593976, "grad_norm": 1.2752742849027023, "learning_rate": 4.116490283658523e-05, "loss": 0.271, "step": 10049 }, { "epoch": 1.191746709356101, "grad_norm": 1.01190539989614, "learning_rate": 4.116307168852452e-05, "loss": 0.2137, "step": 10050 }, { "epoch": 1.191865291118226, "grad_norm": 1.10561980420319, "learning_rate": 4.116124039146012e-05, "loss": 0.2095, "step": 10051 }, { "epoch": 1.1919838728803511, "grad_norm": 1.1340971778359288, "learning_rate": 4.1159408945408905e-05, "loss": 0.233, "step": 10052 }, { "epoch": 1.1921024546424759, "grad_norm": 1.480943538506598, "learning_rate": 4.1157577350387746e-05, "loss": 0.345, "step": 10053 }, { "epoch": 1.192221036404601, "grad_norm": 1.254460877011242, "learning_rate": 4.1155745606413546e-05, "loss": 0.2563, "step": 10054 }, { "epoch": 1.1923396181667258, "grad_norm": 1.1760302150679185, "learning_rate": 4.115391371350318e-05, "loss": 0.2273, "step": 10055 }, { "epoch": 1.192458199928851, "grad_norm": 0.9354294146194708, "learning_rate": 4.1152081671673536e-05, "loss": 0.2428, "step": 10056 }, { "epoch": 1.1925767816909758, "grad_norm": 0.8622930063910595, "learning_rate": 4.115024948094151e-05, "loss": 0.1836, "step": 10057 }, { "epoch": 1.192695363453101, "grad_norm": 1.11930570261697, "learning_rate": 4.114841714132399e-05, "loss": 0.2401, "step": 10058 }, { "epoch": 1.192813945215226, "grad_norm": 1.002601771952509, "learning_rate": 4.1146584652837864e-05, "loss": 0.2655, "step": 10059 }, { "epoch": 1.192932526977351, "grad_norm": 1.1950791654862472, "learning_rate": 4.114475201550003e-05, "loss": 0.2535, "step": 10060 }, { "epoch": 1.193051108739476, "grad_norm": 1.2809739855902749, "learning_rate": 4.114291922932737e-05, "loss": 0.2397, "step": 10061 }, { "epoch": 1.193169690501601, "grad_norm": 1.0530885695552947, "learning_rate": 4.114108629433681e-05, "loss": 0.2424, "step": 10062 }, { "epoch": 1.193288272263726, "grad_norm": 1.1318306463590113, "learning_rate": 4.11392532105452e-05, "loss": 0.228, "step": 10063 }, { "epoch": 1.193406854025851, "grad_norm": 1.3409888128825862, "learning_rate": 4.113741997796949e-05, "loss": 0.295, "step": 10064 }, { "epoch": 1.1935254357879759, "grad_norm": 1.4078713392286897, "learning_rate": 4.113558659662655e-05, "loss": 0.2619, "step": 10065 }, { "epoch": 1.1936440175501009, "grad_norm": 0.9910690185514176, "learning_rate": 4.1133753066533287e-05, "loss": 0.1899, "step": 10066 }, { "epoch": 1.1937625993122258, "grad_norm": 1.8223101539495545, "learning_rate": 4.113191938770661e-05, "loss": 0.2983, "step": 10067 }, { "epoch": 1.1938811810743508, "grad_norm": 0.8850762581803602, "learning_rate": 4.1130085560163413e-05, "loss": 0.1862, "step": 10068 }, { "epoch": 1.1939997628364758, "grad_norm": 1.2139070059443837, "learning_rate": 4.112825158392061e-05, "loss": 0.3049, "step": 10069 }, { "epoch": 1.1941183445986008, "grad_norm": 1.0318096176686138, "learning_rate": 4.112641745899509e-05, "loss": 0.2284, "step": 10070 }, { "epoch": 1.1942369263607258, "grad_norm": 1.4071475200101742, "learning_rate": 4.11245831854038e-05, "loss": 0.3364, "step": 10071 }, { "epoch": 1.1943555081228507, "grad_norm": 0.6896891120633695, "learning_rate": 4.112274876316362e-05, "loss": 0.1537, "step": 10072 }, { "epoch": 1.1944740898849757, "grad_norm": 1.1305616906142837, "learning_rate": 4.112091419229146e-05, "loss": 0.1962, "step": 10073 }, { "epoch": 1.1945926716471007, "grad_norm": 1.2711411237027763, "learning_rate": 4.111907947280424e-05, "loss": 0.2851, "step": 10074 }, { "epoch": 1.1947112534092257, "grad_norm": 0.9873327457294878, "learning_rate": 4.1117244604718885e-05, "loss": 0.2085, "step": 10075 }, { "epoch": 1.1948298351713507, "grad_norm": 0.9906426628166938, "learning_rate": 4.111540958805229e-05, "loss": 0.1932, "step": 10076 }, { "epoch": 1.1949484169334756, "grad_norm": 1.0290947331949611, "learning_rate": 4.1113574422821386e-05, "loss": 0.2523, "step": 10077 }, { "epoch": 1.1950669986956006, "grad_norm": 1.0091965916158459, "learning_rate": 4.111173910904308e-05, "loss": 0.2118, "step": 10078 }, { "epoch": 1.1951855804577256, "grad_norm": 1.3905273924028243, "learning_rate": 4.110990364673429e-05, "loss": 0.2777, "step": 10079 }, { "epoch": 1.1953041622198506, "grad_norm": 1.0163832196534681, "learning_rate": 4.1108068035911953e-05, "loss": 0.1906, "step": 10080 }, { "epoch": 1.1954227439819756, "grad_norm": 1.2341878186217987, "learning_rate": 4.110623227659298e-05, "loss": 0.2672, "step": 10081 }, { "epoch": 1.1955413257441005, "grad_norm": 1.0828517545624288, "learning_rate": 4.1104396368794305e-05, "loss": 0.2219, "step": 10082 }, { "epoch": 1.1956599075062255, "grad_norm": 1.3479305823618697, "learning_rate": 4.1102560312532836e-05, "loss": 0.3233, "step": 10083 }, { "epoch": 1.1957784892683505, "grad_norm": 1.425892492914055, "learning_rate": 4.110072410782551e-05, "loss": 0.296, "step": 10084 }, { "epoch": 1.1958970710304755, "grad_norm": 1.0874423473863672, "learning_rate": 4.1098887754689244e-05, "loss": 0.2449, "step": 10085 }, { "epoch": 1.1960156527926005, "grad_norm": 1.0316214363441358, "learning_rate": 4.109705125314098e-05, "loss": 0.1924, "step": 10086 }, { "epoch": 1.1961342345547255, "grad_norm": 1.1897217944846108, "learning_rate": 4.109521460319764e-05, "loss": 0.224, "step": 10087 }, { "epoch": 1.1962528163168504, "grad_norm": 1.0796877274996588, "learning_rate": 4.1093377804876164e-05, "loss": 0.223, "step": 10088 }, { "epoch": 1.1963713980789754, "grad_norm": 1.4292158933650818, "learning_rate": 4.109154085819348e-05, "loss": 0.3201, "step": 10089 }, { "epoch": 1.1964899798411004, "grad_norm": 1.077672814351693, "learning_rate": 4.108970376316652e-05, "loss": 0.2314, "step": 10090 }, { "epoch": 1.1966085616032254, "grad_norm": 0.9703955192781616, "learning_rate": 4.108786651981222e-05, "loss": 0.2307, "step": 10091 }, { "epoch": 1.1967271433653504, "grad_norm": 0.9929272234996583, "learning_rate": 4.108602912814752e-05, "loss": 0.2473, "step": 10092 }, { "epoch": 1.1968457251274753, "grad_norm": 1.6204028045635575, "learning_rate": 4.108419158818935e-05, "loss": 0.2824, "step": 10093 }, { "epoch": 1.1969643068896003, "grad_norm": 1.1126336367559204, "learning_rate": 4.108235389995467e-05, "loss": 0.2098, "step": 10094 }, { "epoch": 1.1970828886517253, "grad_norm": 1.3047808175374893, "learning_rate": 4.1080516063460404e-05, "loss": 0.257, "step": 10095 }, { "epoch": 1.1972014704138503, "grad_norm": 1.5239748642365574, "learning_rate": 4.1078678078723496e-05, "loss": 0.4302, "step": 10096 }, { "epoch": 1.1973200521759753, "grad_norm": 0.9021567292839666, "learning_rate": 4.10768399457609e-05, "loss": 0.1945, "step": 10097 }, { "epoch": 1.1974386339381002, "grad_norm": 1.2210753135489492, "learning_rate": 4.107500166458954e-05, "loss": 0.2492, "step": 10098 }, { "epoch": 1.1975572157002252, "grad_norm": 1.7901417681180676, "learning_rate": 4.107316323522639e-05, "loss": 0.3029, "step": 10099 }, { "epoch": 1.1976757974623502, "grad_norm": 1.2207776807407729, "learning_rate": 4.107132465768838e-05, "loss": 0.27, "step": 10100 }, { "epoch": 1.1977943792244752, "grad_norm": 1.1523701573429772, "learning_rate": 4.1069485931992476e-05, "loss": 0.2149, "step": 10101 }, { "epoch": 1.1979129609866002, "grad_norm": 1.3618277761590907, "learning_rate": 4.10676470581556e-05, "loss": 0.2334, "step": 10102 }, { "epoch": 1.1980315427487254, "grad_norm": 1.220199250547045, "learning_rate": 4.106580803619473e-05, "loss": 0.2753, "step": 10103 }, { "epoch": 1.1981501245108501, "grad_norm": 1.064974511226199, "learning_rate": 4.106396886612681e-05, "loss": 0.1995, "step": 10104 }, { "epoch": 1.1982687062729753, "grad_norm": 1.4312149235302567, "learning_rate": 4.10621295479688e-05, "loss": 0.2297, "step": 10105 }, { "epoch": 1.1983872880351, "grad_norm": 1.1388420398243768, "learning_rate": 4.106029008173765e-05, "loss": 0.244, "step": 10106 }, { "epoch": 1.1985058697972253, "grad_norm": 0.9473173268032994, "learning_rate": 4.1058450467450314e-05, "loss": 0.1968, "step": 10107 }, { "epoch": 1.1986244515593503, "grad_norm": 1.1399923720215428, "learning_rate": 4.105661070512377e-05, "loss": 0.2484, "step": 10108 }, { "epoch": 1.1987430333214752, "grad_norm": 0.7975345000555337, "learning_rate": 4.1054770794774955e-05, "loss": 0.164, "step": 10109 }, { "epoch": 1.1988616150836002, "grad_norm": 1.3836767097907945, "learning_rate": 4.105293073642084e-05, "loss": 0.3196, "step": 10110 }, { "epoch": 1.1989801968457252, "grad_norm": 0.838408440100419, "learning_rate": 4.105109053007839e-05, "loss": 0.1594, "step": 10111 }, { "epoch": 1.1990987786078502, "grad_norm": 1.0772857030288403, "learning_rate": 4.104925017576458e-05, "loss": 0.2494, "step": 10112 }, { "epoch": 1.1992173603699752, "grad_norm": 1.4583294271764262, "learning_rate": 4.104740967349635e-05, "loss": 0.3401, "step": 10113 }, { "epoch": 1.1993359421321002, "grad_norm": 1.7702943474503443, "learning_rate": 4.104556902329069e-05, "loss": 0.3734, "step": 10114 }, { "epoch": 1.1994545238942251, "grad_norm": 1.1529287201234797, "learning_rate": 4.1043728225164544e-05, "loss": 0.2544, "step": 10115 }, { "epoch": 1.1995731056563501, "grad_norm": 1.5990518288830944, "learning_rate": 4.104188727913492e-05, "loss": 0.3738, "step": 10116 }, { "epoch": 1.199691687418475, "grad_norm": 1.0887062679382393, "learning_rate": 4.1040046185218756e-05, "loss": 0.2249, "step": 10117 }, { "epoch": 1.1998102691806, "grad_norm": 1.4283878740281928, "learning_rate": 4.1038204943433036e-05, "loss": 0.3386, "step": 10118 }, { "epoch": 1.199928850942725, "grad_norm": 1.2021059683705713, "learning_rate": 4.103636355379473e-05, "loss": 0.2849, "step": 10119 }, { "epoch": 1.20004743270485, "grad_norm": 1.1353258113817053, "learning_rate": 4.103452201632083e-05, "loss": 0.2579, "step": 10120 }, { "epoch": 1.200166014466975, "grad_norm": 1.0716221811004785, "learning_rate": 4.1032680331028285e-05, "loss": 0.2471, "step": 10121 }, { "epoch": 1.2002845962291, "grad_norm": 1.064036276251583, "learning_rate": 4.1030838497934096e-05, "loss": 0.2319, "step": 10122 }, { "epoch": 1.200403177991225, "grad_norm": 1.0551527579060316, "learning_rate": 4.102899651705523e-05, "loss": 0.1501, "step": 10123 }, { "epoch": 1.20052175975335, "grad_norm": 1.389950639256929, "learning_rate": 4.102715438840867e-05, "loss": 0.3737, "step": 10124 }, { "epoch": 1.200640341515475, "grad_norm": 1.1791191977489621, "learning_rate": 4.1025312112011415e-05, "loss": 0.2206, "step": 10125 }, { "epoch": 1.2007589232776, "grad_norm": 1.1576957436745274, "learning_rate": 4.102346968788042e-05, "loss": 0.3197, "step": 10126 }, { "epoch": 1.200877505039725, "grad_norm": 1.1800929722347389, "learning_rate": 4.102162711603269e-05, "loss": 0.2357, "step": 10127 }, { "epoch": 1.2009960868018499, "grad_norm": 1.0819533359904676, "learning_rate": 4.1019784396485205e-05, "loss": 0.2657, "step": 10128 }, { "epoch": 1.2011146685639749, "grad_norm": 1.8265215751647212, "learning_rate": 4.1017941529254955e-05, "loss": 0.3905, "step": 10129 }, { "epoch": 1.2012332503260998, "grad_norm": 1.1358342153756338, "learning_rate": 4.101609851435892e-05, "loss": 0.2339, "step": 10130 }, { "epoch": 1.2013518320882248, "grad_norm": 1.101327878669638, "learning_rate": 4.10142553518141e-05, "loss": 0.1796, "step": 10131 }, { "epoch": 1.2014704138503498, "grad_norm": 0.936585809948274, "learning_rate": 4.1012412041637483e-05, "loss": 0.2048, "step": 10132 }, { "epoch": 1.2015889956124748, "grad_norm": 1.0308399496397147, "learning_rate": 4.101056858384606e-05, "loss": 0.2356, "step": 10133 }, { "epoch": 1.2017075773745998, "grad_norm": 0.9546384594511809, "learning_rate": 4.100872497845683e-05, "loss": 0.2272, "step": 10134 }, { "epoch": 1.2018261591367247, "grad_norm": 1.195640423158244, "learning_rate": 4.1006881225486794e-05, "loss": 0.227, "step": 10135 }, { "epoch": 1.2019447408988497, "grad_norm": 1.7188517006694826, "learning_rate": 4.1005037324952934e-05, "loss": 0.3612, "step": 10136 }, { "epoch": 1.2020633226609747, "grad_norm": 1.2529696254001514, "learning_rate": 4.1003193276872256e-05, "loss": 0.3062, "step": 10137 }, { "epoch": 1.2021819044230997, "grad_norm": 1.0569311222613382, "learning_rate": 4.1001349081261756e-05, "loss": 0.2565, "step": 10138 }, { "epoch": 1.2023004861852247, "grad_norm": 1.0313310799546145, "learning_rate": 4.099950473813845e-05, "loss": 0.2163, "step": 10139 }, { "epoch": 1.2024190679473497, "grad_norm": 1.4167794733864791, "learning_rate": 4.0997660247519324e-05, "loss": 0.2814, "step": 10140 }, { "epoch": 1.2025376497094746, "grad_norm": 0.9442673676433876, "learning_rate": 4.0995815609421394e-05, "loss": 0.221, "step": 10141 }, { "epoch": 1.2026562314715996, "grad_norm": 1.434688029996051, "learning_rate": 4.099397082386166e-05, "loss": 0.2853, "step": 10142 }, { "epoch": 1.2027748132337246, "grad_norm": 1.1550169483429702, "learning_rate": 4.099212589085712e-05, "loss": 0.2873, "step": 10143 }, { "epoch": 1.2028933949958496, "grad_norm": 1.5369203569969059, "learning_rate": 4.09902808104248e-05, "loss": 0.3733, "step": 10144 }, { "epoch": 1.2030119767579746, "grad_norm": 1.1232691428485828, "learning_rate": 4.098843558258168e-05, "loss": 0.2502, "step": 10145 }, { "epoch": 1.2031305585200995, "grad_norm": 1.6668873679469038, "learning_rate": 4.098659020734481e-05, "loss": 0.2806, "step": 10146 }, { "epoch": 1.2032491402822245, "grad_norm": 1.134385391374349, "learning_rate": 4.098474468473118e-05, "loss": 0.2468, "step": 10147 }, { "epoch": 1.2033677220443495, "grad_norm": 1.3438778018297415, "learning_rate": 4.09828990147578e-05, "loss": 0.2268, "step": 10148 }, { "epoch": 1.2034863038064745, "grad_norm": 1.2946938991409886, "learning_rate": 4.098105319744169e-05, "loss": 0.3574, "step": 10149 }, { "epoch": 1.2036048855685995, "grad_norm": 1.1016701317290052, "learning_rate": 4.097920723279988e-05, "loss": 0.2365, "step": 10150 }, { "epoch": 1.2037234673307244, "grad_norm": 1.2243748979913949, "learning_rate": 4.0977361120849353e-05, "loss": 0.2229, "step": 10151 }, { "epoch": 1.2038420490928496, "grad_norm": 1.3555098215189718, "learning_rate": 4.0975514861607165e-05, "loss": 0.2736, "step": 10152 }, { "epoch": 1.2039606308549744, "grad_norm": 0.9198562896039261, "learning_rate": 4.097366845509031e-05, "loss": 0.1869, "step": 10153 }, { "epoch": 1.2040792126170996, "grad_norm": 1.373201116406091, "learning_rate": 4.097182190131583e-05, "loss": 0.2407, "step": 10154 }, { "epoch": 1.2041977943792244, "grad_norm": 0.9618674156468626, "learning_rate": 4.096997520030073e-05, "loss": 0.2317, "step": 10155 }, { "epoch": 1.2043163761413496, "grad_norm": 0.9527515931393938, "learning_rate": 4.096812835206204e-05, "loss": 0.1907, "step": 10156 }, { "epoch": 1.2044349579034743, "grad_norm": 1.5549003051983408, "learning_rate": 4.09662813566168e-05, "loss": 0.3926, "step": 10157 }, { "epoch": 1.2045535396655995, "grad_norm": 1.1657579496902652, "learning_rate": 4.096443421398202e-05, "loss": 0.2506, "step": 10158 }, { "epoch": 1.2046721214277245, "grad_norm": 1.0796789124999318, "learning_rate": 4.096258692417473e-05, "loss": 0.2003, "step": 10159 }, { "epoch": 1.2047907031898495, "grad_norm": 1.0353586043321399, "learning_rate": 4.0960739487211965e-05, "loss": 0.2044, "step": 10160 }, { "epoch": 1.2049092849519745, "grad_norm": 1.5863920207631093, "learning_rate": 4.0958891903110764e-05, "loss": 0.3363, "step": 10161 }, { "epoch": 1.2050278667140994, "grad_norm": 1.190550368433452, "learning_rate": 4.095704417188814e-05, "loss": 0.2218, "step": 10162 }, { "epoch": 1.2051464484762244, "grad_norm": 1.4497766015624562, "learning_rate": 4.0955196293561134e-05, "loss": 0.3323, "step": 10163 }, { "epoch": 1.2052650302383494, "grad_norm": 1.3540693300328088, "learning_rate": 4.095334826814678e-05, "loss": 0.3355, "step": 10164 }, { "epoch": 1.2053836120004744, "grad_norm": 0.9217079983859838, "learning_rate": 4.095150009566213e-05, "loss": 0.1792, "step": 10165 }, { "epoch": 1.2055021937625994, "grad_norm": 1.465627581681539, "learning_rate": 4.0949651776124205e-05, "loss": 0.3237, "step": 10166 }, { "epoch": 1.2056207755247244, "grad_norm": 1.3377624261039944, "learning_rate": 4.094780330955005e-05, "loss": 0.2164, "step": 10167 }, { "epoch": 1.2057393572868493, "grad_norm": 0.9847702014764638, "learning_rate": 4.09459546959567e-05, "loss": 0.2422, "step": 10168 }, { "epoch": 1.2058579390489743, "grad_norm": 1.0074892722766746, "learning_rate": 4.094410593536121e-05, "loss": 0.2148, "step": 10169 }, { "epoch": 1.2059765208110993, "grad_norm": 1.0186595149053943, "learning_rate": 4.094225702778061e-05, "loss": 0.2394, "step": 10170 }, { "epoch": 1.2060951025732243, "grad_norm": 1.2149873654893601, "learning_rate": 4.094040797323195e-05, "loss": 0.2761, "step": 10171 }, { "epoch": 1.2062136843353493, "grad_norm": 1.0304441089442282, "learning_rate": 4.093855877173228e-05, "loss": 0.1904, "step": 10172 }, { "epoch": 1.2063322660974742, "grad_norm": 0.9338174234477378, "learning_rate": 4.093670942329864e-05, "loss": 0.1893, "step": 10173 }, { "epoch": 1.2064508478595992, "grad_norm": 0.7584986024159174, "learning_rate": 4.093485992794809e-05, "loss": 0.1817, "step": 10174 }, { "epoch": 1.2065694296217242, "grad_norm": 1.2762350995987157, "learning_rate": 4.093301028569766e-05, "loss": 0.2703, "step": 10175 }, { "epoch": 1.2066880113838492, "grad_norm": 1.0565452179733306, "learning_rate": 4.093116049656442e-05, "loss": 0.2249, "step": 10176 }, { "epoch": 1.2068065931459742, "grad_norm": 1.1925868722593789, "learning_rate": 4.092931056056542e-05, "loss": 0.3015, "step": 10177 }, { "epoch": 1.2069251749080991, "grad_norm": 0.9155573596006452, "learning_rate": 4.092746047771769e-05, "loss": 0.1967, "step": 10178 }, { "epoch": 1.2070437566702241, "grad_norm": 0.9694333383942133, "learning_rate": 4.0925610248038326e-05, "loss": 0.1916, "step": 10179 }, { "epoch": 1.207162338432349, "grad_norm": 1.205043977822242, "learning_rate": 4.092375987154436e-05, "loss": 0.2978, "step": 10180 }, { "epoch": 1.207280920194474, "grad_norm": 2.0187936664709665, "learning_rate": 4.0921909348252856e-05, "loss": 0.4646, "step": 10181 }, { "epoch": 1.207399501956599, "grad_norm": 1.1556435811578174, "learning_rate": 4.092005867818087e-05, "loss": 0.2126, "step": 10182 }, { "epoch": 1.207518083718724, "grad_norm": 0.9428100955597953, "learning_rate": 4.091820786134547e-05, "loss": 0.1871, "step": 10183 }, { "epoch": 1.207636665480849, "grad_norm": 1.1357021714374476, "learning_rate": 4.091635689776371e-05, "loss": 0.2679, "step": 10184 }, { "epoch": 1.207755247242974, "grad_norm": 0.9922596132523961, "learning_rate": 4.091450578745266e-05, "loss": 0.2135, "step": 10185 }, { "epoch": 1.207873829005099, "grad_norm": 1.072713743020643, "learning_rate": 4.091265453042937e-05, "loss": 0.185, "step": 10186 }, { "epoch": 1.207992410767224, "grad_norm": 0.9650377249297603, "learning_rate": 4.091080312671094e-05, "loss": 0.2411, "step": 10187 }, { "epoch": 1.208110992529349, "grad_norm": 1.1383697024052295, "learning_rate": 4.0908951576314404e-05, "loss": 0.2015, "step": 10188 }, { "epoch": 1.208229574291474, "grad_norm": 0.9502630178513273, "learning_rate": 4.0907099879256836e-05, "loss": 0.2003, "step": 10189 }, { "epoch": 1.208348156053599, "grad_norm": 0.7955936632410228, "learning_rate": 4.090524803555533e-05, "loss": 0.1835, "step": 10190 }, { "epoch": 1.208466737815724, "grad_norm": 1.2611954705512252, "learning_rate": 4.090339604522693e-05, "loss": 0.2318, "step": 10191 }, { "epoch": 1.2085853195778489, "grad_norm": 0.9190337939460484, "learning_rate": 4.090154390828872e-05, "loss": 0.1776, "step": 10192 }, { "epoch": 1.2087039013399739, "grad_norm": 1.1839339866022651, "learning_rate": 4.0899691624757786e-05, "loss": 0.2182, "step": 10193 }, { "epoch": 1.2088224831020988, "grad_norm": 1.2847279009215706, "learning_rate": 4.089783919465118e-05, "loss": 0.3102, "step": 10194 }, { "epoch": 1.2089410648642238, "grad_norm": 1.1023585371658664, "learning_rate": 4.0895986617986e-05, "loss": 0.2306, "step": 10195 }, { "epoch": 1.2090596466263488, "grad_norm": 1.1244134350481656, "learning_rate": 4.0894133894779316e-05, "loss": 0.2552, "step": 10196 }, { "epoch": 1.2091782283884738, "grad_norm": 0.9931473144063955, "learning_rate": 4.0892281025048204e-05, "loss": 0.2399, "step": 10197 }, { "epoch": 1.2092968101505988, "grad_norm": 1.1822287499739839, "learning_rate": 4.089042800880975e-05, "loss": 0.2841, "step": 10198 }, { "epoch": 1.2094153919127237, "grad_norm": 0.9060762683156744, "learning_rate": 4.088857484608104e-05, "loss": 0.1796, "step": 10199 }, { "epoch": 1.2095339736748487, "grad_norm": 0.872322498006015, "learning_rate": 4.088672153687915e-05, "loss": 0.2016, "step": 10200 }, { "epoch": 1.2096525554369737, "grad_norm": 0.8663670952857345, "learning_rate": 4.0884868081221174e-05, "loss": 0.1608, "step": 10201 }, { "epoch": 1.2097711371990987, "grad_norm": 0.8651539947637432, "learning_rate": 4.088301447912418e-05, "loss": 0.2232, "step": 10202 }, { "epoch": 1.2098897189612239, "grad_norm": 1.608270089772309, "learning_rate": 4.088116073060528e-05, "loss": 0.3295, "step": 10203 }, { "epoch": 1.2100083007233486, "grad_norm": 1.2352770109862852, "learning_rate": 4.087930683568156e-05, "loss": 0.2213, "step": 10204 }, { "epoch": 1.2101268824854738, "grad_norm": 0.8391347101612521, "learning_rate": 4.087745279437009e-05, "loss": 0.1597, "step": 10205 }, { "epoch": 1.2102454642475986, "grad_norm": 1.2410967343531722, "learning_rate": 4.087559860668798e-05, "loss": 0.2857, "step": 10206 }, { "epoch": 1.2103640460097238, "grad_norm": 1.1709539095412569, "learning_rate": 4.0873744272652325e-05, "loss": 0.1823, "step": 10207 }, { "epoch": 1.2104826277718488, "grad_norm": 1.3002403148307262, "learning_rate": 4.08718897922802e-05, "loss": 0.2529, "step": 10208 }, { "epoch": 1.2106012095339738, "grad_norm": 1.4675355092217133, "learning_rate": 4.087003516558873e-05, "loss": 0.3008, "step": 10209 }, { "epoch": 1.2107197912960987, "grad_norm": 1.28416685775307, "learning_rate": 4.086818039259498e-05, "loss": 0.2312, "step": 10210 }, { "epoch": 1.2108383730582237, "grad_norm": 1.262348458846978, "learning_rate": 4.086632547331608e-05, "loss": 0.2084, "step": 10211 }, { "epoch": 1.2109569548203487, "grad_norm": 1.2289776941981818, "learning_rate": 4.0864470407769114e-05, "loss": 0.2568, "step": 10212 }, { "epoch": 1.2110755365824737, "grad_norm": 1.7375408420393477, "learning_rate": 4.086261519597118e-05, "loss": 0.3488, "step": 10213 }, { "epoch": 1.2111941183445987, "grad_norm": 1.3243500338490757, "learning_rate": 4.0860759837939386e-05, "loss": 0.2741, "step": 10214 }, { "epoch": 1.2113127001067236, "grad_norm": 0.7907342780452906, "learning_rate": 4.085890433369084e-05, "loss": 0.1431, "step": 10215 }, { "epoch": 1.2114312818688486, "grad_norm": 1.1458723521952625, "learning_rate": 4.085704868324264e-05, "loss": 0.273, "step": 10216 }, { "epoch": 1.2115498636309736, "grad_norm": 1.5893087495292009, "learning_rate": 4.08551928866119e-05, "loss": 0.253, "step": 10217 }, { "epoch": 1.2116684453930986, "grad_norm": 0.891560372571105, "learning_rate": 4.085333694381572e-05, "loss": 0.1791, "step": 10218 }, { "epoch": 1.2117870271552236, "grad_norm": 1.3952797129616346, "learning_rate": 4.085148085487122e-05, "loss": 0.2631, "step": 10219 }, { "epoch": 1.2119056089173486, "grad_norm": 1.0798993190310615, "learning_rate": 4.0849624619795504e-05, "loss": 0.209, "step": 10220 }, { "epoch": 1.2120241906794735, "grad_norm": 1.1906924347185477, "learning_rate": 4.0847768238605676e-05, "loss": 0.2098, "step": 10221 }, { "epoch": 1.2121427724415985, "grad_norm": 1.1539969931428957, "learning_rate": 4.084591171131886e-05, "loss": 0.1802, "step": 10222 }, { "epoch": 1.2122613542037235, "grad_norm": 1.0432603102133833, "learning_rate": 4.084405503795218e-05, "loss": 0.222, "step": 10223 }, { "epoch": 1.2123799359658485, "grad_norm": 1.164833997568905, "learning_rate": 4.084219821852274e-05, "loss": 0.2373, "step": 10224 }, { "epoch": 1.2124985177279735, "grad_norm": 0.965253810681616, "learning_rate": 4.084034125304765e-05, "loss": 0.253, "step": 10225 }, { "epoch": 1.2126170994900984, "grad_norm": 0.9264097863019384, "learning_rate": 4.083848414154405e-05, "loss": 0.1713, "step": 10226 }, { "epoch": 1.2127356812522234, "grad_norm": 1.0592164498487315, "learning_rate": 4.083662688402904e-05, "loss": 0.2185, "step": 10227 }, { "epoch": 1.2128542630143484, "grad_norm": 1.457275860385385, "learning_rate": 4.083476948051975e-05, "loss": 0.2816, "step": 10228 }, { "epoch": 1.2129728447764734, "grad_norm": 1.4903222080627936, "learning_rate": 4.083291193103331e-05, "loss": 0.3186, "step": 10229 }, { "epoch": 1.2130914265385984, "grad_norm": 1.122566659933671, "learning_rate": 4.0831054235586836e-05, "loss": 0.221, "step": 10230 }, { "epoch": 1.2132100083007233, "grad_norm": 1.0338368345738906, "learning_rate": 4.082919639419745e-05, "loss": 0.2021, "step": 10231 }, { "epoch": 1.2133285900628483, "grad_norm": 0.9875112832883731, "learning_rate": 4.082733840688229e-05, "loss": 0.1724, "step": 10232 }, { "epoch": 1.2134471718249733, "grad_norm": 1.081448727879954, "learning_rate": 4.082548027365847e-05, "loss": 0.2649, "step": 10233 }, { "epoch": 1.2135657535870983, "grad_norm": 1.502245481414067, "learning_rate": 4.0823621994543136e-05, "loss": 0.3143, "step": 10234 }, { "epoch": 1.2136843353492233, "grad_norm": 1.1478157591358962, "learning_rate": 4.082176356955342e-05, "loss": 0.2655, "step": 10235 }, { "epoch": 1.2138029171113482, "grad_norm": 1.178054348545664, "learning_rate": 4.0819904998706424e-05, "loss": 0.2694, "step": 10236 }, { "epoch": 1.2139214988734732, "grad_norm": 0.8321173507579744, "learning_rate": 4.0818046282019315e-05, "loss": 0.1633, "step": 10237 }, { "epoch": 1.2140400806355982, "grad_norm": 1.454685502400587, "learning_rate": 4.0816187419509224e-05, "loss": 0.2763, "step": 10238 }, { "epoch": 1.2141586623977232, "grad_norm": 0.9586447640542258, "learning_rate": 4.081432841119327e-05, "loss": 0.1947, "step": 10239 }, { "epoch": 1.2142772441598482, "grad_norm": 1.2382880855381473, "learning_rate": 4.0812469257088605e-05, "loss": 0.2724, "step": 10240 }, { "epoch": 1.2143958259219731, "grad_norm": 0.8753838249992335, "learning_rate": 4.081060995721237e-05, "loss": 0.1828, "step": 10241 }, { "epoch": 1.2145144076840981, "grad_norm": 1.3642913008776578, "learning_rate": 4.080875051158168e-05, "loss": 0.2903, "step": 10242 }, { "epoch": 1.214632989446223, "grad_norm": 0.9746579883733816, "learning_rate": 4.080689092021372e-05, "loss": 0.2368, "step": 10243 }, { "epoch": 1.214751571208348, "grad_norm": 0.9208365052701576, "learning_rate": 4.0805031183125594e-05, "loss": 0.2216, "step": 10244 }, { "epoch": 1.214870152970473, "grad_norm": 1.3900279026328566, "learning_rate": 4.080317130033446e-05, "loss": 0.2324, "step": 10245 }, { "epoch": 1.214988734732598, "grad_norm": 1.295535716624153, "learning_rate": 4.0801311271857476e-05, "loss": 0.2556, "step": 10246 }, { "epoch": 1.215107316494723, "grad_norm": 0.9883270327179872, "learning_rate": 4.079945109771177e-05, "loss": 0.2261, "step": 10247 }, { "epoch": 1.215225898256848, "grad_norm": 1.3227013525179128, "learning_rate": 4.0797590777914506e-05, "loss": 0.3109, "step": 10248 }, { "epoch": 1.215344480018973, "grad_norm": 1.746518958296574, "learning_rate": 4.079573031248283e-05, "loss": 0.3725, "step": 10249 }, { "epoch": 1.215463061781098, "grad_norm": 1.9459292155309782, "learning_rate": 4.0793869701433883e-05, "loss": 0.3743, "step": 10250 }, { "epoch": 1.215581643543223, "grad_norm": 1.6105986647054182, "learning_rate": 4.0792008944784826e-05, "loss": 0.4305, "step": 10251 }, { "epoch": 1.2157002253053482, "grad_norm": 0.9910418377457845, "learning_rate": 4.079014804255281e-05, "loss": 0.2051, "step": 10252 }, { "epoch": 1.215818807067473, "grad_norm": 1.2028534158447939, "learning_rate": 4.0788286994755e-05, "loss": 0.3145, "step": 10253 }, { "epoch": 1.2159373888295981, "grad_norm": 0.9788081307773414, "learning_rate": 4.078642580140854e-05, "loss": 0.1889, "step": 10254 }, { "epoch": 1.2160559705917229, "grad_norm": 0.9477192842644316, "learning_rate": 4.078456446253059e-05, "loss": 0.2279, "step": 10255 }, { "epoch": 1.216174552353848, "grad_norm": 1.1928961694698341, "learning_rate": 4.0782702978138323e-05, "loss": 0.2094, "step": 10256 }, { "epoch": 1.2162931341159728, "grad_norm": 1.1109019807824705, "learning_rate": 4.0780841348248875e-05, "loss": 0.2139, "step": 10257 }, { "epoch": 1.216411715878098, "grad_norm": 1.5549994049655793, "learning_rate": 4.077897957287943e-05, "loss": 0.3279, "step": 10258 }, { "epoch": 1.216530297640223, "grad_norm": 0.8197984422674387, "learning_rate": 4.077711765204714e-05, "loss": 0.1871, "step": 10259 }, { "epoch": 1.216648879402348, "grad_norm": 1.4058301647667417, "learning_rate": 4.077525558576918e-05, "loss": 0.2794, "step": 10260 }, { "epoch": 1.216767461164473, "grad_norm": 1.0770198909695483, "learning_rate": 4.07733933740627e-05, "loss": 0.237, "step": 10261 }, { "epoch": 1.216886042926598, "grad_norm": 1.3475563051762618, "learning_rate": 4.077153101694487e-05, "loss": 0.2454, "step": 10262 }, { "epoch": 1.217004624688723, "grad_norm": 1.3316602949519247, "learning_rate": 4.076966851443287e-05, "loss": 0.265, "step": 10263 }, { "epoch": 1.217123206450848, "grad_norm": 1.020668856314294, "learning_rate": 4.076780586654387e-05, "loss": 0.2394, "step": 10264 }, { "epoch": 1.217241788212973, "grad_norm": 1.3940977115137962, "learning_rate": 4.0765943073295035e-05, "loss": 0.3089, "step": 10265 }, { "epoch": 1.217360369975098, "grad_norm": 1.4387248312315777, "learning_rate": 4.076408013470353e-05, "loss": 0.3201, "step": 10266 }, { "epoch": 1.2174789517372229, "grad_norm": 1.4484811668436048, "learning_rate": 4.0762217050786544e-05, "loss": 0.2844, "step": 10267 }, { "epoch": 1.2175975334993479, "grad_norm": 1.370809982485104, "learning_rate": 4.0760353821561245e-05, "loss": 0.3152, "step": 10268 }, { "epoch": 1.2177161152614728, "grad_norm": 0.8523853760152775, "learning_rate": 4.0758490447044805e-05, "loss": 0.1921, "step": 10269 }, { "epoch": 1.2178346970235978, "grad_norm": 1.351058469628779, "learning_rate": 4.0756626927254414e-05, "loss": 0.3423, "step": 10270 }, { "epoch": 1.2179532787857228, "grad_norm": 0.8813399971637731, "learning_rate": 4.075476326220724e-05, "loss": 0.1994, "step": 10271 }, { "epoch": 1.2180718605478478, "grad_norm": 1.2305426745761106, "learning_rate": 4.0752899451920475e-05, "loss": 0.2988, "step": 10272 }, { "epoch": 1.2181904423099728, "grad_norm": 1.4218744293762504, "learning_rate": 4.075103549641129e-05, "loss": 0.3867, "step": 10273 }, { "epoch": 1.2183090240720977, "grad_norm": 1.2401304491389225, "learning_rate": 4.074917139569687e-05, "loss": 0.3173, "step": 10274 }, { "epoch": 1.2184276058342227, "grad_norm": 1.0296452519508474, "learning_rate": 4.074730714979441e-05, "loss": 0.2013, "step": 10275 }, { "epoch": 1.2185461875963477, "grad_norm": 1.080363999820064, "learning_rate": 4.074544275872109e-05, "loss": 0.2161, "step": 10276 }, { "epoch": 1.2186647693584727, "grad_norm": 1.074733604937567, "learning_rate": 4.074357822249408e-05, "loss": 0.2555, "step": 10277 }, { "epoch": 1.2187833511205977, "grad_norm": 0.9837244023557823, "learning_rate": 4.07417135411306e-05, "loss": 0.2022, "step": 10278 }, { "epoch": 1.2189019328827226, "grad_norm": 1.1730818126155909, "learning_rate": 4.0739848714647814e-05, "loss": 0.2586, "step": 10279 }, { "epoch": 1.2190205146448476, "grad_norm": 0.9761842475621036, "learning_rate": 4.0737983743062935e-05, "loss": 0.1841, "step": 10280 }, { "epoch": 1.2191390964069726, "grad_norm": 0.9474092624997897, "learning_rate": 4.073611862639314e-05, "loss": 0.2345, "step": 10281 }, { "epoch": 1.2192576781690976, "grad_norm": 1.0317486687126054, "learning_rate": 4.073425336465563e-05, "loss": 0.2102, "step": 10282 }, { "epoch": 1.2193762599312226, "grad_norm": 1.041609820733456, "learning_rate": 4.0732387957867596e-05, "loss": 0.2133, "step": 10283 }, { "epoch": 1.2194948416933475, "grad_norm": 0.9366668988897691, "learning_rate": 4.073052240604624e-05, "loss": 0.1954, "step": 10284 }, { "epoch": 1.2196134234554725, "grad_norm": 1.5611131767879702, "learning_rate": 4.0728656709208755e-05, "loss": 0.3343, "step": 10285 }, { "epoch": 1.2197320052175975, "grad_norm": 1.1809157245042148, "learning_rate": 4.072679086737235e-05, "loss": 0.2618, "step": 10286 }, { "epoch": 1.2198505869797225, "grad_norm": 1.031965069462004, "learning_rate": 4.072492488055421e-05, "loss": 0.1799, "step": 10287 }, { "epoch": 1.2199691687418475, "grad_norm": 1.0258788719396572, "learning_rate": 4.072305874877155e-05, "loss": 0.2401, "step": 10288 }, { "epoch": 1.2200877505039724, "grad_norm": 0.9382033281246679, "learning_rate": 4.072119247204157e-05, "loss": 0.1892, "step": 10289 }, { "epoch": 1.2202063322660974, "grad_norm": 1.2299562016584125, "learning_rate": 4.0719326050381465e-05, "loss": 0.2897, "step": 10290 }, { "epoch": 1.2203249140282224, "grad_norm": 1.3039396432243335, "learning_rate": 4.071745948380846e-05, "loss": 0.2879, "step": 10291 }, { "epoch": 1.2204434957903474, "grad_norm": 1.3139195329004056, "learning_rate": 4.071559277233975e-05, "loss": 0.2549, "step": 10292 }, { "epoch": 1.2205620775524724, "grad_norm": 0.8750268377486163, "learning_rate": 4.071372591599255e-05, "loss": 0.2093, "step": 10293 }, { "epoch": 1.2206806593145973, "grad_norm": 0.8916521372738684, "learning_rate": 4.0711858914784054e-05, "loss": 0.164, "step": 10294 }, { "epoch": 1.2207992410767223, "grad_norm": 1.1495165176746145, "learning_rate": 4.0709991768731504e-05, "loss": 0.202, "step": 10295 }, { "epoch": 1.2209178228388473, "grad_norm": 1.2823566867758875, "learning_rate": 4.070812447785208e-05, "loss": 0.1934, "step": 10296 }, { "epoch": 1.2210364046009723, "grad_norm": 0.9309214755027723, "learning_rate": 4.070625704216302e-05, "loss": 0.1552, "step": 10297 }, { "epoch": 1.2211549863630973, "grad_norm": 1.0766286903510887, "learning_rate": 4.070438946168153e-05, "loss": 0.2329, "step": 10298 }, { "epoch": 1.2212735681252223, "grad_norm": 1.5477588018276742, "learning_rate": 4.070252173642481e-05, "loss": 0.31, "step": 10299 }, { "epoch": 1.2213921498873472, "grad_norm": 1.1052874841189015, "learning_rate": 4.070065386641011e-05, "loss": 0.2729, "step": 10300 }, { "epoch": 1.2215107316494722, "grad_norm": 1.2032268934610537, "learning_rate": 4.069878585165464e-05, "loss": 0.2847, "step": 10301 }, { "epoch": 1.2216293134115972, "grad_norm": 1.1666917251255546, "learning_rate": 4.0696917692175605e-05, "loss": 0.2604, "step": 10302 }, { "epoch": 1.2217478951737224, "grad_norm": 1.0444779220342593, "learning_rate": 4.069504938799025e-05, "loss": 0.2461, "step": 10303 }, { "epoch": 1.2218664769358472, "grad_norm": 1.2182181974880055, "learning_rate": 4.069318093911577e-05, "loss": 0.1934, "step": 10304 }, { "epoch": 1.2219850586979724, "grad_norm": 1.0179658366359854, "learning_rate": 4.069131234556942e-05, "loss": 0.2186, "step": 10305 }, { "epoch": 1.2221036404600971, "grad_norm": 1.2856905857748602, "learning_rate": 4.0689443607368405e-05, "loss": 0.2333, "step": 10306 }, { "epoch": 1.2222222222222223, "grad_norm": 1.17300439391137, "learning_rate": 4.068757472452996e-05, "loss": 0.2742, "step": 10307 }, { "epoch": 1.2223408039843473, "grad_norm": 1.338646078889366, "learning_rate": 4.0685705697071317e-05, "loss": 0.2201, "step": 10308 }, { "epoch": 1.2224593857464723, "grad_norm": 0.7568836205283361, "learning_rate": 4.06838365250097e-05, "loss": 0.1354, "step": 10309 }, { "epoch": 1.2225779675085973, "grad_norm": 0.9603537433216494, "learning_rate": 4.0681967208362344e-05, "loss": 0.1734, "step": 10310 }, { "epoch": 1.2226965492707222, "grad_norm": 1.0761707993782752, "learning_rate": 4.0680097747146474e-05, "loss": 0.2543, "step": 10311 }, { "epoch": 1.2228151310328472, "grad_norm": 1.184485265370033, "learning_rate": 4.0678228141379336e-05, "loss": 0.2354, "step": 10312 }, { "epoch": 1.2229337127949722, "grad_norm": 1.1399778149041233, "learning_rate": 4.067635839107816e-05, "loss": 0.253, "step": 10313 }, { "epoch": 1.2230522945570972, "grad_norm": 1.3069735074016442, "learning_rate": 4.067448849626019e-05, "loss": 0.2566, "step": 10314 }, { "epoch": 1.2231708763192222, "grad_norm": 1.2106966747555683, "learning_rate": 4.0672618456942654e-05, "loss": 0.1975, "step": 10315 }, { "epoch": 1.2232894580813471, "grad_norm": 1.0399887941766268, "learning_rate": 4.067074827314279e-05, "loss": 0.2532, "step": 10316 }, { "epoch": 1.2234080398434721, "grad_norm": 0.8376393667687961, "learning_rate": 4.0668877944877846e-05, "loss": 0.1509, "step": 10317 }, { "epoch": 1.223526621605597, "grad_norm": 1.281604557577526, "learning_rate": 4.066700747216507e-05, "loss": 0.335, "step": 10318 }, { "epoch": 1.223645203367722, "grad_norm": 0.9539099633635346, "learning_rate": 4.066513685502168e-05, "loss": 0.2265, "step": 10319 }, { "epoch": 1.223763785129847, "grad_norm": 1.0609306719199592, "learning_rate": 4.066326609346495e-05, "loss": 0.2395, "step": 10320 }, { "epoch": 1.223882366891972, "grad_norm": 1.1311756235082806, "learning_rate": 4.066139518751211e-05, "loss": 0.1945, "step": 10321 }, { "epoch": 1.224000948654097, "grad_norm": 1.1209256170493511, "learning_rate": 4.065952413718041e-05, "loss": 0.2149, "step": 10322 }, { "epoch": 1.224119530416222, "grad_norm": 1.1253779253064846, "learning_rate": 4.0657652942487114e-05, "loss": 0.233, "step": 10323 }, { "epoch": 1.224238112178347, "grad_norm": 1.4419135324799837, "learning_rate": 4.065578160344944e-05, "loss": 0.387, "step": 10324 }, { "epoch": 1.224356693940472, "grad_norm": 1.042097524145393, "learning_rate": 4.065391012008467e-05, "loss": 0.179, "step": 10325 }, { "epoch": 1.224475275702597, "grad_norm": 1.1054802779202186, "learning_rate": 4.065203849241004e-05, "loss": 0.2723, "step": 10326 }, { "epoch": 1.224593857464722, "grad_norm": 0.9513154067175317, "learning_rate": 4.065016672044281e-05, "loss": 0.2806, "step": 10327 }, { "epoch": 1.224712439226847, "grad_norm": 1.005001205353516, "learning_rate": 4.064829480420023e-05, "loss": 0.2596, "step": 10328 }, { "epoch": 1.224831020988972, "grad_norm": 1.1072610700673473, "learning_rate": 4.0646422743699574e-05, "loss": 0.2189, "step": 10329 }, { "epoch": 1.2249496027510969, "grad_norm": 1.0456194052300545, "learning_rate": 4.064455053895807e-05, "loss": 0.241, "step": 10330 }, { "epoch": 1.2250681845132219, "grad_norm": 1.375734181396911, "learning_rate": 4.064267818999301e-05, "loss": 0.2874, "step": 10331 }, { "epoch": 1.2251867662753468, "grad_norm": 0.954844341113241, "learning_rate": 4.064080569682163e-05, "loss": 0.222, "step": 10332 }, { "epoch": 1.2253053480374718, "grad_norm": 1.3563880438139067, "learning_rate": 4.06389330594612e-05, "loss": 0.2648, "step": 10333 }, { "epoch": 1.2254239297995968, "grad_norm": 1.2044479236654095, "learning_rate": 4.0637060277928994e-05, "loss": 0.212, "step": 10334 }, { "epoch": 1.2255425115617218, "grad_norm": 1.0231161242173488, "learning_rate": 4.063518735224227e-05, "loss": 0.2666, "step": 10335 }, { "epoch": 1.2256610933238468, "grad_norm": 1.0431945913763834, "learning_rate": 4.0633314282418275e-05, "loss": 0.2232, "step": 10336 }, { "epoch": 1.2257796750859717, "grad_norm": 0.8807268737440037, "learning_rate": 4.063144106847431e-05, "loss": 0.2353, "step": 10337 }, { "epoch": 1.2258982568480967, "grad_norm": 1.051394813427057, "learning_rate": 4.062956771042762e-05, "loss": 0.2021, "step": 10338 }, { "epoch": 1.2260168386102217, "grad_norm": 1.2109507975941567, "learning_rate": 4.062769420829547e-05, "loss": 0.2134, "step": 10339 }, { "epoch": 1.2261354203723467, "grad_norm": 1.6161296959054892, "learning_rate": 4.0625820562095165e-05, "loss": 0.4353, "step": 10340 }, { "epoch": 1.2262540021344717, "grad_norm": 1.022331559835647, "learning_rate": 4.062394677184395e-05, "loss": 0.1981, "step": 10341 }, { "epoch": 1.2263725838965966, "grad_norm": 1.1901834835802496, "learning_rate": 4.0622072837559096e-05, "loss": 0.2936, "step": 10342 }, { "epoch": 1.2264911656587216, "grad_norm": 1.157240761062325, "learning_rate": 4.0620198759257887e-05, "loss": 0.2376, "step": 10343 }, { "epoch": 1.2266097474208466, "grad_norm": 1.4580050676316623, "learning_rate": 4.061832453695761e-05, "loss": 0.2759, "step": 10344 }, { "epoch": 1.2267283291829716, "grad_norm": 1.4070927018441504, "learning_rate": 4.061645017067552e-05, "loss": 0.3392, "step": 10345 }, { "epoch": 1.2268469109450966, "grad_norm": 0.9715904545330502, "learning_rate": 4.0614575660428927e-05, "loss": 0.2124, "step": 10346 }, { "epoch": 1.2269654927072216, "grad_norm": 1.3475876987384037, "learning_rate": 4.0612701006235085e-05, "loss": 0.3005, "step": 10347 }, { "epoch": 1.2270840744693465, "grad_norm": 1.2259016992322325, "learning_rate": 4.0610826208111284e-05, "loss": 0.2501, "step": 10348 }, { "epoch": 1.2272026562314715, "grad_norm": 1.0860151188507194, "learning_rate": 4.060895126607481e-05, "loss": 0.2425, "step": 10349 }, { "epoch": 1.2273212379935965, "grad_norm": 1.013581272847547, "learning_rate": 4.060707618014295e-05, "loss": 0.177, "step": 10350 }, { "epoch": 1.2274398197557215, "grad_norm": 1.484871612701876, "learning_rate": 4.060520095033298e-05, "loss": 0.27, "step": 10351 }, { "epoch": 1.2275584015178467, "grad_norm": 1.3484467951333505, "learning_rate": 4.06033255766622e-05, "loss": 0.3577, "step": 10352 }, { "epoch": 1.2276769832799714, "grad_norm": 1.35252320910601, "learning_rate": 4.060145005914789e-05, "loss": 0.3128, "step": 10353 }, { "epoch": 1.2277955650420966, "grad_norm": 1.3558320560697066, "learning_rate": 4.0599574397807335e-05, "loss": 0.2908, "step": 10354 }, { "epoch": 1.2279141468042214, "grad_norm": 1.2253928242668557, "learning_rate": 4.059769859265785e-05, "loss": 0.207, "step": 10355 }, { "epoch": 1.2280327285663466, "grad_norm": 0.9305918148828173, "learning_rate": 4.05958226437167e-05, "loss": 0.176, "step": 10356 }, { "epoch": 1.2281513103284714, "grad_norm": 1.0311692335832507, "learning_rate": 4.059394655100119e-05, "loss": 0.2427, "step": 10357 }, { "epoch": 1.2282698920905966, "grad_norm": 0.9009773624812605, "learning_rate": 4.0592070314528614e-05, "loss": 0.2192, "step": 10358 }, { "epoch": 1.2283884738527215, "grad_norm": 1.2035279914437245, "learning_rate": 4.0590193934316274e-05, "loss": 0.2474, "step": 10359 }, { "epoch": 1.2285070556148465, "grad_norm": 1.1113872087730987, "learning_rate": 4.058831741038146e-05, "loss": 0.2708, "step": 10360 }, { "epoch": 1.2286256373769715, "grad_norm": 1.0945385584320637, "learning_rate": 4.058644074274148e-05, "loss": 0.2138, "step": 10361 }, { "epoch": 1.2287442191390965, "grad_norm": 1.5598622141249898, "learning_rate": 4.058456393141362e-05, "loss": 0.3975, "step": 10362 }, { "epoch": 1.2288628009012215, "grad_norm": 0.8715948448383642, "learning_rate": 4.0582686976415206e-05, "loss": 0.1905, "step": 10363 }, { "epoch": 1.2289813826633464, "grad_norm": 1.2610287703431173, "learning_rate": 4.0580809877763513e-05, "loss": 0.2849, "step": 10364 }, { "epoch": 1.2290999644254714, "grad_norm": 1.1218073106246012, "learning_rate": 4.057893263547587e-05, "loss": 0.1861, "step": 10365 }, { "epoch": 1.2292185461875964, "grad_norm": 1.0025476230465227, "learning_rate": 4.057705524956957e-05, "loss": 0.2125, "step": 10366 }, { "epoch": 1.2293371279497214, "grad_norm": 0.9176731337235872, "learning_rate": 4.057517772006192e-05, "loss": 0.217, "step": 10367 }, { "epoch": 1.2294557097118464, "grad_norm": 0.9488008286584205, "learning_rate": 4.0573300046970233e-05, "loss": 0.2148, "step": 10368 }, { "epoch": 1.2295742914739713, "grad_norm": 1.0656062754853088, "learning_rate": 4.057142223031181e-05, "loss": 0.2003, "step": 10369 }, { "epoch": 1.2296928732360963, "grad_norm": 1.1532068067971386, "learning_rate": 4.056954427010398e-05, "loss": 0.2795, "step": 10370 }, { "epoch": 1.2298114549982213, "grad_norm": 1.1386834669612202, "learning_rate": 4.0567666166364035e-05, "loss": 0.2567, "step": 10371 }, { "epoch": 1.2299300367603463, "grad_norm": 1.5347243857253139, "learning_rate": 4.0565787919109307e-05, "loss": 0.3564, "step": 10372 }, { "epoch": 1.2300486185224713, "grad_norm": 1.4663995886488497, "learning_rate": 4.056390952835709e-05, "loss": 0.338, "step": 10373 }, { "epoch": 1.2301672002845963, "grad_norm": 1.1885281558809169, "learning_rate": 4.056203099412472e-05, "loss": 0.322, "step": 10374 }, { "epoch": 1.2302857820467212, "grad_norm": 1.2903020845770692, "learning_rate": 4.0560152316429514e-05, "loss": 0.3541, "step": 10375 }, { "epoch": 1.2304043638088462, "grad_norm": 1.0443563001458418, "learning_rate": 4.055827349528878e-05, "loss": 0.2192, "step": 10376 }, { "epoch": 1.2305229455709712, "grad_norm": 1.2919293087861499, "learning_rate": 4.055639453071984e-05, "loss": 0.317, "step": 10377 }, { "epoch": 1.2306415273330962, "grad_norm": 1.2225131419413489, "learning_rate": 4.055451542274002e-05, "loss": 0.2503, "step": 10378 }, { "epoch": 1.2307601090952212, "grad_norm": 1.1360546293642564, "learning_rate": 4.0552636171366644e-05, "loss": 0.2569, "step": 10379 }, { "epoch": 1.2308786908573461, "grad_norm": 1.469185870123054, "learning_rate": 4.055075677661703e-05, "loss": 0.3153, "step": 10380 }, { "epoch": 1.2309972726194711, "grad_norm": 1.316407755210929, "learning_rate": 4.054887723850852e-05, "loss": 0.266, "step": 10381 }, { "epoch": 1.231115854381596, "grad_norm": 1.0166231201054174, "learning_rate": 4.0546997557058416e-05, "loss": 0.1845, "step": 10382 }, { "epoch": 1.231234436143721, "grad_norm": 1.0772726607225094, "learning_rate": 4.0545117732284065e-05, "loss": 0.195, "step": 10383 }, { "epoch": 1.231353017905846, "grad_norm": 1.436140172439385, "learning_rate": 4.054323776420279e-05, "loss": 0.2549, "step": 10384 }, { "epoch": 1.231471599667971, "grad_norm": 0.8609131422718318, "learning_rate": 4.054135765283192e-05, "loss": 0.1775, "step": 10385 }, { "epoch": 1.231590181430096, "grad_norm": 1.062905195915382, "learning_rate": 4.05394773981888e-05, "loss": 0.206, "step": 10386 }, { "epoch": 1.231708763192221, "grad_norm": 1.134812544122325, "learning_rate": 4.0537597000290744e-05, "loss": 0.2421, "step": 10387 }, { "epoch": 1.231827344954346, "grad_norm": 1.2308492256809125, "learning_rate": 4.05357164591551e-05, "loss": 0.2588, "step": 10388 }, { "epoch": 1.231945926716471, "grad_norm": 0.9739838362179883, "learning_rate": 4.053383577479921e-05, "loss": 0.1823, "step": 10389 }, { "epoch": 1.232064508478596, "grad_norm": 1.0102670363665212, "learning_rate": 4.053195494724039e-05, "loss": 0.2026, "step": 10390 }, { "epoch": 1.232183090240721, "grad_norm": 1.202890138384261, "learning_rate": 4.0530073976496e-05, "loss": 0.3331, "step": 10391 }, { "epoch": 1.232301672002846, "grad_norm": 1.4521372366814496, "learning_rate": 4.052819286258337e-05, "loss": 0.308, "step": 10392 }, { "epoch": 1.2324202537649709, "grad_norm": 1.2687170600942417, "learning_rate": 4.052631160551984e-05, "loss": 0.2613, "step": 10393 }, { "epoch": 1.2325388355270959, "grad_norm": 1.0096210892206983, "learning_rate": 4.052443020532276e-05, "loss": 0.2179, "step": 10394 }, { "epoch": 1.2326574172892208, "grad_norm": 1.162285964577226, "learning_rate": 4.052254866200947e-05, "loss": 0.2242, "step": 10395 }, { "epoch": 1.2327759990513458, "grad_norm": 1.2616258094738422, "learning_rate": 4.052066697559732e-05, "loss": 0.2734, "step": 10396 }, { "epoch": 1.2328945808134708, "grad_norm": 1.3323279307450362, "learning_rate": 4.051878514610365e-05, "loss": 0.2801, "step": 10397 }, { "epoch": 1.2330131625755958, "grad_norm": 1.5041861726248658, "learning_rate": 4.0516903173545816e-05, "loss": 0.3196, "step": 10398 }, { "epoch": 1.2331317443377208, "grad_norm": 0.9259855329577563, "learning_rate": 4.051502105794115e-05, "loss": 0.185, "step": 10399 }, { "epoch": 1.2332503260998458, "grad_norm": 1.253751688405963, "learning_rate": 4.051313879930703e-05, "loss": 0.2858, "step": 10400 }, { "epoch": 1.233368907861971, "grad_norm": 1.5697294893196387, "learning_rate": 4.051125639766079e-05, "loss": 0.4019, "step": 10401 }, { "epoch": 1.2334874896240957, "grad_norm": 1.1682495191139026, "learning_rate": 4.0509373853019785e-05, "loss": 0.2028, "step": 10402 }, { "epoch": 1.233606071386221, "grad_norm": 1.1083219549893202, "learning_rate": 4.050749116540137e-05, "loss": 0.2328, "step": 10403 }, { "epoch": 1.2337246531483457, "grad_norm": 0.9695602431625172, "learning_rate": 4.050560833482291e-05, "loss": 0.2011, "step": 10404 }, { "epoch": 1.2338432349104709, "grad_norm": 1.4674056766231716, "learning_rate": 4.0503725361301746e-05, "loss": 0.3292, "step": 10405 }, { "epoch": 1.2339618166725956, "grad_norm": 1.243027512160164, "learning_rate": 4.050184224485525e-05, "loss": 0.2848, "step": 10406 }, { "epoch": 1.2340803984347208, "grad_norm": 1.3414468410444953, "learning_rate": 4.049995898550078e-05, "loss": 0.2861, "step": 10407 }, { "epoch": 1.2341989801968458, "grad_norm": 1.0913395307614915, "learning_rate": 4.049807558325569e-05, "loss": 0.2028, "step": 10408 }, { "epoch": 1.2343175619589708, "grad_norm": 1.017994955442891, "learning_rate": 4.049619203813736e-05, "loss": 0.2191, "step": 10409 }, { "epoch": 1.2344361437210958, "grad_norm": 1.0807346083553915, "learning_rate": 4.049430835016313e-05, "loss": 0.2222, "step": 10410 }, { "epoch": 1.2345547254832208, "grad_norm": 1.4643312074749304, "learning_rate": 4.049242451935038e-05, "loss": 0.289, "step": 10411 }, { "epoch": 1.2346733072453457, "grad_norm": 1.337008294321385, "learning_rate": 4.049054054571648e-05, "loss": 0.3339, "step": 10412 }, { "epoch": 1.2347918890074707, "grad_norm": 1.2171753287405522, "learning_rate": 4.048865642927879e-05, "loss": 0.2292, "step": 10413 }, { "epoch": 1.2349104707695957, "grad_norm": 1.2654810421773326, "learning_rate": 4.0486772170054675e-05, "loss": 0.2801, "step": 10414 }, { "epoch": 1.2350290525317207, "grad_norm": 1.1019240865802675, "learning_rate": 4.048488776806152e-05, "loss": 0.221, "step": 10415 }, { "epoch": 1.2351476342938457, "grad_norm": 1.1903615042512776, "learning_rate": 4.048300322331668e-05, "loss": 0.2567, "step": 10416 }, { "epoch": 1.2352662160559706, "grad_norm": 1.0366270642293351, "learning_rate": 4.0481118535837546e-05, "loss": 0.2571, "step": 10417 }, { "epoch": 1.2353847978180956, "grad_norm": 1.323492449155374, "learning_rate": 4.047923370564147e-05, "loss": 0.2305, "step": 10418 }, { "epoch": 1.2355033795802206, "grad_norm": 1.4476333954939236, "learning_rate": 4.047734873274586e-05, "loss": 0.246, "step": 10419 }, { "epoch": 1.2356219613423456, "grad_norm": 1.1994137485677294, "learning_rate": 4.047546361716806e-05, "loss": 0.2672, "step": 10420 }, { "epoch": 1.2357405431044706, "grad_norm": 1.0268641673309007, "learning_rate": 4.047357835892546e-05, "loss": 0.2415, "step": 10421 }, { "epoch": 1.2358591248665955, "grad_norm": 1.6020146113053215, "learning_rate": 4.047169295803545e-05, "loss": 0.3262, "step": 10422 }, { "epoch": 1.2359777066287205, "grad_norm": 1.1474300405533635, "learning_rate": 4.04698074145154e-05, "loss": 0.191, "step": 10423 }, { "epoch": 1.2360962883908455, "grad_norm": 1.21633269032001, "learning_rate": 4.046792172838271e-05, "loss": 0.2752, "step": 10424 }, { "epoch": 1.2362148701529705, "grad_norm": 1.1147321407881818, "learning_rate": 4.046603589965473e-05, "loss": 0.2546, "step": 10425 }, { "epoch": 1.2363334519150955, "grad_norm": 1.0387741437188411, "learning_rate": 4.0464149928348874e-05, "loss": 0.2171, "step": 10426 }, { "epoch": 1.2364520336772205, "grad_norm": 1.689542317782831, "learning_rate": 4.0462263814482514e-05, "loss": 0.4004, "step": 10427 }, { "epoch": 1.2365706154393454, "grad_norm": 1.2335922217392163, "learning_rate": 4.046037755807305e-05, "loss": 0.2815, "step": 10428 }, { "epoch": 1.2366891972014704, "grad_norm": 1.4114706557345076, "learning_rate": 4.0458491159137854e-05, "loss": 0.3576, "step": 10429 }, { "epoch": 1.2368077789635954, "grad_norm": 1.2118961293792692, "learning_rate": 4.045660461769434e-05, "loss": 0.225, "step": 10430 }, { "epoch": 1.2369263607257204, "grad_norm": 1.0786544296009202, "learning_rate": 4.0454717933759875e-05, "loss": 0.2503, "step": 10431 }, { "epoch": 1.2370449424878454, "grad_norm": 1.4845167559253982, "learning_rate": 4.045283110735187e-05, "loss": 0.2648, "step": 10432 }, { "epoch": 1.2371635242499703, "grad_norm": 0.9447311265303799, "learning_rate": 4.04509441384877e-05, "loss": 0.1867, "step": 10433 }, { "epoch": 1.2372821060120953, "grad_norm": 1.023828830443806, "learning_rate": 4.044905702718479e-05, "loss": 0.2398, "step": 10434 }, { "epoch": 1.2374006877742203, "grad_norm": 0.8192392423249338, "learning_rate": 4.04471697734605e-05, "loss": 0.1692, "step": 10435 }, { "epoch": 1.2375192695363453, "grad_norm": 1.4831804850193884, "learning_rate": 4.044528237733226e-05, "loss": 0.2746, "step": 10436 }, { "epoch": 1.2376378512984703, "grad_norm": 1.1623504757281935, "learning_rate": 4.044339483881745e-05, "loss": 0.2242, "step": 10437 }, { "epoch": 1.2377564330605952, "grad_norm": 1.2146398150404634, "learning_rate": 4.0441507157933475e-05, "loss": 0.2517, "step": 10438 }, { "epoch": 1.2378750148227202, "grad_norm": 1.141496008480598, "learning_rate": 4.0439619334697754e-05, "loss": 0.2611, "step": 10439 }, { "epoch": 1.2379935965848452, "grad_norm": 0.9239401835496538, "learning_rate": 4.043773136912766e-05, "loss": 0.2127, "step": 10440 }, { "epoch": 1.2381121783469702, "grad_norm": 1.1043275231075644, "learning_rate": 4.043584326124062e-05, "loss": 0.2257, "step": 10441 }, { "epoch": 1.2382307601090952, "grad_norm": 1.1207237987783494, "learning_rate": 4.0433955011054034e-05, "loss": 0.2553, "step": 10442 }, { "epoch": 1.2383493418712201, "grad_norm": 0.9587476298544969, "learning_rate": 4.043206661858531e-05, "loss": 0.1517, "step": 10443 }, { "epoch": 1.2384679236333451, "grad_norm": 1.3635760389166562, "learning_rate": 4.0430178083851855e-05, "loss": 0.2611, "step": 10444 }, { "epoch": 1.23858650539547, "grad_norm": 1.3829757900433353, "learning_rate": 4.042828940687108e-05, "loss": 0.2461, "step": 10445 }, { "epoch": 1.238705087157595, "grad_norm": 1.1239778764736725, "learning_rate": 4.0426400587660396e-05, "loss": 0.2488, "step": 10446 }, { "epoch": 1.23882366891972, "grad_norm": 1.2029635580188662, "learning_rate": 4.042451162623722e-05, "loss": 0.2676, "step": 10447 }, { "epoch": 1.238942250681845, "grad_norm": 1.1371281154508992, "learning_rate": 4.042262252261896e-05, "loss": 0.3013, "step": 10448 }, { "epoch": 1.23906083244397, "grad_norm": 1.138781374418562, "learning_rate": 4.042073327682303e-05, "loss": 0.2858, "step": 10449 }, { "epoch": 1.239179414206095, "grad_norm": 1.1682930838709649, "learning_rate": 4.0418843888866845e-05, "loss": 0.2044, "step": 10450 }, { "epoch": 1.23929799596822, "grad_norm": 1.228310868759271, "learning_rate": 4.0416954358767835e-05, "loss": 0.2554, "step": 10451 }, { "epoch": 1.2394165777303452, "grad_norm": 1.0876521876803484, "learning_rate": 4.04150646865434e-05, "loss": 0.2446, "step": 10452 }, { "epoch": 1.23953515949247, "grad_norm": 0.9352704547807149, "learning_rate": 4.041317487221098e-05, "loss": 0.1803, "step": 10453 }, { "epoch": 1.2396537412545952, "grad_norm": 1.1896564090733655, "learning_rate": 4.0411284915787994e-05, "loss": 0.2628, "step": 10454 }, { "epoch": 1.23977232301672, "grad_norm": 1.0410338235440035, "learning_rate": 4.040939481729185e-05, "loss": 0.1841, "step": 10455 }, { "epoch": 1.2398909047788451, "grad_norm": 1.487967823511107, "learning_rate": 4.0407504576739984e-05, "loss": 0.3427, "step": 10456 }, { "epoch": 1.2400094865409699, "grad_norm": 0.7966016917705205, "learning_rate": 4.040561419414982e-05, "loss": 0.1743, "step": 10457 }, { "epoch": 1.240128068303095, "grad_norm": 1.4797816232336247, "learning_rate": 4.0403723669538796e-05, "loss": 0.2855, "step": 10458 }, { "epoch": 1.24024665006522, "grad_norm": 1.1285288014030752, "learning_rate": 4.040183300292432e-05, "loss": 0.2423, "step": 10459 }, { "epoch": 1.240365231827345, "grad_norm": 1.320081852621455, "learning_rate": 4.0399942194323826e-05, "loss": 0.2785, "step": 10460 }, { "epoch": 1.24048381358947, "grad_norm": 1.201354055542455, "learning_rate": 4.039805124375475e-05, "loss": 0.2592, "step": 10461 }, { "epoch": 1.240602395351595, "grad_norm": 2.105710918664603, "learning_rate": 4.039616015123453e-05, "loss": 0.195, "step": 10462 }, { "epoch": 1.24072097711372, "grad_norm": 1.0975713323866878, "learning_rate": 4.0394268916780594e-05, "loss": 0.2166, "step": 10463 }, { "epoch": 1.240839558875845, "grad_norm": 1.0910338190612414, "learning_rate": 4.0392377540410375e-05, "loss": 0.2122, "step": 10464 }, { "epoch": 1.24095814063797, "grad_norm": 1.0113785130900554, "learning_rate": 4.0390486022141306e-05, "loss": 0.1932, "step": 10465 }, { "epoch": 1.241076722400095, "grad_norm": 1.294315778340242, "learning_rate": 4.038859436199083e-05, "loss": 0.2959, "step": 10466 }, { "epoch": 1.24119530416222, "grad_norm": 1.1686581154915705, "learning_rate": 4.0386702559976385e-05, "loss": 0.2836, "step": 10467 }, { "epoch": 1.2413138859243449, "grad_norm": 1.2776349342091515, "learning_rate": 4.0384810616115415e-05, "loss": 0.2468, "step": 10468 }, { "epoch": 1.2414324676864699, "grad_norm": 1.2149778562527616, "learning_rate": 4.038291853042536e-05, "loss": 0.2385, "step": 10469 }, { "epoch": 1.2415510494485948, "grad_norm": 1.0338435217152584, "learning_rate": 4.038102630292365e-05, "loss": 0.2494, "step": 10470 }, { "epoch": 1.2416696312107198, "grad_norm": 1.097141839281573, "learning_rate": 4.0379133933627746e-05, "loss": 0.2385, "step": 10471 }, { "epoch": 1.2417882129728448, "grad_norm": 1.2308875405541801, "learning_rate": 4.037724142255508e-05, "loss": 0.2567, "step": 10472 }, { "epoch": 1.2419067947349698, "grad_norm": 1.126828822169031, "learning_rate": 4.037534876972311e-05, "loss": 0.1779, "step": 10473 }, { "epoch": 1.2420253764970948, "grad_norm": 0.9739911621170491, "learning_rate": 4.0373455975149275e-05, "loss": 0.2246, "step": 10474 }, { "epoch": 1.2421439582592197, "grad_norm": 2.040190204424857, "learning_rate": 4.037156303885103e-05, "loss": 0.4745, "step": 10475 }, { "epoch": 1.2422625400213447, "grad_norm": 1.165764032297576, "learning_rate": 4.0369669960845826e-05, "loss": 0.2342, "step": 10476 }, { "epoch": 1.2423811217834697, "grad_norm": 1.2054712782500459, "learning_rate": 4.036777674115111e-05, "loss": 0.2642, "step": 10477 }, { "epoch": 1.2424997035455947, "grad_norm": 0.9936247732604012, "learning_rate": 4.0365883379784345e-05, "loss": 0.2328, "step": 10478 }, { "epoch": 1.2426182853077197, "grad_norm": 0.9038161000004851, "learning_rate": 4.036398987676296e-05, "loss": 0.2207, "step": 10479 }, { "epoch": 1.2427368670698447, "grad_norm": 0.9434547096866889, "learning_rate": 4.036209623210444e-05, "loss": 0.2178, "step": 10480 }, { "epoch": 1.2428554488319696, "grad_norm": 1.0554341977106048, "learning_rate": 4.0360202445826236e-05, "loss": 0.2744, "step": 10481 }, { "epoch": 1.2429740305940946, "grad_norm": 0.9447917242351296, "learning_rate": 4.03583085179458e-05, "loss": 0.195, "step": 10482 }, { "epoch": 1.2430926123562196, "grad_norm": 1.0645737766898753, "learning_rate": 4.0356414448480585e-05, "loss": 0.2231, "step": 10483 }, { "epoch": 1.2432111941183446, "grad_norm": 2.1094616417604195, "learning_rate": 4.035452023744807e-05, "loss": 0.5596, "step": 10484 }, { "epoch": 1.2433297758804696, "grad_norm": 0.7706261524419524, "learning_rate": 4.03526258848657e-05, "loss": 0.1499, "step": 10485 }, { "epoch": 1.2434483576425945, "grad_norm": 0.9375155371413499, "learning_rate": 4.0350731390750953e-05, "loss": 0.2152, "step": 10486 }, { "epoch": 1.2435669394047195, "grad_norm": 0.9043959443944448, "learning_rate": 4.0348836755121274e-05, "loss": 0.2472, "step": 10487 }, { "epoch": 1.2436855211668445, "grad_norm": 1.217952308141904, "learning_rate": 4.034694197799416e-05, "loss": 0.2351, "step": 10488 }, { "epoch": 1.2438041029289695, "grad_norm": 1.2363116913687606, "learning_rate": 4.034504705938704e-05, "loss": 0.2598, "step": 10489 }, { "epoch": 1.2439226846910945, "grad_norm": 1.2799765754349888, "learning_rate": 4.034315199931742e-05, "loss": 0.2584, "step": 10490 }, { "epoch": 1.2440412664532194, "grad_norm": 1.0190593038893978, "learning_rate": 4.034125679780275e-05, "loss": 0.2682, "step": 10491 }, { "epoch": 1.2441598482153444, "grad_norm": 0.9391231118246326, "learning_rate": 4.03393614548605e-05, "loss": 0.1844, "step": 10492 }, { "epoch": 1.2442784299774694, "grad_norm": 1.3192451946167671, "learning_rate": 4.033746597050815e-05, "loss": 0.2613, "step": 10493 }, { "epoch": 1.2443970117395944, "grad_norm": 1.3958438620056595, "learning_rate": 4.033557034476318e-05, "loss": 0.259, "step": 10494 }, { "epoch": 1.2445155935017194, "grad_norm": 1.0093463452116567, "learning_rate": 4.033367457764304e-05, "loss": 0.2251, "step": 10495 }, { "epoch": 1.2446341752638443, "grad_norm": 0.9499408745125743, "learning_rate": 4.033177866916523e-05, "loss": 0.1551, "step": 10496 }, { "epoch": 1.2447527570259693, "grad_norm": 1.0931319390045557, "learning_rate": 4.032988261934723e-05, "loss": 0.242, "step": 10497 }, { "epoch": 1.2448713387880943, "grad_norm": 1.094501074541544, "learning_rate": 4.032798642820651e-05, "loss": 0.2536, "step": 10498 }, { "epoch": 1.2449899205502193, "grad_norm": 1.2738152160055225, "learning_rate": 4.032609009576055e-05, "loss": 0.2876, "step": 10499 }, { "epoch": 1.2451085023123443, "grad_norm": 1.0256005041217717, "learning_rate": 4.032419362202683e-05, "loss": 0.2323, "step": 10500 }, { "epoch": 1.2452270840744695, "grad_norm": 0.9663864134321976, "learning_rate": 4.0322297007022835e-05, "loss": 0.2164, "step": 10501 }, { "epoch": 1.2453456658365942, "grad_norm": 1.2343799216721234, "learning_rate": 4.032040025076606e-05, "loss": 0.2781, "step": 10502 }, { "epoch": 1.2454642475987194, "grad_norm": 0.9479766832494176, "learning_rate": 4.0318503353273975e-05, "loss": 0.2279, "step": 10503 }, { "epoch": 1.2455828293608442, "grad_norm": 0.9689145773770386, "learning_rate": 4.0316606314564065e-05, "loss": 0.2116, "step": 10504 }, { "epoch": 1.2457014111229694, "grad_norm": 1.2615590637776244, "learning_rate": 4.031470913465384e-05, "loss": 0.2794, "step": 10505 }, { "epoch": 1.2458199928850942, "grad_norm": 1.210611703776856, "learning_rate": 4.031281181356077e-05, "loss": 0.2498, "step": 10506 }, { "epoch": 1.2459385746472194, "grad_norm": 1.0439387585696862, "learning_rate": 4.031091435130237e-05, "loss": 0.1851, "step": 10507 }, { "epoch": 1.2460571564093443, "grad_norm": 1.0718587396981918, "learning_rate": 4.030901674789609e-05, "loss": 0.2336, "step": 10508 }, { "epoch": 1.2461757381714693, "grad_norm": 1.2807458861142087, "learning_rate": 4.030711900335946e-05, "loss": 0.2318, "step": 10509 }, { "epoch": 1.2462943199335943, "grad_norm": 1.5963507083245414, "learning_rate": 4.0305221117709965e-05, "loss": 0.3859, "step": 10510 }, { "epoch": 1.2464129016957193, "grad_norm": 1.1154691834953003, "learning_rate": 4.03033230909651e-05, "loss": 0.2371, "step": 10511 }, { "epoch": 1.2465314834578443, "grad_norm": 1.4643078988972753, "learning_rate": 4.030142492314236e-05, "loss": 0.4185, "step": 10512 }, { "epoch": 1.2466500652199692, "grad_norm": 1.1373260571900035, "learning_rate": 4.029952661425924e-05, "loss": 0.228, "step": 10513 }, { "epoch": 1.2467686469820942, "grad_norm": 1.189883913259819, "learning_rate": 4.0297628164333264e-05, "loss": 0.3118, "step": 10514 }, { "epoch": 1.2468872287442192, "grad_norm": 1.4707296608570126, "learning_rate": 4.0295729573381905e-05, "loss": 0.2544, "step": 10515 }, { "epoch": 1.2470058105063442, "grad_norm": 1.694070977915552, "learning_rate": 4.0293830841422674e-05, "loss": 0.3553, "step": 10516 }, { "epoch": 1.2471243922684692, "grad_norm": 1.442309445209932, "learning_rate": 4.0291931968473075e-05, "loss": 0.255, "step": 10517 }, { "epoch": 1.2472429740305941, "grad_norm": 1.2375233932447167, "learning_rate": 4.0290032954550623e-05, "loss": 0.2511, "step": 10518 }, { "epoch": 1.2473615557927191, "grad_norm": 1.0892822408191938, "learning_rate": 4.028813379967281e-05, "loss": 0.218, "step": 10519 }, { "epoch": 1.247480137554844, "grad_norm": 1.1583590969337567, "learning_rate": 4.0286234503857156e-05, "loss": 0.2445, "step": 10520 }, { "epoch": 1.247598719316969, "grad_norm": 0.8883643924792928, "learning_rate": 4.028433506712116e-05, "loss": 0.2233, "step": 10521 }, { "epoch": 1.247717301079094, "grad_norm": 0.8079169845569238, "learning_rate": 4.028243548948234e-05, "loss": 0.1984, "step": 10522 }, { "epoch": 1.247835882841219, "grad_norm": 1.0283133469508123, "learning_rate": 4.028053577095821e-05, "loss": 0.2129, "step": 10523 }, { "epoch": 1.247954464603344, "grad_norm": 1.1660471976518556, "learning_rate": 4.027863591156627e-05, "loss": 0.2954, "step": 10524 }, { "epoch": 1.248073046365469, "grad_norm": 1.1271958540673999, "learning_rate": 4.027673591132405e-05, "loss": 0.2472, "step": 10525 }, { "epoch": 1.248191628127594, "grad_norm": 1.1330934887037956, "learning_rate": 4.027483577024906e-05, "loss": 0.2825, "step": 10526 }, { "epoch": 1.248310209889719, "grad_norm": 1.408379322310984, "learning_rate": 4.027293548835881e-05, "loss": 0.2967, "step": 10527 }, { "epoch": 1.248428791651844, "grad_norm": 0.8739466059611773, "learning_rate": 4.027103506567083e-05, "loss": 0.186, "step": 10528 }, { "epoch": 1.248547373413969, "grad_norm": 1.0366278478477022, "learning_rate": 4.0269134502202626e-05, "loss": 0.2945, "step": 10529 }, { "epoch": 1.248665955176094, "grad_norm": 0.7808945938209709, "learning_rate": 4.0267233797971725e-05, "loss": 0.174, "step": 10530 }, { "epoch": 1.248784536938219, "grad_norm": 1.3738126035192106, "learning_rate": 4.026533295299566e-05, "loss": 0.3106, "step": 10531 }, { "epoch": 1.2489031187003439, "grad_norm": 1.0693772289922485, "learning_rate": 4.026343196729194e-05, "loss": 0.2113, "step": 10532 }, { "epoch": 1.2490217004624689, "grad_norm": 0.7010631579168635, "learning_rate": 4.026153084087809e-05, "loss": 0.1841, "step": 10533 }, { "epoch": 1.2491402822245938, "grad_norm": 1.1328815676799033, "learning_rate": 4.025962957377164e-05, "loss": 0.2591, "step": 10534 }, { "epoch": 1.2492588639867188, "grad_norm": 1.4828476463484515, "learning_rate": 4.025772816599013e-05, "loss": 0.2572, "step": 10535 }, { "epoch": 1.2493774457488438, "grad_norm": 0.9294358605405638, "learning_rate": 4.025582661755106e-05, "loss": 0.206, "step": 10536 }, { "epoch": 1.2494960275109688, "grad_norm": 1.7796900458900629, "learning_rate": 4.0253924928471984e-05, "loss": 0.4766, "step": 10537 }, { "epoch": 1.2496146092730938, "grad_norm": 1.054048524475053, "learning_rate": 4.025202309877043e-05, "loss": 0.2147, "step": 10538 }, { "epoch": 1.2497331910352187, "grad_norm": 1.4888406814829203, "learning_rate": 4.0250121128463924e-05, "loss": 0.3532, "step": 10539 }, { "epoch": 1.2498517727973437, "grad_norm": 1.2863881454807318, "learning_rate": 4.024821901757e-05, "loss": 0.2555, "step": 10540 }, { "epoch": 1.2499703545594687, "grad_norm": 1.608068289493836, "learning_rate": 4.02463167661062e-05, "loss": 0.409, "step": 10541 }, { "epoch": 1.2500889363215937, "grad_norm": 1.098914217182304, "learning_rate": 4.024441437409005e-05, "loss": 0.2133, "step": 10542 }, { "epoch": 1.2502075180837187, "grad_norm": 0.904090266091376, "learning_rate": 4.0242511841539095e-05, "loss": 0.197, "step": 10543 }, { "epoch": 1.2503260998458436, "grad_norm": 0.8821204150052284, "learning_rate": 4.0240609168470876e-05, "loss": 0.1482, "step": 10544 }, { "epoch": 1.2504446816079686, "grad_norm": 1.421411973876386, "learning_rate": 4.023870635490292e-05, "loss": 0.3042, "step": 10545 }, { "epoch": 1.2505632633700936, "grad_norm": 0.8906788752085187, "learning_rate": 4.023680340085279e-05, "loss": 0.2034, "step": 10546 }, { "epoch": 1.2506818451322186, "grad_norm": 1.1714934346290409, "learning_rate": 4.023490030633801e-05, "loss": 0.2372, "step": 10547 }, { "epoch": 1.2508004268943438, "grad_norm": 1.2621119886885481, "learning_rate": 4.023299707137613e-05, "loss": 0.2667, "step": 10548 }, { "epoch": 1.2509190086564685, "grad_norm": 1.026897548318463, "learning_rate": 4.023109369598471e-05, "loss": 0.2092, "step": 10549 }, { "epoch": 1.2510375904185937, "grad_norm": 1.1165845469337354, "learning_rate": 4.022919018018127e-05, "loss": 0.2402, "step": 10550 }, { "epoch": 1.2511561721807185, "grad_norm": 0.9019856573736542, "learning_rate": 4.022728652398338e-05, "loss": 0.1358, "step": 10551 }, { "epoch": 1.2512747539428437, "grad_norm": 1.699215762337985, "learning_rate": 4.0225382727408565e-05, "loss": 0.3368, "step": 10552 }, { "epoch": 1.2513933357049685, "grad_norm": 1.400880118837076, "learning_rate": 4.0223478790474415e-05, "loss": 0.264, "step": 10553 }, { "epoch": 1.2515119174670937, "grad_norm": 1.1040214887182063, "learning_rate": 4.0221574713198445e-05, "loss": 0.2091, "step": 10554 }, { "epoch": 1.2516304992292184, "grad_norm": 1.0639382091714573, "learning_rate": 4.021967049559823e-05, "loss": 0.277, "step": 10555 }, { "epoch": 1.2517490809913436, "grad_norm": 1.2770344151229518, "learning_rate": 4.0217766137691304e-05, "loss": 0.2566, "step": 10556 }, { "epoch": 1.2518676627534684, "grad_norm": 1.269552253933047, "learning_rate": 4.021586163949525e-05, "loss": 0.2185, "step": 10557 }, { "epoch": 1.2519862445155936, "grad_norm": 1.1906228199535545, "learning_rate": 4.02139570010276e-05, "loss": 0.2745, "step": 10558 }, { "epoch": 1.2521048262777184, "grad_norm": 1.0876754735520582, "learning_rate": 4.021205222230593e-05, "loss": 0.2211, "step": 10559 }, { "epoch": 1.2522234080398436, "grad_norm": 0.9004020313894792, "learning_rate": 4.0210147303347786e-05, "loss": 0.1939, "step": 10560 }, { "epoch": 1.2523419898019685, "grad_norm": 1.3485927057076532, "learning_rate": 4.020824224417074e-05, "loss": 0.374, "step": 10561 }, { "epoch": 1.2524605715640935, "grad_norm": 1.2431035257963305, "learning_rate": 4.0206337044792345e-05, "loss": 0.2769, "step": 10562 }, { "epoch": 1.2525791533262185, "grad_norm": 1.0736428194254974, "learning_rate": 4.020443170523018e-05, "loss": 0.1874, "step": 10563 }, { "epoch": 1.2526977350883435, "grad_norm": 1.386910940161215, "learning_rate": 4.020252622550179e-05, "loss": 0.333, "step": 10564 }, { "epoch": 1.2528163168504685, "grad_norm": 1.2457872332610398, "learning_rate": 4.020062060562475e-05, "loss": 0.2795, "step": 10565 }, { "epoch": 1.2529348986125934, "grad_norm": 1.22455121212678, "learning_rate": 4.019871484561664e-05, "loss": 0.283, "step": 10566 }, { "epoch": 1.2530534803747184, "grad_norm": 0.6701795808994715, "learning_rate": 4.019680894549501e-05, "loss": 0.1446, "step": 10567 }, { "epoch": 1.2531720621368434, "grad_norm": 1.106329211406439, "learning_rate": 4.0194902905277436e-05, "loss": 0.214, "step": 10568 }, { "epoch": 1.2532906438989684, "grad_norm": 0.8740744638317279, "learning_rate": 4.019299672498149e-05, "loss": 0.1585, "step": 10569 }, { "epoch": 1.2534092256610934, "grad_norm": 0.8324948620813536, "learning_rate": 4.019109040462474e-05, "loss": 0.2267, "step": 10570 }, { "epoch": 1.2535278074232183, "grad_norm": 0.9907605752278357, "learning_rate": 4.018918394422477e-05, "loss": 0.2655, "step": 10571 }, { "epoch": 1.2536463891853433, "grad_norm": 1.4124107508401187, "learning_rate": 4.018727734379916e-05, "loss": 0.2846, "step": 10572 }, { "epoch": 1.2537649709474683, "grad_norm": 1.6581570178786065, "learning_rate": 4.018537060336547e-05, "loss": 0.2759, "step": 10573 }, { "epoch": 1.2538835527095933, "grad_norm": 1.0927094739030272, "learning_rate": 4.018346372294128e-05, "loss": 0.2855, "step": 10574 }, { "epoch": 1.2540021344717183, "grad_norm": 0.9463406600167925, "learning_rate": 4.018155670254418e-05, "loss": 0.215, "step": 10575 }, { "epoch": 1.2541207162338432, "grad_norm": 1.7170418486546057, "learning_rate": 4.017964954219174e-05, "loss": 0.3879, "step": 10576 }, { "epoch": 1.2542392979959682, "grad_norm": 1.1740391120139588, "learning_rate": 4.0177742241901546e-05, "loss": 0.2926, "step": 10577 }, { "epoch": 1.2543578797580932, "grad_norm": 0.9455168386012042, "learning_rate": 4.017583480169118e-05, "loss": 0.2242, "step": 10578 }, { "epoch": 1.2544764615202182, "grad_norm": 0.9289529212328501, "learning_rate": 4.017392722157823e-05, "loss": 0.2322, "step": 10579 }, { "epoch": 1.2545950432823432, "grad_norm": 0.9035402776839989, "learning_rate": 4.017201950158027e-05, "loss": 0.2264, "step": 10580 }, { "epoch": 1.2547136250444682, "grad_norm": 0.7481993170686446, "learning_rate": 4.0170111641714905e-05, "loss": 0.15, "step": 10581 }, { "epoch": 1.2548322068065931, "grad_norm": 1.380032675601541, "learning_rate": 4.016820364199971e-05, "loss": 0.2922, "step": 10582 }, { "epoch": 1.2549507885687181, "grad_norm": 1.0341225557928495, "learning_rate": 4.0166295502452275e-05, "loss": 0.2689, "step": 10583 }, { "epoch": 1.255069370330843, "grad_norm": 1.3940712219323408, "learning_rate": 4.01643872230902e-05, "loss": 0.3234, "step": 10584 }, { "epoch": 1.255187952092968, "grad_norm": 1.0438485305803942, "learning_rate": 4.016247880393107e-05, "loss": 0.1867, "step": 10585 }, { "epoch": 1.255306533855093, "grad_norm": 1.0408866102631629, "learning_rate": 4.016057024499248e-05, "loss": 0.2099, "step": 10586 }, { "epoch": 1.255425115617218, "grad_norm": 1.3765915397470567, "learning_rate": 4.015866154629202e-05, "loss": 0.2929, "step": 10587 }, { "epoch": 1.255543697379343, "grad_norm": 1.3526164220262473, "learning_rate": 4.0156752707847284e-05, "loss": 0.3264, "step": 10588 }, { "epoch": 1.255662279141468, "grad_norm": 1.4115313455036778, "learning_rate": 4.015484372967588e-05, "loss": 0.3204, "step": 10589 }, { "epoch": 1.255780860903593, "grad_norm": 1.2925640279323831, "learning_rate": 4.0152934611795404e-05, "loss": 0.2742, "step": 10590 }, { "epoch": 1.255899442665718, "grad_norm": 1.7454706134272517, "learning_rate": 4.015102535422345e-05, "loss": 0.3437, "step": 10591 }, { "epoch": 1.256018024427843, "grad_norm": 1.0844293065490622, "learning_rate": 4.014911595697762e-05, "loss": 0.2418, "step": 10592 }, { "epoch": 1.256136606189968, "grad_norm": 1.0145025765498206, "learning_rate": 4.014720642007552e-05, "loss": 0.2199, "step": 10593 }, { "epoch": 1.256255187952093, "grad_norm": 1.0365113537959116, "learning_rate": 4.014529674353474e-05, "loss": 0.1514, "step": 10594 }, { "epoch": 1.2563737697142179, "grad_norm": 0.7556012572626248, "learning_rate": 4.014338692737291e-05, "loss": 0.1682, "step": 10595 }, { "epoch": 1.2564923514763429, "grad_norm": 0.8389109172342724, "learning_rate": 4.0141476971607624e-05, "loss": 0.1901, "step": 10596 }, { "epoch": 1.2566109332384678, "grad_norm": 0.7607359171287091, "learning_rate": 4.0139566876256476e-05, "loss": 0.1607, "step": 10597 }, { "epoch": 1.2567295150005928, "grad_norm": 1.3394907749120355, "learning_rate": 4.01376566413371e-05, "loss": 0.3087, "step": 10598 }, { "epoch": 1.256848096762718, "grad_norm": 1.267931054374564, "learning_rate": 4.013574626686708e-05, "loss": 0.2562, "step": 10599 }, { "epoch": 1.2569666785248428, "grad_norm": 0.7678971535225139, "learning_rate": 4.0133835752864054e-05, "loss": 0.1463, "step": 10600 }, { "epoch": 1.257085260286968, "grad_norm": 0.8204870371285615, "learning_rate": 4.013192509934561e-05, "loss": 0.1616, "step": 10601 }, { "epoch": 1.2572038420490927, "grad_norm": 0.7218933771661717, "learning_rate": 4.013001430632938e-05, "loss": 0.1621, "step": 10602 }, { "epoch": 1.257322423811218, "grad_norm": 1.2838740272270943, "learning_rate": 4.012810337383297e-05, "loss": 0.2476, "step": 10603 }, { "epoch": 1.2574410055733427, "grad_norm": 1.0388327256912735, "learning_rate": 4.012619230187399e-05, "loss": 0.2334, "step": 10604 }, { "epoch": 1.257559587335468, "grad_norm": 0.9688822144774484, "learning_rate": 4.012428109047007e-05, "loss": 0.2317, "step": 10605 }, { "epoch": 1.2576781690975927, "grad_norm": 1.520105372878284, "learning_rate": 4.0122369739638835e-05, "loss": 0.2545, "step": 10606 }, { "epoch": 1.2577967508597179, "grad_norm": 1.2582582498061827, "learning_rate": 4.012045824939788e-05, "loss": 0.2729, "step": 10607 }, { "epoch": 1.2579153326218426, "grad_norm": 1.0146580804416334, "learning_rate": 4.011854661976485e-05, "loss": 0.2163, "step": 10608 }, { "epoch": 1.2580339143839678, "grad_norm": 1.1060118683948428, "learning_rate": 4.011663485075736e-05, "loss": 0.2125, "step": 10609 }, { "epoch": 1.2581524961460928, "grad_norm": 1.0140132599832952, "learning_rate": 4.011472294239303e-05, "loss": 0.1977, "step": 10610 }, { "epoch": 1.2582710779082178, "grad_norm": 0.8981796227561046, "learning_rate": 4.01128108946895e-05, "loss": 0.181, "step": 10611 }, { "epoch": 1.2583896596703428, "grad_norm": 1.364893122035823, "learning_rate": 4.011089870766437e-05, "loss": 0.297, "step": 10612 }, { "epoch": 1.2585082414324678, "grad_norm": 1.156791081761517, "learning_rate": 4.0108986381335304e-05, "loss": 0.2761, "step": 10613 }, { "epoch": 1.2586268231945927, "grad_norm": 1.2617731370628869, "learning_rate": 4.010707391571989e-05, "loss": 0.2399, "step": 10614 }, { "epoch": 1.2587454049567177, "grad_norm": 1.133165690336073, "learning_rate": 4.010516131083579e-05, "loss": 0.2842, "step": 10615 }, { "epoch": 1.2588639867188427, "grad_norm": 1.1016158317968447, "learning_rate": 4.010324856670063e-05, "loss": 0.2692, "step": 10616 }, { "epoch": 1.2589825684809677, "grad_norm": 1.1502618974028418, "learning_rate": 4.010133568333204e-05, "loss": 0.2598, "step": 10617 }, { "epoch": 1.2591011502430927, "grad_norm": 1.074467199959485, "learning_rate": 4.0099422660747645e-05, "loss": 0.1515, "step": 10618 }, { "epoch": 1.2592197320052176, "grad_norm": 1.2558737245545464, "learning_rate": 4.009750949896509e-05, "loss": 0.279, "step": 10619 }, { "epoch": 1.2593383137673426, "grad_norm": 0.9119357655452501, "learning_rate": 4.0095596198002014e-05, "loss": 0.1888, "step": 10620 }, { "epoch": 1.2594568955294676, "grad_norm": 1.1871888170577531, "learning_rate": 4.009368275787606e-05, "loss": 0.2591, "step": 10621 }, { "epoch": 1.2595754772915926, "grad_norm": 0.8084549804476707, "learning_rate": 4.009176917860484e-05, "loss": 0.1749, "step": 10622 }, { "epoch": 1.2596940590537176, "grad_norm": 1.080155702942602, "learning_rate": 4.008985546020603e-05, "loss": 0.2267, "step": 10623 }, { "epoch": 1.2598126408158425, "grad_norm": 0.924844015233843, "learning_rate": 4.008794160269725e-05, "loss": 0.2337, "step": 10624 }, { "epoch": 1.2599312225779675, "grad_norm": 0.9787574871091232, "learning_rate": 4.008602760609616e-05, "loss": 0.2354, "step": 10625 }, { "epoch": 1.2600498043400925, "grad_norm": 1.0769525463952392, "learning_rate": 4.0084113470420395e-05, "loss": 0.1958, "step": 10626 }, { "epoch": 1.2601683861022175, "grad_norm": 1.3767634672578035, "learning_rate": 4.0082199195687594e-05, "loss": 0.273, "step": 10627 }, { "epoch": 1.2602869678643425, "grad_norm": 1.4158339523503718, "learning_rate": 4.008028478191541e-05, "loss": 0.3039, "step": 10628 }, { "epoch": 1.2604055496264674, "grad_norm": 1.0004243844852918, "learning_rate": 4.00783702291215e-05, "loss": 0.2462, "step": 10629 }, { "epoch": 1.2605241313885924, "grad_norm": 1.483396105839095, "learning_rate": 4.007645553732351e-05, "loss": 0.3742, "step": 10630 }, { "epoch": 1.2606427131507174, "grad_norm": 1.1498480101278847, "learning_rate": 4.0074540706539075e-05, "loss": 0.2041, "step": 10631 }, { "epoch": 1.2607612949128424, "grad_norm": 1.1919065555603667, "learning_rate": 4.007262573678587e-05, "loss": 0.2624, "step": 10632 }, { "epoch": 1.2608798766749674, "grad_norm": 1.0832644062741912, "learning_rate": 4.0070710628081534e-05, "loss": 0.2236, "step": 10633 }, { "epoch": 1.2609984584370924, "grad_norm": 0.9841329454685708, "learning_rate": 4.006879538044372e-05, "loss": 0.2265, "step": 10634 }, { "epoch": 1.2611170401992173, "grad_norm": 1.2642497825940258, "learning_rate": 4.006687999389011e-05, "loss": 0.2764, "step": 10635 }, { "epoch": 1.2612356219613423, "grad_norm": 1.32225812823312, "learning_rate": 4.006496446843833e-05, "loss": 0.296, "step": 10636 }, { "epoch": 1.2613542037234673, "grad_norm": 1.6928657714658077, "learning_rate": 4.006304880410605e-05, "loss": 0.3346, "step": 10637 }, { "epoch": 1.2614727854855923, "grad_norm": 1.0778045676224994, "learning_rate": 4.006113300091093e-05, "loss": 0.2829, "step": 10638 }, { "epoch": 1.2615913672477173, "grad_norm": 1.1336464785442355, "learning_rate": 4.0059217058870644e-05, "loss": 0.2867, "step": 10639 }, { "epoch": 1.2617099490098422, "grad_norm": 1.094497285344225, "learning_rate": 4.0057300978002835e-05, "loss": 0.2105, "step": 10640 }, { "epoch": 1.2618285307719672, "grad_norm": 0.9013090009877783, "learning_rate": 4.0055384758325185e-05, "loss": 0.2085, "step": 10641 }, { "epoch": 1.2619471125340922, "grad_norm": 1.241823199372829, "learning_rate": 4.0053468399855345e-05, "loss": 0.2617, "step": 10642 }, { "epoch": 1.2620656942962172, "grad_norm": 1.384808779288354, "learning_rate": 4.005155190261099e-05, "loss": 0.2278, "step": 10643 }, { "epoch": 1.2621842760583422, "grad_norm": 1.4409858363564618, "learning_rate": 4.004963526660977e-05, "loss": 0.2997, "step": 10644 }, { "epoch": 1.2623028578204671, "grad_norm": 1.0048667941659195, "learning_rate": 4.004771849186937e-05, "loss": 0.2385, "step": 10645 }, { "epoch": 1.2624214395825921, "grad_norm": 0.9905365513971506, "learning_rate": 4.004580157840747e-05, "loss": 0.2325, "step": 10646 }, { "epoch": 1.262540021344717, "grad_norm": 1.0684044545888134, "learning_rate": 4.004388452624172e-05, "loss": 0.2372, "step": 10647 }, { "epoch": 1.2626586031068423, "grad_norm": 1.0016323278891466, "learning_rate": 4.004196733538981e-05, "loss": 0.2203, "step": 10648 }, { "epoch": 1.262777184868967, "grad_norm": 1.1041340292123465, "learning_rate": 4.00400500058694e-05, "loss": 0.2474, "step": 10649 }, { "epoch": 1.2628957666310923, "grad_norm": 1.1591464328132572, "learning_rate": 4.003813253769818e-05, "loss": 0.2468, "step": 10650 }, { "epoch": 1.263014348393217, "grad_norm": 1.126682481589318, "learning_rate": 4.003621493089381e-05, "loss": 0.276, "step": 10651 }, { "epoch": 1.2631329301553422, "grad_norm": 1.5903999591298772, "learning_rate": 4.003429718547399e-05, "loss": 0.283, "step": 10652 }, { "epoch": 1.263251511917467, "grad_norm": 1.1106224161184617, "learning_rate": 4.0032379301456365e-05, "loss": 0.2229, "step": 10653 }, { "epoch": 1.2633700936795922, "grad_norm": 1.2156439127984733, "learning_rate": 4.003046127885866e-05, "loss": 0.2629, "step": 10654 }, { "epoch": 1.263488675441717, "grad_norm": 0.6691641439512508, "learning_rate": 4.002854311769852e-05, "loss": 0.1881, "step": 10655 }, { "epoch": 1.2636072572038421, "grad_norm": 0.8796583983295096, "learning_rate": 4.002662481799365e-05, "loss": 0.1548, "step": 10656 }, { "epoch": 1.263725838965967, "grad_norm": 1.6707682254743603, "learning_rate": 4.002470637976172e-05, "loss": 0.2782, "step": 10657 }, { "epoch": 1.263844420728092, "grad_norm": 1.3260456038689135, "learning_rate": 4.002278780302042e-05, "loss": 0.254, "step": 10658 }, { "epoch": 1.2639630024902169, "grad_norm": 1.560101862213038, "learning_rate": 4.002086908778744e-05, "loss": 0.407, "step": 10659 }, { "epoch": 1.264081584252342, "grad_norm": 0.9617866406249037, "learning_rate": 4.001895023408047e-05, "loss": 0.1848, "step": 10660 }, { "epoch": 1.264200166014467, "grad_norm": 0.8342875921736955, "learning_rate": 4.001703124191719e-05, "loss": 0.1887, "step": 10661 }, { "epoch": 1.264318747776592, "grad_norm": 1.1156495441699488, "learning_rate": 4.00151121113153e-05, "loss": 0.2343, "step": 10662 }, { "epoch": 1.264437329538717, "grad_norm": 1.1603740761500894, "learning_rate": 4.001319284229249e-05, "loss": 0.1994, "step": 10663 }, { "epoch": 1.264555911300842, "grad_norm": 1.4474455836035063, "learning_rate": 4.001127343486646e-05, "loss": 0.3058, "step": 10664 }, { "epoch": 1.264674493062967, "grad_norm": 1.267754510075988, "learning_rate": 4.000935388905489e-05, "loss": 0.2659, "step": 10665 }, { "epoch": 1.264793074825092, "grad_norm": 1.2412552669574035, "learning_rate": 4.000743420487548e-05, "loss": 0.2466, "step": 10666 }, { "epoch": 1.264911656587217, "grad_norm": 1.0244323095552779, "learning_rate": 4.000551438234593e-05, "loss": 0.2061, "step": 10667 }, { "epoch": 1.265030238349342, "grad_norm": 1.1490675767566865, "learning_rate": 4.000359442148395e-05, "loss": 0.2131, "step": 10668 }, { "epoch": 1.265148820111467, "grad_norm": 1.0305381817892802, "learning_rate": 4.000167432230721e-05, "loss": 0.2136, "step": 10669 }, { "epoch": 1.2652674018735919, "grad_norm": 1.1307076152535613, "learning_rate": 3.999975408483344e-05, "loss": 0.2314, "step": 10670 }, { "epoch": 1.2653859836357169, "grad_norm": 1.2307108637618005, "learning_rate": 3.999783370908033e-05, "loss": 0.2778, "step": 10671 }, { "epoch": 1.2655045653978418, "grad_norm": 0.8770689618885671, "learning_rate": 3.999591319506558e-05, "loss": 0.2063, "step": 10672 }, { "epoch": 1.2656231471599668, "grad_norm": 1.2937093111149467, "learning_rate": 3.999399254280691e-05, "loss": 0.2523, "step": 10673 }, { "epoch": 1.2657417289220918, "grad_norm": 1.7955767299821062, "learning_rate": 3.999207175232201e-05, "loss": 0.3439, "step": 10674 }, { "epoch": 1.2658603106842168, "grad_norm": 1.5653976613227858, "learning_rate": 3.999015082362859e-05, "loss": 0.3406, "step": 10675 }, { "epoch": 1.2659788924463418, "grad_norm": 0.9386469808658074, "learning_rate": 3.998822975674437e-05, "loss": 0.1864, "step": 10676 }, { "epoch": 1.2660974742084667, "grad_norm": 1.1223111904740737, "learning_rate": 3.998630855168705e-05, "loss": 0.2015, "step": 10677 }, { "epoch": 1.2662160559705917, "grad_norm": 1.745198469381485, "learning_rate": 3.9984387208474325e-05, "loss": 0.3706, "step": 10678 }, { "epoch": 1.2663346377327167, "grad_norm": 1.192219137685321, "learning_rate": 3.9982465727123944e-05, "loss": 0.2628, "step": 10679 }, { "epoch": 1.2664532194948417, "grad_norm": 0.8707276362436793, "learning_rate": 3.998054410765359e-05, "loss": 0.1823, "step": 10680 }, { "epoch": 1.2665718012569667, "grad_norm": 1.4036879120328158, "learning_rate": 3.9978622350081e-05, "loss": 0.2951, "step": 10681 }, { "epoch": 1.2666903830190916, "grad_norm": 0.8838612897522572, "learning_rate": 3.997670045442388e-05, "loss": 0.1718, "step": 10682 }, { "epoch": 1.2668089647812166, "grad_norm": 1.00351266415984, "learning_rate": 3.9974778420699935e-05, "loss": 0.2139, "step": 10683 }, { "epoch": 1.2669275465433416, "grad_norm": 1.3169446281343504, "learning_rate": 3.997285624892691e-05, "loss": 0.2999, "step": 10684 }, { "epoch": 1.2670461283054666, "grad_norm": 1.0294864483851398, "learning_rate": 3.99709339391225e-05, "loss": 0.2103, "step": 10685 }, { "epoch": 1.2671647100675916, "grad_norm": 0.9507200771841938, "learning_rate": 3.996901149130444e-05, "loss": 0.1867, "step": 10686 }, { "epoch": 1.2672832918297166, "grad_norm": 0.9775158417424439, "learning_rate": 3.996708890549046e-05, "loss": 0.1851, "step": 10687 }, { "epoch": 1.2674018735918415, "grad_norm": 1.423459055489313, "learning_rate": 3.9965166181698266e-05, "loss": 0.2574, "step": 10688 }, { "epoch": 1.2675204553539665, "grad_norm": 0.965467331594832, "learning_rate": 3.996324331994559e-05, "loss": 0.1984, "step": 10689 }, { "epoch": 1.2676390371160915, "grad_norm": 1.1541669513876114, "learning_rate": 3.996132032025016e-05, "loss": 0.2321, "step": 10690 }, { "epoch": 1.2677576188782165, "grad_norm": 1.0800643790434257, "learning_rate": 3.995939718262971e-05, "loss": 0.2147, "step": 10691 }, { "epoch": 1.2678762006403415, "grad_norm": 0.9554983101076154, "learning_rate": 3.995747390710196e-05, "loss": 0.1826, "step": 10692 }, { "epoch": 1.2679947824024664, "grad_norm": 1.251250476876661, "learning_rate": 3.995555049368463e-05, "loss": 0.2345, "step": 10693 }, { "epoch": 1.2681133641645914, "grad_norm": 0.9288623968813257, "learning_rate": 3.995362694239548e-05, "loss": 0.2067, "step": 10694 }, { "epoch": 1.2682319459267164, "grad_norm": 1.1048194892975658, "learning_rate": 3.995170325325223e-05, "loss": 0.2068, "step": 10695 }, { "epoch": 1.2683505276888414, "grad_norm": 1.3590855806006066, "learning_rate": 3.9949779426272594e-05, "loss": 0.2619, "step": 10696 }, { "epoch": 1.2684691094509664, "grad_norm": 1.0691928941437427, "learning_rate": 3.994785546147434e-05, "loss": 0.2116, "step": 10697 }, { "epoch": 1.2685876912130913, "grad_norm": 0.9484664944888733, "learning_rate": 3.994593135887518e-05, "loss": 0.1741, "step": 10698 }, { "epoch": 1.2687062729752165, "grad_norm": 1.0407278170795875, "learning_rate": 3.994400711849286e-05, "loss": 0.1771, "step": 10699 }, { "epoch": 1.2688248547373413, "grad_norm": 1.2180017255120155, "learning_rate": 3.994208274034512e-05, "loss": 0.2914, "step": 10700 }, { "epoch": 1.2689434364994665, "grad_norm": 1.213265787973207, "learning_rate": 3.994015822444971e-05, "loss": 0.1975, "step": 10701 }, { "epoch": 1.2690620182615913, "grad_norm": 1.3029946474889964, "learning_rate": 3.993823357082435e-05, "loss": 0.2691, "step": 10702 }, { "epoch": 1.2691806000237165, "grad_norm": 0.8872970550052439, "learning_rate": 3.99363087794868e-05, "loss": 0.1836, "step": 10703 }, { "epoch": 1.2692991817858412, "grad_norm": 0.9807060953964426, "learning_rate": 3.99343838504548e-05, "loss": 0.1923, "step": 10704 }, { "epoch": 1.2694177635479664, "grad_norm": 0.8879667168084098, "learning_rate": 3.99324587837461e-05, "loss": 0.1681, "step": 10705 }, { "epoch": 1.2695363453100912, "grad_norm": 1.2566264357518295, "learning_rate": 3.993053357937843e-05, "loss": 0.25, "step": 10706 }, { "epoch": 1.2696549270722164, "grad_norm": 1.2781308141744867, "learning_rate": 3.9928608237369556e-05, "loss": 0.2842, "step": 10707 }, { "epoch": 1.2697735088343411, "grad_norm": 1.1030641155428695, "learning_rate": 3.992668275773722e-05, "loss": 0.2188, "step": 10708 }, { "epoch": 1.2698920905964663, "grad_norm": 0.7663596927902078, "learning_rate": 3.992475714049917e-05, "loss": 0.1508, "step": 10709 }, { "epoch": 1.2700106723585913, "grad_norm": 1.0249351449811366, "learning_rate": 3.9922831385673164e-05, "loss": 0.2102, "step": 10710 }, { "epoch": 1.2701292541207163, "grad_norm": 1.0020940310372732, "learning_rate": 3.992090549327695e-05, "loss": 0.2422, "step": 10711 }, { "epoch": 1.2702478358828413, "grad_norm": 1.4685510705660805, "learning_rate": 3.991897946332829e-05, "loss": 0.3161, "step": 10712 }, { "epoch": 1.2703664176449663, "grad_norm": 0.9749512040969037, "learning_rate": 3.991705329584493e-05, "loss": 0.2146, "step": 10713 }, { "epoch": 1.2704849994070913, "grad_norm": 1.1043507980188454, "learning_rate": 3.991512699084463e-05, "loss": 0.2592, "step": 10714 }, { "epoch": 1.2706035811692162, "grad_norm": 0.9464422163388854, "learning_rate": 3.991320054834515e-05, "loss": 0.1796, "step": 10715 }, { "epoch": 1.2707221629313412, "grad_norm": 1.3141493894758716, "learning_rate": 3.9911273968364255e-05, "loss": 0.3514, "step": 10716 }, { "epoch": 1.2708407446934662, "grad_norm": 1.1854478000384914, "learning_rate": 3.990934725091969e-05, "loss": 0.2624, "step": 10717 }, { "epoch": 1.2709593264555912, "grad_norm": 1.5170063723404184, "learning_rate": 3.9907420396029236e-05, "loss": 0.3642, "step": 10718 }, { "epoch": 1.2710779082177162, "grad_norm": 1.1767563666292837, "learning_rate": 3.9905493403710634e-05, "loss": 0.2707, "step": 10719 }, { "epoch": 1.2711964899798411, "grad_norm": 1.0204997563820253, "learning_rate": 3.990356627398167e-05, "loss": 0.2561, "step": 10720 }, { "epoch": 1.2713150717419661, "grad_norm": 0.8506459529618069, "learning_rate": 3.9901639006860104e-05, "loss": 0.1581, "step": 10721 }, { "epoch": 1.271433653504091, "grad_norm": 1.318313785767226, "learning_rate": 3.9899711602363696e-05, "loss": 0.2488, "step": 10722 }, { "epoch": 1.271552235266216, "grad_norm": 0.9829156483484653, "learning_rate": 3.989778406051022e-05, "loss": 0.1948, "step": 10723 }, { "epoch": 1.271670817028341, "grad_norm": 1.5084978239224585, "learning_rate": 3.989585638131744e-05, "loss": 0.3187, "step": 10724 }, { "epoch": 1.271789398790466, "grad_norm": 1.1319984850210534, "learning_rate": 3.989392856480313e-05, "loss": 0.2382, "step": 10725 }, { "epoch": 1.271907980552591, "grad_norm": 1.1888636637056829, "learning_rate": 3.989200061098506e-05, "loss": 0.3327, "step": 10726 }, { "epoch": 1.272026562314716, "grad_norm": 1.1398874179319805, "learning_rate": 3.9890072519881016e-05, "loss": 0.2559, "step": 10727 }, { "epoch": 1.272145144076841, "grad_norm": 1.1705299011701351, "learning_rate": 3.9888144291508756e-05, "loss": 0.2712, "step": 10728 }, { "epoch": 1.272263725838966, "grad_norm": 1.0852975003506278, "learning_rate": 3.988621592588607e-05, "loss": 0.2585, "step": 10729 }, { "epoch": 1.272382307601091, "grad_norm": 1.3521493261108521, "learning_rate": 3.988428742303072e-05, "loss": 0.3471, "step": 10730 }, { "epoch": 1.272500889363216, "grad_norm": 0.9796288780391064, "learning_rate": 3.9882358782960497e-05, "loss": 0.224, "step": 10731 }, { "epoch": 1.272619471125341, "grad_norm": 1.0472668117434936, "learning_rate": 3.988043000569317e-05, "loss": 0.2175, "step": 10732 }, { "epoch": 1.2727380528874659, "grad_norm": 0.9516384691870745, "learning_rate": 3.987850109124653e-05, "loss": 0.2174, "step": 10733 }, { "epoch": 1.2728566346495909, "grad_norm": 0.9278705480811084, "learning_rate": 3.987657203963835e-05, "loss": 0.2052, "step": 10734 }, { "epoch": 1.2729752164117158, "grad_norm": 0.996511502275814, "learning_rate": 3.9874642850886426e-05, "loss": 0.2108, "step": 10735 }, { "epoch": 1.2730937981738408, "grad_norm": 1.3032703879335963, "learning_rate": 3.9872713525008535e-05, "loss": 0.2754, "step": 10736 }, { "epoch": 1.2732123799359658, "grad_norm": 1.181016433098123, "learning_rate": 3.987078406202246e-05, "loss": 0.2387, "step": 10737 }, { "epoch": 1.2733309616980908, "grad_norm": 1.5296302438097418, "learning_rate": 3.9868854461945994e-05, "loss": 0.314, "step": 10738 }, { "epoch": 1.2734495434602158, "grad_norm": 0.9750327744606769, "learning_rate": 3.986692472479692e-05, "loss": 0.2094, "step": 10739 }, { "epoch": 1.2735681252223408, "grad_norm": 1.1705208848042112, "learning_rate": 3.9864994850593036e-05, "loss": 0.2283, "step": 10740 }, { "epoch": 1.2736867069844657, "grad_norm": 1.143361343776401, "learning_rate": 3.986306483935213e-05, "loss": 0.2315, "step": 10741 }, { "epoch": 1.2738052887465907, "grad_norm": 1.178502739696743, "learning_rate": 3.986113469109198e-05, "loss": 0.2495, "step": 10742 }, { "epoch": 1.2739238705087157, "grad_norm": 0.8692228637376979, "learning_rate": 3.98592044058304e-05, "loss": 0.1729, "step": 10743 }, { "epoch": 1.2740424522708407, "grad_norm": 1.0705477451196304, "learning_rate": 3.9857273983585183e-05, "loss": 0.2202, "step": 10744 }, { "epoch": 1.2741610340329657, "grad_norm": 0.9122347761068905, "learning_rate": 3.985534342437411e-05, "loss": 0.1694, "step": 10745 }, { "epoch": 1.2742796157950906, "grad_norm": 0.9126119873345295, "learning_rate": 3.985341272821499e-05, "loss": 0.2235, "step": 10746 }, { "epoch": 1.2743981975572156, "grad_norm": 0.8083512508531157, "learning_rate": 3.985148189512562e-05, "loss": 0.1431, "step": 10747 }, { "epoch": 1.2745167793193408, "grad_norm": 1.0064643942516724, "learning_rate": 3.98495509251238e-05, "loss": 0.1957, "step": 10748 }, { "epoch": 1.2746353610814656, "grad_norm": 0.8961918358572555, "learning_rate": 3.984761981822732e-05, "loss": 0.2004, "step": 10749 }, { "epoch": 1.2747539428435908, "grad_norm": 1.2652547478491485, "learning_rate": 3.984568857445401e-05, "loss": 0.2099, "step": 10750 }, { "epoch": 1.2748725246057155, "grad_norm": 1.5208584319985956, "learning_rate": 3.9843757193821645e-05, "loss": 0.2939, "step": 10751 }, { "epoch": 1.2749911063678407, "grad_norm": 0.9807201050845276, "learning_rate": 3.9841825676348047e-05, "loss": 0.2089, "step": 10752 }, { "epoch": 1.2751096881299655, "grad_norm": 1.126037823143914, "learning_rate": 3.9839894022051013e-05, "loss": 0.1855, "step": 10753 }, { "epoch": 1.2752282698920907, "grad_norm": 1.2960040228685163, "learning_rate": 3.983796223094835e-05, "loss": 0.2512, "step": 10754 }, { "epoch": 1.2753468516542155, "grad_norm": 1.0928294296753787, "learning_rate": 3.9836030303057883e-05, "loss": 0.2833, "step": 10755 }, { "epoch": 1.2754654334163407, "grad_norm": 0.9802863026186557, "learning_rate": 3.9834098238397396e-05, "loss": 0.1926, "step": 10756 }, { "epoch": 1.2755840151784654, "grad_norm": 1.123754737479265, "learning_rate": 3.983216603698472e-05, "loss": 0.1898, "step": 10757 }, { "epoch": 1.2757025969405906, "grad_norm": 1.0710651706434926, "learning_rate": 3.983023369883766e-05, "loss": 0.2737, "step": 10758 }, { "epoch": 1.2758211787027154, "grad_norm": 0.970645167287315, "learning_rate": 3.982830122397403e-05, "loss": 0.2004, "step": 10759 }, { "epoch": 1.2759397604648406, "grad_norm": 1.071601575329305, "learning_rate": 3.982636861241165e-05, "loss": 0.2341, "step": 10760 }, { "epoch": 1.2760583422269656, "grad_norm": 1.4662226414449882, "learning_rate": 3.982443586416834e-05, "loss": 0.3167, "step": 10761 }, { "epoch": 1.2761769239890906, "grad_norm": 1.2488398534175147, "learning_rate": 3.98225029792619e-05, "loss": 0.2417, "step": 10762 }, { "epoch": 1.2762955057512155, "grad_norm": 0.8550336124779022, "learning_rate": 3.9820569957710167e-05, "loss": 0.1687, "step": 10763 }, { "epoch": 1.2764140875133405, "grad_norm": 1.0073469473416086, "learning_rate": 3.981863679953094e-05, "loss": 0.2044, "step": 10764 }, { "epoch": 1.2765326692754655, "grad_norm": 0.9557902299502421, "learning_rate": 3.981670350474206e-05, "loss": 0.1839, "step": 10765 }, { "epoch": 1.2766512510375905, "grad_norm": 1.339610070479773, "learning_rate": 3.981477007336135e-05, "loss": 0.2652, "step": 10766 }, { "epoch": 1.2767698327997155, "grad_norm": 1.2781064307178756, "learning_rate": 3.9812836505406614e-05, "loss": 0.2661, "step": 10767 }, { "epoch": 1.2768884145618404, "grad_norm": 1.1278602704510179, "learning_rate": 3.98109028008957e-05, "loss": 0.2513, "step": 10768 }, { "epoch": 1.2770069963239654, "grad_norm": 1.2117200114402091, "learning_rate": 3.9808968959846416e-05, "loss": 0.2622, "step": 10769 }, { "epoch": 1.2771255780860904, "grad_norm": 1.3018365154736355, "learning_rate": 3.980703498227661e-05, "loss": 0.2758, "step": 10770 }, { "epoch": 1.2772441598482154, "grad_norm": 1.0535607868851413, "learning_rate": 3.980510086820409e-05, "loss": 0.2359, "step": 10771 }, { "epoch": 1.2773627416103404, "grad_norm": 1.0191695626980086, "learning_rate": 3.98031666176467e-05, "loss": 0.2391, "step": 10772 }, { "epoch": 1.2774813233724653, "grad_norm": 1.0529741652759002, "learning_rate": 3.9801232230622266e-05, "loss": 0.2414, "step": 10773 }, { "epoch": 1.2775999051345903, "grad_norm": 1.0766937141432322, "learning_rate": 3.979929770714862e-05, "loss": 0.2038, "step": 10774 }, { "epoch": 1.2777184868967153, "grad_norm": 1.1711256827254262, "learning_rate": 3.97973630472436e-05, "loss": 0.2177, "step": 10775 }, { "epoch": 1.2778370686588403, "grad_norm": 1.2287305986693369, "learning_rate": 3.979542825092503e-05, "loss": 0.2456, "step": 10776 }, { "epoch": 1.2779556504209653, "grad_norm": 1.0670594279674128, "learning_rate": 3.979349331821076e-05, "loss": 0.2782, "step": 10777 }, { "epoch": 1.2780742321830902, "grad_norm": 1.1196378719961013, "learning_rate": 3.979155824911863e-05, "loss": 0.1896, "step": 10778 }, { "epoch": 1.2781928139452152, "grad_norm": 2.662878470385042, "learning_rate": 3.978962304366646e-05, "loss": 0.4028, "step": 10779 }, { "epoch": 1.2783113957073402, "grad_norm": 0.9258389119173809, "learning_rate": 3.97876877018721e-05, "loss": 0.1828, "step": 10780 }, { "epoch": 1.2784299774694652, "grad_norm": 1.6205214831015902, "learning_rate": 3.9785752223753406e-05, "loss": 0.2429, "step": 10781 }, { "epoch": 1.2785485592315902, "grad_norm": 1.0531458938995457, "learning_rate": 3.97838166093282e-05, "loss": 0.2343, "step": 10782 }, { "epoch": 1.2786671409937151, "grad_norm": 1.3234376795672296, "learning_rate": 3.978188085861434e-05, "loss": 0.2859, "step": 10783 }, { "epoch": 1.2787857227558401, "grad_norm": 0.7062582871288987, "learning_rate": 3.977994497162966e-05, "loss": 0.136, "step": 10784 }, { "epoch": 1.278904304517965, "grad_norm": 1.077109028626667, "learning_rate": 3.9778008948392006e-05, "loss": 0.2251, "step": 10785 }, { "epoch": 1.27902288628009, "grad_norm": 1.133612752429743, "learning_rate": 3.977607278891924e-05, "loss": 0.2201, "step": 10786 }, { "epoch": 1.279141468042215, "grad_norm": 1.3091837012733343, "learning_rate": 3.977413649322921e-05, "loss": 0.2802, "step": 10787 }, { "epoch": 1.27926004980434, "grad_norm": 0.9619663244214097, "learning_rate": 3.9772200061339746e-05, "loss": 0.241, "step": 10788 }, { "epoch": 1.279378631566465, "grad_norm": 0.9414846498318026, "learning_rate": 3.977026349326871e-05, "loss": 0.2532, "step": 10789 }, { "epoch": 1.27949721332859, "grad_norm": 0.8944483944557797, "learning_rate": 3.9768326789033967e-05, "loss": 0.2136, "step": 10790 }, { "epoch": 1.279615795090715, "grad_norm": 0.9732708054867222, "learning_rate": 3.976638994865336e-05, "loss": 0.2023, "step": 10791 }, { "epoch": 1.27973437685284, "grad_norm": 1.1212607027694528, "learning_rate": 3.976445297214474e-05, "loss": 0.2341, "step": 10792 }, { "epoch": 1.279852958614965, "grad_norm": 1.15262076829852, "learning_rate": 3.976251585952597e-05, "loss": 0.2004, "step": 10793 }, { "epoch": 1.27997154037709, "grad_norm": 1.2344976101366896, "learning_rate": 3.9760578610814906e-05, "loss": 0.2737, "step": 10794 }, { "epoch": 1.280090122139215, "grad_norm": 1.2336276643157398, "learning_rate": 3.975864122602941e-05, "loss": 0.3141, "step": 10795 }, { "epoch": 1.28020870390134, "grad_norm": 1.2102905537655648, "learning_rate": 3.975670370518734e-05, "loss": 0.2558, "step": 10796 }, { "epoch": 1.280327285663465, "grad_norm": 1.0172884340862178, "learning_rate": 3.9754766048306555e-05, "loss": 0.1813, "step": 10797 }, { "epoch": 1.2804458674255899, "grad_norm": 1.385540104185659, "learning_rate": 3.975282825540493e-05, "loss": 0.2708, "step": 10798 }, { "epoch": 1.280564449187715, "grad_norm": 1.0339832845056875, "learning_rate": 3.9750890326500313e-05, "loss": 0.1863, "step": 10799 }, { "epoch": 1.2806830309498398, "grad_norm": 1.0651917868920195, "learning_rate": 3.9748952261610575e-05, "loss": 0.2136, "step": 10800 }, { "epoch": 1.280801612711965, "grad_norm": 1.0448083465828053, "learning_rate": 3.974701406075358e-05, "loss": 0.2798, "step": 10801 }, { "epoch": 1.2809201944740898, "grad_norm": 0.9630826586442516, "learning_rate": 3.974507572394721e-05, "loss": 0.1939, "step": 10802 }, { "epoch": 1.281038776236215, "grad_norm": 1.1392357919925158, "learning_rate": 3.9743137251209315e-05, "loss": 0.2562, "step": 10803 }, { "epoch": 1.2811573579983397, "grad_norm": 1.0889679097238374, "learning_rate": 3.974119864255778e-05, "loss": 0.2037, "step": 10804 }, { "epoch": 1.281275939760465, "grad_norm": 1.3210974350315363, "learning_rate": 3.973925989801046e-05, "loss": 0.2681, "step": 10805 }, { "epoch": 1.2813945215225897, "grad_norm": 0.9583683508913416, "learning_rate": 3.973732101758525e-05, "loss": 0.2037, "step": 10806 }, { "epoch": 1.281513103284715, "grad_norm": 1.3491855242132251, "learning_rate": 3.97353820013e-05, "loss": 0.2649, "step": 10807 }, { "epoch": 1.2816316850468397, "grad_norm": 1.1048654093660821, "learning_rate": 3.973344284917261e-05, "loss": 0.222, "step": 10808 }, { "epoch": 1.2817502668089649, "grad_norm": 1.0619835299881157, "learning_rate": 3.9731503561220936e-05, "loss": 0.2439, "step": 10809 }, { "epoch": 1.2818688485710898, "grad_norm": 1.130662680304318, "learning_rate": 3.972956413746286e-05, "loss": 0.1912, "step": 10810 }, { "epoch": 1.2819874303332148, "grad_norm": 0.8626962528850692, "learning_rate": 3.9727624577916276e-05, "loss": 0.1679, "step": 10811 }, { "epoch": 1.2821060120953398, "grad_norm": 0.9733274136432473, "learning_rate": 3.972568488259905e-05, "loss": 0.1916, "step": 10812 }, { "epoch": 1.2822245938574648, "grad_norm": 1.8788321132432857, "learning_rate": 3.972374505152907e-05, "loss": 0.3866, "step": 10813 }, { "epoch": 1.2823431756195898, "grad_norm": 1.1036534492203087, "learning_rate": 3.97218050847242e-05, "loss": 0.2761, "step": 10814 }, { "epoch": 1.2824617573817148, "grad_norm": 1.0978372989135854, "learning_rate": 3.9719864982202364e-05, "loss": 0.2496, "step": 10815 }, { "epoch": 1.2825803391438397, "grad_norm": 1.2898888556319095, "learning_rate": 3.9717924743981415e-05, "loss": 0.3215, "step": 10816 }, { "epoch": 1.2826989209059647, "grad_norm": 1.4634120850778907, "learning_rate": 3.971598437007924e-05, "loss": 0.365, "step": 10817 }, { "epoch": 1.2828175026680897, "grad_norm": 0.9240282952853114, "learning_rate": 3.9714043860513745e-05, "loss": 0.1991, "step": 10818 }, { "epoch": 1.2829360844302147, "grad_norm": 1.0256191252637823, "learning_rate": 3.971210321530281e-05, "loss": 0.2314, "step": 10819 }, { "epoch": 1.2830546661923397, "grad_norm": 1.2544047813305328, "learning_rate": 3.9710162434464314e-05, "loss": 0.2486, "step": 10820 }, { "epoch": 1.2831732479544646, "grad_norm": 1.1109123590789722, "learning_rate": 3.970822151801616e-05, "loss": 0.1996, "step": 10821 }, { "epoch": 1.2832918297165896, "grad_norm": 1.2570038056889168, "learning_rate": 3.970628046597625e-05, "loss": 0.2712, "step": 10822 }, { "epoch": 1.2834104114787146, "grad_norm": 1.182870748167845, "learning_rate": 3.970433927836246e-05, "loss": 0.246, "step": 10823 }, { "epoch": 1.2835289932408396, "grad_norm": 1.1187804258687966, "learning_rate": 3.97023979551927e-05, "loss": 0.2353, "step": 10824 }, { "epoch": 1.2836475750029646, "grad_norm": 1.2380539765894887, "learning_rate": 3.970045649648485e-05, "loss": 0.239, "step": 10825 }, { "epoch": 1.2837661567650895, "grad_norm": 0.8080707455040895, "learning_rate": 3.969851490225684e-05, "loss": 0.1913, "step": 10826 }, { "epoch": 1.2838847385272145, "grad_norm": 1.257570774927042, "learning_rate": 3.969657317252652e-05, "loss": 0.2509, "step": 10827 }, { "epoch": 1.2840033202893395, "grad_norm": 1.0750878412680054, "learning_rate": 3.969463130731183e-05, "loss": 0.2294, "step": 10828 }, { "epoch": 1.2841219020514645, "grad_norm": 1.0073654636667686, "learning_rate": 3.969268930663066e-05, "loss": 0.221, "step": 10829 }, { "epoch": 1.2842404838135895, "grad_norm": 1.0101777759893429, "learning_rate": 3.9690747170500906e-05, "loss": 0.2062, "step": 10830 }, { "epoch": 1.2843590655757144, "grad_norm": 1.0182331324116962, "learning_rate": 3.9688804898940484e-05, "loss": 0.228, "step": 10831 }, { "epoch": 1.2844776473378394, "grad_norm": 1.081276421226411, "learning_rate": 3.968686249196729e-05, "loss": 0.2219, "step": 10832 }, { "epoch": 1.2845962290999644, "grad_norm": 0.934956973006679, "learning_rate": 3.9684919949599234e-05, "loss": 0.1682, "step": 10833 }, { "epoch": 1.2847148108620894, "grad_norm": 1.216757134148545, "learning_rate": 3.968297727185423e-05, "loss": 0.2496, "step": 10834 }, { "epoch": 1.2848333926242144, "grad_norm": 1.6238736171895398, "learning_rate": 3.968103445875018e-05, "loss": 0.3954, "step": 10835 }, { "epoch": 1.2849519743863393, "grad_norm": 1.0826496181446703, "learning_rate": 3.967909151030499e-05, "loss": 0.2013, "step": 10836 }, { "epoch": 1.2850705561484643, "grad_norm": 1.1371912530561223, "learning_rate": 3.9677148426536584e-05, "loss": 0.2466, "step": 10837 }, { "epoch": 1.2851891379105893, "grad_norm": 1.4511816622349496, "learning_rate": 3.9675205207462854e-05, "loss": 0.2694, "step": 10838 }, { "epoch": 1.2853077196727143, "grad_norm": 1.131842578408116, "learning_rate": 3.967326185310174e-05, "loss": 0.2596, "step": 10839 }, { "epoch": 1.2854263014348393, "grad_norm": 1.2886355944190715, "learning_rate": 3.967131836347114e-05, "loss": 0.2769, "step": 10840 }, { "epoch": 1.2855448831969642, "grad_norm": 1.112183255024301, "learning_rate": 3.9669374738588985e-05, "loss": 0.2234, "step": 10841 }, { "epoch": 1.2856634649590892, "grad_norm": 0.8106646531315126, "learning_rate": 3.966743097847317e-05, "loss": 0.234, "step": 10842 }, { "epoch": 1.2857820467212142, "grad_norm": 1.488070038886557, "learning_rate": 3.9665487083141636e-05, "loss": 0.2762, "step": 10843 }, { "epoch": 1.2859006284833392, "grad_norm": 1.046731742393729, "learning_rate": 3.966354305261229e-05, "loss": 0.213, "step": 10844 }, { "epoch": 1.2860192102454642, "grad_norm": 1.159844054641228, "learning_rate": 3.966159888690306e-05, "loss": 0.2194, "step": 10845 }, { "epoch": 1.2861377920075892, "grad_norm": 1.2967631573296186, "learning_rate": 3.965965458603188e-05, "loss": 0.3036, "step": 10846 }, { "epoch": 1.2862563737697141, "grad_norm": 0.9062555437708207, "learning_rate": 3.965771015001665e-05, "loss": 0.1988, "step": 10847 }, { "epoch": 1.2863749555318393, "grad_norm": 0.9766186773423624, "learning_rate": 3.9655765578875305e-05, "loss": 0.1658, "step": 10848 }, { "epoch": 1.286493537293964, "grad_norm": 1.258270882776296, "learning_rate": 3.965382087262578e-05, "loss": 0.3109, "step": 10849 }, { "epoch": 1.2866121190560893, "grad_norm": 1.2383936069880728, "learning_rate": 3.965187603128598e-05, "loss": 0.263, "step": 10850 }, { "epoch": 1.286730700818214, "grad_norm": 1.0029879068069858, "learning_rate": 3.964993105487387e-05, "loss": 0.1763, "step": 10851 }, { "epoch": 1.2868492825803393, "grad_norm": 0.776745860103505, "learning_rate": 3.964798594340735e-05, "loss": 0.145, "step": 10852 }, { "epoch": 1.286967864342464, "grad_norm": 1.6526214290791585, "learning_rate": 3.964604069690437e-05, "loss": 0.296, "step": 10853 }, { "epoch": 1.2870864461045892, "grad_norm": 1.2472859369583584, "learning_rate": 3.9644095315382853e-05, "loss": 0.2215, "step": 10854 }, { "epoch": 1.287205027866714, "grad_norm": 1.0424786910271304, "learning_rate": 3.9642149798860725e-05, "loss": 0.2474, "step": 10855 }, { "epoch": 1.2873236096288392, "grad_norm": 1.029839603055906, "learning_rate": 3.964020414735594e-05, "loss": 0.2264, "step": 10856 }, { "epoch": 1.287442191390964, "grad_norm": 1.1329540560047393, "learning_rate": 3.9638258360886425e-05, "loss": 0.18, "step": 10857 }, { "epoch": 1.2875607731530891, "grad_norm": 0.9460007286515121, "learning_rate": 3.963631243947013e-05, "loss": 0.1864, "step": 10858 }, { "epoch": 1.287679354915214, "grad_norm": 1.0052041708525885, "learning_rate": 3.963436638312496e-05, "loss": 0.2188, "step": 10859 }, { "epoch": 1.287797936677339, "grad_norm": 0.8110774137365021, "learning_rate": 3.963242019186889e-05, "loss": 0.1522, "step": 10860 }, { "epoch": 1.287916518439464, "grad_norm": 0.9982825850996764, "learning_rate": 3.963047386571985e-05, "loss": 0.2381, "step": 10861 }, { "epoch": 1.288035100201589, "grad_norm": 1.0251395933398348, "learning_rate": 3.962852740469578e-05, "loss": 0.2118, "step": 10862 }, { "epoch": 1.288153681963714, "grad_norm": 1.0194117620348124, "learning_rate": 3.962658080881462e-05, "loss": 0.1888, "step": 10863 }, { "epoch": 1.288272263725839, "grad_norm": 1.0366235112332165, "learning_rate": 3.962463407809434e-05, "loss": 0.2524, "step": 10864 }, { "epoch": 1.288390845487964, "grad_norm": 1.0293300616643928, "learning_rate": 3.962268721255284e-05, "loss": 0.2, "step": 10865 }, { "epoch": 1.288509427250089, "grad_norm": 1.4032066970026995, "learning_rate": 3.962074021220812e-05, "loss": 0.2279, "step": 10866 }, { "epoch": 1.288628009012214, "grad_norm": 1.0953174453187209, "learning_rate": 3.961879307707809e-05, "loss": 0.201, "step": 10867 }, { "epoch": 1.288746590774339, "grad_norm": 1.250480613723757, "learning_rate": 3.961684580718072e-05, "loss": 0.2494, "step": 10868 }, { "epoch": 1.288865172536464, "grad_norm": 1.0564991976863756, "learning_rate": 3.961489840253396e-05, "loss": 0.1897, "step": 10869 }, { "epoch": 1.288983754298589, "grad_norm": 1.488951499483191, "learning_rate": 3.961295086315575e-05, "loss": 0.3025, "step": 10870 }, { "epoch": 1.289102336060714, "grad_norm": 1.1348335103772185, "learning_rate": 3.9611003189064055e-05, "loss": 0.2997, "step": 10871 }, { "epoch": 1.2892209178228389, "grad_norm": 1.0958328606780834, "learning_rate": 3.960905538027683e-05, "loss": 0.29, "step": 10872 }, { "epoch": 1.2893394995849639, "grad_norm": 1.5853389934726192, "learning_rate": 3.960710743681203e-05, "loss": 0.3408, "step": 10873 }, { "epoch": 1.2894580813470888, "grad_norm": 1.3553147386580882, "learning_rate": 3.960515935868761e-05, "loss": 0.3124, "step": 10874 }, { "epoch": 1.2895766631092138, "grad_norm": 1.0956780669018842, "learning_rate": 3.9603211145921534e-05, "loss": 0.262, "step": 10875 }, { "epoch": 1.2896952448713388, "grad_norm": 1.2314281805100147, "learning_rate": 3.960126279853175e-05, "loss": 0.2984, "step": 10876 }, { "epoch": 1.2898138266334638, "grad_norm": 1.262612356495236, "learning_rate": 3.959931431653624e-05, "loss": 0.2522, "step": 10877 }, { "epoch": 1.2899324083955888, "grad_norm": 1.0905198432499834, "learning_rate": 3.9597365699952946e-05, "loss": 0.1944, "step": 10878 }, { "epoch": 1.2900509901577137, "grad_norm": 1.0671757046165362, "learning_rate": 3.9595416948799844e-05, "loss": 0.2373, "step": 10879 }, { "epoch": 1.2901695719198387, "grad_norm": 1.3834351524923154, "learning_rate": 3.9593468063094893e-05, "loss": 0.278, "step": 10880 }, { "epoch": 1.2902881536819637, "grad_norm": 0.8893796294768886, "learning_rate": 3.959151904285606e-05, "loss": 0.1666, "step": 10881 }, { "epoch": 1.2904067354440887, "grad_norm": 1.4486572060457568, "learning_rate": 3.9589569888101326e-05, "loss": 0.3251, "step": 10882 }, { "epoch": 1.2905253172062137, "grad_norm": 1.292692264632047, "learning_rate": 3.958762059884864e-05, "loss": 0.3335, "step": 10883 }, { "epoch": 1.2906438989683386, "grad_norm": 1.017353759313209, "learning_rate": 3.958567117511599e-05, "loss": 0.2442, "step": 10884 }, { "epoch": 1.2907624807304636, "grad_norm": 0.8731184855866532, "learning_rate": 3.958372161692132e-05, "loss": 0.2272, "step": 10885 }, { "epoch": 1.2908810624925886, "grad_norm": 1.2201772270588465, "learning_rate": 3.958177192428264e-05, "loss": 0.3452, "step": 10886 }, { "epoch": 1.2909996442547136, "grad_norm": 1.6345679025474167, "learning_rate": 3.957982209721789e-05, "loss": 0.2544, "step": 10887 }, { "epoch": 1.2911182260168386, "grad_norm": 1.3845043580271446, "learning_rate": 3.957787213574506e-05, "loss": 0.2538, "step": 10888 }, { "epoch": 1.2912368077789635, "grad_norm": 1.0251734897505893, "learning_rate": 3.9575922039882135e-05, "loss": 0.2239, "step": 10889 }, { "epoch": 1.2913553895410885, "grad_norm": 1.3402822784085564, "learning_rate": 3.957397180964708e-05, "loss": 0.2598, "step": 10890 }, { "epoch": 1.2914739713032135, "grad_norm": 1.2416829162990342, "learning_rate": 3.9572021445057874e-05, "loss": 0.257, "step": 10891 }, { "epoch": 1.2915925530653385, "grad_norm": 0.998296227017748, "learning_rate": 3.9570070946132496e-05, "loss": 0.1974, "step": 10892 }, { "epoch": 1.2917111348274635, "grad_norm": 0.9488605144187368, "learning_rate": 3.9568120312888935e-05, "loss": 0.213, "step": 10893 }, { "epoch": 1.2918297165895885, "grad_norm": 1.456984093395168, "learning_rate": 3.956616954534517e-05, "loss": 0.2892, "step": 10894 }, { "epoch": 1.2919482983517134, "grad_norm": 1.4549135026537598, "learning_rate": 3.956421864351919e-05, "loss": 0.3438, "step": 10895 }, { "epoch": 1.2920668801138384, "grad_norm": 1.3033457191151183, "learning_rate": 3.956226760742896e-05, "loss": 0.2935, "step": 10896 }, { "epoch": 1.2921854618759636, "grad_norm": 1.3611386550927653, "learning_rate": 3.956031643709249e-05, "loss": 0.333, "step": 10897 }, { "epoch": 1.2923040436380884, "grad_norm": 0.8482019499211835, "learning_rate": 3.955836513252775e-05, "loss": 0.1906, "step": 10898 }, { "epoch": 1.2924226254002136, "grad_norm": 0.7592516879020468, "learning_rate": 3.955641369375275e-05, "loss": 0.1465, "step": 10899 }, { "epoch": 1.2925412071623383, "grad_norm": 0.9114152660113787, "learning_rate": 3.955446212078545e-05, "loss": 0.2112, "step": 10900 }, { "epoch": 1.2926597889244635, "grad_norm": 1.0176965527249355, "learning_rate": 3.9552510413643876e-05, "loss": 0.2444, "step": 10901 }, { "epoch": 1.2927783706865883, "grad_norm": 1.3238726854403902, "learning_rate": 3.955055857234599e-05, "loss": 0.2632, "step": 10902 }, { "epoch": 1.2928969524487135, "grad_norm": 1.2086654054290191, "learning_rate": 3.954860659690979e-05, "loss": 0.2145, "step": 10903 }, { "epoch": 1.2930155342108383, "grad_norm": 1.166604912384743, "learning_rate": 3.954665448735329e-05, "loss": 0.2532, "step": 10904 }, { "epoch": 1.2931341159729635, "grad_norm": 1.0259644434566155, "learning_rate": 3.954470224369446e-05, "loss": 0.2614, "step": 10905 }, { "epoch": 1.2932526977350882, "grad_norm": 1.421475489821214, "learning_rate": 3.9542749865951324e-05, "loss": 0.325, "step": 10906 }, { "epoch": 1.2933712794972134, "grad_norm": 0.6892659324852397, "learning_rate": 3.9540797354141864e-05, "loss": 0.1574, "step": 10907 }, { "epoch": 1.2934898612593382, "grad_norm": 1.1943262884891912, "learning_rate": 3.953884470828409e-05, "loss": 0.2958, "step": 10908 }, { "epoch": 1.2936084430214634, "grad_norm": 1.0059936374831007, "learning_rate": 3.953689192839598e-05, "loss": 0.2033, "step": 10909 }, { "epoch": 1.2937270247835884, "grad_norm": 1.3049417394248664, "learning_rate": 3.9534939014495566e-05, "loss": 0.2484, "step": 10910 }, { "epoch": 1.2938456065457133, "grad_norm": 1.2831307623216306, "learning_rate": 3.953298596660083e-05, "loss": 0.323, "step": 10911 }, { "epoch": 1.2939641883078383, "grad_norm": 1.1153406161859811, "learning_rate": 3.953103278472979e-05, "loss": 0.3101, "step": 10912 }, { "epoch": 1.2940827700699633, "grad_norm": 1.1685298747146309, "learning_rate": 3.952907946890044e-05, "loss": 0.2439, "step": 10913 }, { "epoch": 1.2942013518320883, "grad_norm": 1.1133327678044826, "learning_rate": 3.95271260191308e-05, "loss": 0.2383, "step": 10914 }, { "epoch": 1.2943199335942133, "grad_norm": 1.1485858526637018, "learning_rate": 3.952517243543888e-05, "loss": 0.2285, "step": 10915 }, { "epoch": 1.2944385153563382, "grad_norm": 1.0614519519421255, "learning_rate": 3.952321871784267e-05, "loss": 0.2451, "step": 10916 }, { "epoch": 1.2945570971184632, "grad_norm": 1.3389531246040944, "learning_rate": 3.9521264866360205e-05, "loss": 0.2777, "step": 10917 }, { "epoch": 1.2946756788805882, "grad_norm": 0.9536185829351614, "learning_rate": 3.951931088100947e-05, "loss": 0.1754, "step": 10918 }, { "epoch": 1.2947942606427132, "grad_norm": 1.442286288284422, "learning_rate": 3.9517356761808505e-05, "loss": 0.2943, "step": 10919 }, { "epoch": 1.2949128424048382, "grad_norm": 1.0108269207595781, "learning_rate": 3.951540250877531e-05, "loss": 0.2064, "step": 10920 }, { "epoch": 1.2950314241669632, "grad_norm": 1.1874412648391464, "learning_rate": 3.9513448121927904e-05, "loss": 0.2504, "step": 10921 }, { "epoch": 1.2951500059290881, "grad_norm": 1.8577874653148276, "learning_rate": 3.95114936012843e-05, "loss": 0.3956, "step": 10922 }, { "epoch": 1.2952685876912131, "grad_norm": 1.2745280955023635, "learning_rate": 3.9509538946862525e-05, "loss": 0.1945, "step": 10923 }, { "epoch": 1.295387169453338, "grad_norm": 0.9017677114698852, "learning_rate": 3.950758415868059e-05, "loss": 0.2091, "step": 10924 }, { "epoch": 1.295505751215463, "grad_norm": 1.36057581619686, "learning_rate": 3.9505629236756526e-05, "loss": 0.2721, "step": 10925 }, { "epoch": 1.295624332977588, "grad_norm": 1.021380764669379, "learning_rate": 3.9503674181108344e-05, "loss": 0.2127, "step": 10926 }, { "epoch": 1.295742914739713, "grad_norm": 1.3653272173770985, "learning_rate": 3.950171899175407e-05, "loss": 0.258, "step": 10927 }, { "epoch": 1.295861496501838, "grad_norm": 1.103043843459674, "learning_rate": 3.9499763668711725e-05, "loss": 0.2232, "step": 10928 }, { "epoch": 1.295980078263963, "grad_norm": 1.3460112082826168, "learning_rate": 3.949780821199935e-05, "loss": 0.2699, "step": 10929 }, { "epoch": 1.296098660026088, "grad_norm": 0.8451803341454924, "learning_rate": 3.949585262163496e-05, "loss": 0.2076, "step": 10930 }, { "epoch": 1.296217241788213, "grad_norm": 1.5258215906603612, "learning_rate": 3.9493896897636587e-05, "loss": 0.3435, "step": 10931 }, { "epoch": 1.296335823550338, "grad_norm": 1.0594440382660262, "learning_rate": 3.9491941040022245e-05, "loss": 0.2177, "step": 10932 }, { "epoch": 1.296454405312463, "grad_norm": 1.347716419928417, "learning_rate": 3.9489985048809984e-05, "loss": 0.2527, "step": 10933 }, { "epoch": 1.296572987074588, "grad_norm": 1.039553294688813, "learning_rate": 3.9488028924017836e-05, "loss": 0.1952, "step": 10934 }, { "epoch": 1.2966915688367129, "grad_norm": 1.4532963276403335, "learning_rate": 3.9486072665663825e-05, "loss": 0.2859, "step": 10935 }, { "epoch": 1.2968101505988379, "grad_norm": 1.4832038361414788, "learning_rate": 3.948411627376599e-05, "loss": 0.3221, "step": 10936 }, { "epoch": 1.2969287323609628, "grad_norm": 1.0660062250236255, "learning_rate": 3.9482159748342354e-05, "loss": 0.2303, "step": 10937 }, { "epoch": 1.2970473141230878, "grad_norm": 1.0076370736756224, "learning_rate": 3.948020308941097e-05, "loss": 0.2754, "step": 10938 }, { "epoch": 1.2971658958852128, "grad_norm": 0.967483758669861, "learning_rate": 3.9478246296989873e-05, "loss": 0.2274, "step": 10939 }, { "epoch": 1.2972844776473378, "grad_norm": 1.0028042922740734, "learning_rate": 3.9476289371097105e-05, "loss": 0.2468, "step": 10940 }, { "epoch": 1.2974030594094628, "grad_norm": 0.9473837525029029, "learning_rate": 3.9474332311750696e-05, "loss": 0.1803, "step": 10941 }, { "epoch": 1.2975216411715877, "grad_norm": 1.1306772008251902, "learning_rate": 3.947237511896869e-05, "loss": 0.2152, "step": 10942 }, { "epoch": 1.2976402229337127, "grad_norm": 0.9383691048947969, "learning_rate": 3.947041779276913e-05, "loss": 0.21, "step": 10943 }, { "epoch": 1.2977588046958377, "grad_norm": 0.9319631144337592, "learning_rate": 3.946846033317006e-05, "loss": 0.1636, "step": 10944 }, { "epoch": 1.2978773864579627, "grad_norm": 1.1611219824955554, "learning_rate": 3.9466502740189544e-05, "loss": 0.2503, "step": 10945 }, { "epoch": 1.2979959682200877, "grad_norm": 1.1984024787537124, "learning_rate": 3.94645450138456e-05, "loss": 0.234, "step": 10946 }, { "epoch": 1.2981145499822127, "grad_norm": 0.9987341047902589, "learning_rate": 3.946258715415629e-05, "loss": 0.1925, "step": 10947 }, { "epoch": 1.2982331317443379, "grad_norm": 1.4300347102787359, "learning_rate": 3.9460629161139676e-05, "loss": 0.2923, "step": 10948 }, { "epoch": 1.2983517135064626, "grad_norm": 0.9738341906524622, "learning_rate": 3.9458671034813775e-05, "loss": 0.243, "step": 10949 }, { "epoch": 1.2984702952685878, "grad_norm": 1.1434331591476317, "learning_rate": 3.945671277519667e-05, "loss": 0.2379, "step": 10950 }, { "epoch": 1.2985888770307126, "grad_norm": 1.2803592011531808, "learning_rate": 3.945475438230639e-05, "loss": 0.2531, "step": 10951 }, { "epoch": 1.2987074587928378, "grad_norm": 0.8684743616037215, "learning_rate": 3.9452795856161004e-05, "loss": 0.1877, "step": 10952 }, { "epoch": 1.2988260405549625, "grad_norm": 1.1635908093456846, "learning_rate": 3.945083719677857e-05, "loss": 0.2258, "step": 10953 }, { "epoch": 1.2989446223170877, "grad_norm": 1.6479160793408856, "learning_rate": 3.944887840417713e-05, "loss": 0.3551, "step": 10954 }, { "epoch": 1.2990632040792125, "grad_norm": 1.1475337948813804, "learning_rate": 3.944691947837475e-05, "loss": 0.2441, "step": 10955 }, { "epoch": 1.2991817858413377, "grad_norm": 1.161098695672196, "learning_rate": 3.944496041938949e-05, "loss": 0.2649, "step": 10956 }, { "epoch": 1.2993003676034625, "grad_norm": 1.1909902634831517, "learning_rate": 3.944300122723941e-05, "loss": 0.2574, "step": 10957 }, { "epoch": 1.2994189493655877, "grad_norm": 1.1562883122580063, "learning_rate": 3.9441041901942566e-05, "loss": 0.256, "step": 10958 }, { "epoch": 1.2995375311277124, "grad_norm": 0.8194073254128489, "learning_rate": 3.9439082443517026e-05, "loss": 0.1515, "step": 10959 }, { "epoch": 1.2996561128898376, "grad_norm": 0.9454543556052423, "learning_rate": 3.9437122851980855e-05, "loss": 0.2575, "step": 10960 }, { "epoch": 1.2997746946519626, "grad_norm": 0.8832766822840542, "learning_rate": 3.943516312735211e-05, "loss": 0.2172, "step": 10961 }, { "epoch": 1.2998932764140876, "grad_norm": 1.2171855192479484, "learning_rate": 3.9433203269648875e-05, "loss": 0.2177, "step": 10962 }, { "epoch": 1.3000118581762126, "grad_norm": 1.1632174660850447, "learning_rate": 3.9431243278889197e-05, "loss": 0.3033, "step": 10963 }, { "epoch": 1.3001304399383375, "grad_norm": 1.2348335482179527, "learning_rate": 3.942928315509115e-05, "loss": 0.2972, "step": 10964 }, { "epoch": 1.3002490217004625, "grad_norm": 1.1811467688845099, "learning_rate": 3.942732289827281e-05, "loss": 0.2101, "step": 10965 }, { "epoch": 1.3003676034625875, "grad_norm": 1.0741466451234243, "learning_rate": 3.942536250845225e-05, "loss": 0.2256, "step": 10966 }, { "epoch": 1.3004861852247125, "grad_norm": 1.452026231644293, "learning_rate": 3.9423401985647526e-05, "loss": 0.3787, "step": 10967 }, { "epoch": 1.3006047669868375, "grad_norm": 0.9900339027586272, "learning_rate": 3.942144132987673e-05, "loss": 0.2116, "step": 10968 }, { "epoch": 1.3007233487489624, "grad_norm": 1.5798822304224407, "learning_rate": 3.941948054115792e-05, "loss": 0.3551, "step": 10969 }, { "epoch": 1.3008419305110874, "grad_norm": 1.0506342922115095, "learning_rate": 3.941751961950919e-05, "loss": 0.2405, "step": 10970 }, { "epoch": 1.3009605122732124, "grad_norm": 1.052018683018909, "learning_rate": 3.941555856494861e-05, "loss": 0.2778, "step": 10971 }, { "epoch": 1.3010790940353374, "grad_norm": 1.1147736653837486, "learning_rate": 3.941359737749426e-05, "loss": 0.2802, "step": 10972 }, { "epoch": 1.3011976757974624, "grad_norm": 1.083706638428422, "learning_rate": 3.9411636057164206e-05, "loss": 0.2301, "step": 10973 }, { "epoch": 1.3013162575595874, "grad_norm": 1.0532423278475032, "learning_rate": 3.940967460397656e-05, "loss": 0.174, "step": 10974 }, { "epoch": 1.3014348393217123, "grad_norm": 0.987517799962196, "learning_rate": 3.940771301794937e-05, "loss": 0.2389, "step": 10975 }, { "epoch": 1.3015534210838373, "grad_norm": 1.1261967856232036, "learning_rate": 3.940575129910073e-05, "loss": 0.2794, "step": 10976 }, { "epoch": 1.3016720028459623, "grad_norm": 0.8671480576021543, "learning_rate": 3.940378944744873e-05, "loss": 0.1769, "step": 10977 }, { "epoch": 1.3017905846080873, "grad_norm": 0.9708060629952727, "learning_rate": 3.9401827463011465e-05, "loss": 0.2216, "step": 10978 }, { "epoch": 1.3019091663702123, "grad_norm": 1.009598848587565, "learning_rate": 3.9399865345807e-05, "loss": 0.2333, "step": 10979 }, { "epoch": 1.3020277481323372, "grad_norm": 0.9939228181681855, "learning_rate": 3.939790309585344e-05, "loss": 0.232, "step": 10980 }, { "epoch": 1.3021463298944622, "grad_norm": 0.9821716127518101, "learning_rate": 3.939594071316887e-05, "loss": 0.2662, "step": 10981 }, { "epoch": 1.3022649116565872, "grad_norm": 0.9092369229597885, "learning_rate": 3.939397819777138e-05, "loss": 0.1931, "step": 10982 }, { "epoch": 1.3023834934187122, "grad_norm": 0.9401910332321576, "learning_rate": 3.939201554967906e-05, "loss": 0.1702, "step": 10983 }, { "epoch": 1.3025020751808372, "grad_norm": 1.1173591216225927, "learning_rate": 3.939005276891001e-05, "loss": 0.2367, "step": 10984 }, { "epoch": 1.3026206569429621, "grad_norm": 0.9796743162363702, "learning_rate": 3.938808985548231e-05, "loss": 0.2115, "step": 10985 }, { "epoch": 1.3027392387050871, "grad_norm": 1.0020710483908057, "learning_rate": 3.9386126809414076e-05, "loss": 0.1485, "step": 10986 }, { "epoch": 1.302857820467212, "grad_norm": 1.7965607422536327, "learning_rate": 3.938416363072339e-05, "loss": 0.3728, "step": 10987 }, { "epoch": 1.302976402229337, "grad_norm": 1.543581253018415, "learning_rate": 3.938220031942835e-05, "loss": 0.3855, "step": 10988 }, { "epoch": 1.303094983991462, "grad_norm": 0.9895761832005385, "learning_rate": 3.938023687554707e-05, "loss": 0.2016, "step": 10989 }, { "epoch": 1.303213565753587, "grad_norm": 1.5064454957267597, "learning_rate": 3.9378273299097635e-05, "loss": 0.3449, "step": 10990 }, { "epoch": 1.303332147515712, "grad_norm": 0.9317585629842473, "learning_rate": 3.937630959009815e-05, "loss": 0.1831, "step": 10991 }, { "epoch": 1.303450729277837, "grad_norm": 1.200933103085418, "learning_rate": 3.937434574856672e-05, "loss": 0.2867, "step": 10992 }, { "epoch": 1.303569311039962, "grad_norm": 1.0706253583238232, "learning_rate": 3.937238177452145e-05, "loss": 0.2506, "step": 10993 }, { "epoch": 1.303687892802087, "grad_norm": 1.0648096672750937, "learning_rate": 3.9370417667980446e-05, "loss": 0.2155, "step": 10994 }, { "epoch": 1.303806474564212, "grad_norm": 1.1169240209925375, "learning_rate": 3.936845342896181e-05, "loss": 0.2264, "step": 10995 }, { "epoch": 1.303925056326337, "grad_norm": 1.1864142136562092, "learning_rate": 3.936648905748366e-05, "loss": 0.2526, "step": 10996 }, { "epoch": 1.3040436380884621, "grad_norm": 1.0362168935782063, "learning_rate": 3.936452455356409e-05, "loss": 0.2581, "step": 10997 }, { "epoch": 1.304162219850587, "grad_norm": 1.049977026796436, "learning_rate": 3.936255991722122e-05, "loss": 0.2088, "step": 10998 }, { "epoch": 1.304280801612712, "grad_norm": 0.931230089782395, "learning_rate": 3.936059514847317e-05, "loss": 0.181, "step": 10999 }, { "epoch": 1.3043993833748369, "grad_norm": 0.9931849029603441, "learning_rate": 3.9358630247338034e-05, "loss": 0.1968, "step": 11000 }, { "epoch": 1.304517965136962, "grad_norm": 1.0262459760439553, "learning_rate": 3.9356665213833935e-05, "loss": 0.2265, "step": 11001 }, { "epoch": 1.3046365468990868, "grad_norm": 1.1955885270566438, "learning_rate": 3.9354700047978986e-05, "loss": 0.2763, "step": 11002 }, { "epoch": 1.304755128661212, "grad_norm": 1.6458841590910434, "learning_rate": 3.935273474979131e-05, "loss": 0.2739, "step": 11003 }, { "epoch": 1.3048737104233368, "grad_norm": 1.1783239350545998, "learning_rate": 3.935076931928902e-05, "loss": 0.2534, "step": 11004 }, { "epoch": 1.304992292185462, "grad_norm": 1.8678454303605045, "learning_rate": 3.9348803756490234e-05, "loss": 0.5127, "step": 11005 }, { "epoch": 1.3051108739475867, "grad_norm": 1.2591141646030697, "learning_rate": 3.934683806141307e-05, "loss": 0.2674, "step": 11006 }, { "epoch": 1.305229455709712, "grad_norm": 1.4611253160457696, "learning_rate": 3.934487223407566e-05, "loss": 0.3019, "step": 11007 }, { "epoch": 1.3053480374718367, "grad_norm": 0.902960384666571, "learning_rate": 3.934290627449611e-05, "loss": 0.1996, "step": 11008 }, { "epoch": 1.305466619233962, "grad_norm": 1.0365335102156557, "learning_rate": 3.9340940182692556e-05, "loss": 0.2329, "step": 11009 }, { "epoch": 1.3055852009960869, "grad_norm": 1.1667042248386066, "learning_rate": 3.9338973958683115e-05, "loss": 0.2371, "step": 11010 }, { "epoch": 1.3057037827582119, "grad_norm": 1.1851575265309677, "learning_rate": 3.933700760248593e-05, "loss": 0.2399, "step": 11011 }, { "epoch": 1.3058223645203368, "grad_norm": 1.03041721271189, "learning_rate": 3.93350411141191e-05, "loss": 0.2241, "step": 11012 }, { "epoch": 1.3059409462824618, "grad_norm": 1.3439700183422978, "learning_rate": 3.9333074493600786e-05, "loss": 0.2866, "step": 11013 }, { "epoch": 1.3060595280445868, "grad_norm": 1.0741181542259692, "learning_rate": 3.933110774094909e-05, "loss": 0.2257, "step": 11014 }, { "epoch": 1.3061781098067118, "grad_norm": 1.2022991159271619, "learning_rate": 3.932914085618217e-05, "loss": 0.2792, "step": 11015 }, { "epoch": 1.3062966915688368, "grad_norm": 1.1630013474836398, "learning_rate": 3.932717383931812e-05, "loss": 0.2461, "step": 11016 }, { "epoch": 1.3064152733309617, "grad_norm": 0.9713579937469023, "learning_rate": 3.932520669037511e-05, "loss": 0.22, "step": 11017 }, { "epoch": 1.3065338550930867, "grad_norm": 1.5403182852809536, "learning_rate": 3.932323940937126e-05, "loss": 0.3859, "step": 11018 }, { "epoch": 1.3066524368552117, "grad_norm": 0.7076946892835153, "learning_rate": 3.9321271996324706e-05, "loss": 0.1386, "step": 11019 }, { "epoch": 1.3067710186173367, "grad_norm": 1.4937714930209656, "learning_rate": 3.931930445125358e-05, "loss": 0.2549, "step": 11020 }, { "epoch": 1.3068896003794617, "grad_norm": 1.03606494176738, "learning_rate": 3.931733677417604e-05, "loss": 0.2184, "step": 11021 }, { "epoch": 1.3070081821415866, "grad_norm": 1.2388664837649044, "learning_rate": 3.93153689651102e-05, "loss": 0.209, "step": 11022 }, { "epoch": 1.3071267639037116, "grad_norm": 0.9619298248351297, "learning_rate": 3.931340102407421e-05, "loss": 0.1912, "step": 11023 }, { "epoch": 1.3072453456658366, "grad_norm": 1.1925713485668128, "learning_rate": 3.9311432951086234e-05, "loss": 0.2375, "step": 11024 }, { "epoch": 1.3073639274279616, "grad_norm": 1.6650479425514484, "learning_rate": 3.930946474616438e-05, "loss": 0.3977, "step": 11025 }, { "epoch": 1.3074825091900866, "grad_norm": 1.168668308668476, "learning_rate": 3.9307496409326813e-05, "loss": 0.2205, "step": 11026 }, { "epoch": 1.3076010909522116, "grad_norm": 1.0875723042302883, "learning_rate": 3.930552794059167e-05, "loss": 0.2358, "step": 11027 }, { "epoch": 1.3077196727143365, "grad_norm": 0.9141667471590318, "learning_rate": 3.930355933997711e-05, "loss": 0.2215, "step": 11028 }, { "epoch": 1.3078382544764615, "grad_norm": 0.9600842082955442, "learning_rate": 3.930159060750127e-05, "loss": 0.1985, "step": 11029 }, { "epoch": 1.3079568362385865, "grad_norm": 1.1549740524700642, "learning_rate": 3.92996217431823e-05, "loss": 0.2365, "step": 11030 }, { "epoch": 1.3080754180007115, "grad_norm": 0.9633976416225218, "learning_rate": 3.929765274703835e-05, "loss": 0.2157, "step": 11031 }, { "epoch": 1.3081939997628365, "grad_norm": 0.9757738777353772, "learning_rate": 3.929568361908759e-05, "loss": 0.1594, "step": 11032 }, { "epoch": 1.3083125815249614, "grad_norm": 1.2527918634111816, "learning_rate": 3.9293714359348135e-05, "loss": 0.2412, "step": 11033 }, { "epoch": 1.3084311632870864, "grad_norm": 1.3882991721045297, "learning_rate": 3.929174496783818e-05, "loss": 0.306, "step": 11034 }, { "epoch": 1.3085497450492114, "grad_norm": 0.8849318706492632, "learning_rate": 3.928977544457585e-05, "loss": 0.2242, "step": 11035 }, { "epoch": 1.3086683268113364, "grad_norm": 0.9885320485109715, "learning_rate": 3.9287805789579326e-05, "loss": 0.2017, "step": 11036 }, { "epoch": 1.3087869085734614, "grad_norm": 0.863515234431776, "learning_rate": 3.928583600286674e-05, "loss": 0.2075, "step": 11037 }, { "epoch": 1.3089054903355863, "grad_norm": 1.7540145162464784, "learning_rate": 3.9283866084456265e-05, "loss": 0.3283, "step": 11038 }, { "epoch": 1.3090240720977113, "grad_norm": 1.500305891615403, "learning_rate": 3.9281896034366075e-05, "loss": 0.3512, "step": 11039 }, { "epoch": 1.3091426538598363, "grad_norm": 0.8754008521091413, "learning_rate": 3.927992585261431e-05, "loss": 0.1753, "step": 11040 }, { "epoch": 1.3092612356219613, "grad_norm": 1.0513044993372331, "learning_rate": 3.927795553921914e-05, "loss": 0.2164, "step": 11041 }, { "epoch": 1.3093798173840863, "grad_norm": 1.266350547388877, "learning_rate": 3.927598509419872e-05, "loss": 0.2283, "step": 11042 }, { "epoch": 1.3094983991462112, "grad_norm": 1.168973706337675, "learning_rate": 3.927401451757123e-05, "loss": 0.2615, "step": 11043 }, { "epoch": 1.3096169809083362, "grad_norm": 1.603386605597296, "learning_rate": 3.927204380935483e-05, "loss": 0.3235, "step": 11044 }, { "epoch": 1.3097355626704612, "grad_norm": 1.1697360939469852, "learning_rate": 3.927007296956769e-05, "loss": 0.2066, "step": 11045 }, { "epoch": 1.3098541444325862, "grad_norm": 0.8139674193183849, "learning_rate": 3.9268101998227976e-05, "loss": 0.2113, "step": 11046 }, { "epoch": 1.3099727261947112, "grad_norm": 0.9381711979380831, "learning_rate": 3.926613089535386e-05, "loss": 0.2164, "step": 11047 }, { "epoch": 1.3100913079568364, "grad_norm": 1.110368195045124, "learning_rate": 3.926415966096351e-05, "loss": 0.254, "step": 11048 }, { "epoch": 1.3102098897189611, "grad_norm": 1.4317934763089903, "learning_rate": 3.92621882950751e-05, "loss": 0.3357, "step": 11049 }, { "epoch": 1.3103284714810863, "grad_norm": 1.3330238817727107, "learning_rate": 3.9260216797706794e-05, "loss": 0.2271, "step": 11050 }, { "epoch": 1.310447053243211, "grad_norm": 1.8283583099543241, "learning_rate": 3.925824516887679e-05, "loss": 0.1875, "step": 11051 }, { "epoch": 1.3105656350053363, "grad_norm": 1.112126808617559, "learning_rate": 3.925627340860324e-05, "loss": 0.2711, "step": 11052 }, { "epoch": 1.310684216767461, "grad_norm": 1.0631748504201781, "learning_rate": 3.9254301516904333e-05, "loss": 0.2657, "step": 11053 }, { "epoch": 1.3108027985295863, "grad_norm": 1.1373032644686027, "learning_rate": 3.925232949379825e-05, "loss": 0.2242, "step": 11054 }, { "epoch": 1.310921380291711, "grad_norm": 1.0891894437720193, "learning_rate": 3.9250357339303166e-05, "loss": 0.2861, "step": 11055 }, { "epoch": 1.3110399620538362, "grad_norm": 1.177565738783588, "learning_rate": 3.9248385053437256e-05, "loss": 0.3025, "step": 11056 }, { "epoch": 1.311158543815961, "grad_norm": 1.021076895052079, "learning_rate": 3.9246412636218715e-05, "loss": 0.2359, "step": 11057 }, { "epoch": 1.3112771255780862, "grad_norm": 0.7903176657034932, "learning_rate": 3.9244440087665724e-05, "loss": 0.2004, "step": 11058 }, { "epoch": 1.3113957073402112, "grad_norm": 1.0865025883221286, "learning_rate": 3.924246740779645e-05, "loss": 0.2561, "step": 11059 }, { "epoch": 1.3115142891023361, "grad_norm": 1.363566619894067, "learning_rate": 3.9240494596629105e-05, "loss": 0.3204, "step": 11060 }, { "epoch": 1.3116328708644611, "grad_norm": 1.599123338318329, "learning_rate": 3.923852165418185e-05, "loss": 0.2995, "step": 11061 }, { "epoch": 1.311751452626586, "grad_norm": 1.0673267631199486, "learning_rate": 3.92365485804729e-05, "loss": 0.2122, "step": 11062 }, { "epoch": 1.311870034388711, "grad_norm": 0.8686340651229295, "learning_rate": 3.923457537552042e-05, "loss": 0.2269, "step": 11063 }, { "epoch": 1.311988616150836, "grad_norm": 1.4547613599040894, "learning_rate": 3.9232602039342614e-05, "loss": 0.328, "step": 11064 }, { "epoch": 1.312107197912961, "grad_norm": 0.917687321440443, "learning_rate": 3.923062857195766e-05, "loss": 0.2163, "step": 11065 }, { "epoch": 1.312225779675086, "grad_norm": 1.8920434866946785, "learning_rate": 3.9228654973383775e-05, "loss": 0.4399, "step": 11066 }, { "epoch": 1.312344361437211, "grad_norm": 1.3631238730417725, "learning_rate": 3.922668124363914e-05, "loss": 0.2841, "step": 11067 }, { "epoch": 1.312462943199336, "grad_norm": 1.2049065224743407, "learning_rate": 3.922470738274194e-05, "loss": 0.2271, "step": 11068 }, { "epoch": 1.312581524961461, "grad_norm": 0.7978080042036371, "learning_rate": 3.922273339071039e-05, "loss": 0.1822, "step": 11069 }, { "epoch": 1.312700106723586, "grad_norm": 1.1817334623932707, "learning_rate": 3.922075926756267e-05, "loss": 0.2245, "step": 11070 }, { "epoch": 1.312818688485711, "grad_norm": 0.9933488590652596, "learning_rate": 3.9218785013316995e-05, "loss": 0.1844, "step": 11071 }, { "epoch": 1.312937270247836, "grad_norm": 1.1383745819867643, "learning_rate": 3.9216810627991556e-05, "loss": 0.2846, "step": 11072 }, { "epoch": 1.3130558520099609, "grad_norm": 1.3634093720258977, "learning_rate": 3.921483611160456e-05, "loss": 0.2502, "step": 11073 }, { "epoch": 1.3131744337720859, "grad_norm": 0.9425253275901578, "learning_rate": 3.92128614641742e-05, "loss": 0.1906, "step": 11074 }, { "epoch": 1.3132930155342109, "grad_norm": 1.7546724133971325, "learning_rate": 3.9210886685718696e-05, "loss": 0.4319, "step": 11075 }, { "epoch": 1.3134115972963358, "grad_norm": 0.8193982962349261, "learning_rate": 3.9208911776256236e-05, "loss": 0.1817, "step": 11076 }, { "epoch": 1.3135301790584608, "grad_norm": 0.9706696553986108, "learning_rate": 3.920693673580504e-05, "loss": 0.2144, "step": 11077 }, { "epoch": 1.3136487608205858, "grad_norm": 1.0387828538734425, "learning_rate": 3.92049615643833e-05, "loss": 0.2343, "step": 11078 }, { "epoch": 1.3137673425827108, "grad_norm": 1.5092123672303897, "learning_rate": 3.9202986262009245e-05, "loss": 0.3625, "step": 11079 }, { "epoch": 1.3138859243448358, "grad_norm": 1.0013589593752805, "learning_rate": 3.920101082870106e-05, "loss": 0.2126, "step": 11080 }, { "epoch": 1.3140045061069607, "grad_norm": 1.455391233515485, "learning_rate": 3.919903526447698e-05, "loss": 0.3778, "step": 11081 }, { "epoch": 1.3141230878690857, "grad_norm": 1.3306614495803373, "learning_rate": 3.91970595693552e-05, "loss": 0.2743, "step": 11082 }, { "epoch": 1.3142416696312107, "grad_norm": 0.8854717817415506, "learning_rate": 3.9195083743353946e-05, "loss": 0.2086, "step": 11083 }, { "epoch": 1.3143602513933357, "grad_norm": 0.9309228558840259, "learning_rate": 3.9193107786491425e-05, "loss": 0.1691, "step": 11084 }, { "epoch": 1.3144788331554607, "grad_norm": 0.9639274883011228, "learning_rate": 3.9191131698785854e-05, "loss": 0.266, "step": 11085 }, { "epoch": 1.3145974149175856, "grad_norm": 1.0099147993284139, "learning_rate": 3.918915548025545e-05, "loss": 0.2355, "step": 11086 }, { "epoch": 1.3147159966797106, "grad_norm": 1.040950819401661, "learning_rate": 3.918717913091843e-05, "loss": 0.2711, "step": 11087 }, { "epoch": 1.3148345784418356, "grad_norm": 0.8979440066396986, "learning_rate": 3.918520265079302e-05, "loss": 0.1599, "step": 11088 }, { "epoch": 1.3149531602039606, "grad_norm": 1.4035668725970163, "learning_rate": 3.918322603989743e-05, "loss": 0.2931, "step": 11089 }, { "epoch": 1.3150717419660856, "grad_norm": 1.016047967536112, "learning_rate": 3.91812492982499e-05, "loss": 0.1977, "step": 11090 }, { "epoch": 1.3151903237282105, "grad_norm": 1.296860020455167, "learning_rate": 3.917927242586864e-05, "loss": 0.2835, "step": 11091 }, { "epoch": 1.3153089054903355, "grad_norm": 1.132837159576494, "learning_rate": 3.917729542277188e-05, "loss": 0.244, "step": 11092 }, { "epoch": 1.3154274872524605, "grad_norm": 0.9722535317061051, "learning_rate": 3.9175318288977826e-05, "loss": 0.2607, "step": 11093 }, { "epoch": 1.3155460690145855, "grad_norm": 1.0111097029649245, "learning_rate": 3.917334102450473e-05, "loss": 0.2556, "step": 11094 }, { "epoch": 1.3156646507767105, "grad_norm": 1.4376006768170924, "learning_rate": 3.9171363629370804e-05, "loss": 0.3211, "step": 11095 }, { "epoch": 1.3157832325388354, "grad_norm": 0.9542360103268489, "learning_rate": 3.916938610359429e-05, "loss": 0.2045, "step": 11096 }, { "epoch": 1.3159018143009606, "grad_norm": 1.1433301710210642, "learning_rate": 3.916740844719341e-05, "loss": 0.2653, "step": 11097 }, { "epoch": 1.3160203960630854, "grad_norm": 1.2583909812749718, "learning_rate": 3.9165430660186395e-05, "loss": 0.3208, "step": 11098 }, { "epoch": 1.3161389778252106, "grad_norm": 0.9847176257532655, "learning_rate": 3.916345274259149e-05, "loss": 0.2103, "step": 11099 }, { "epoch": 1.3162575595873354, "grad_norm": 1.0560567427517145, "learning_rate": 3.916147469442691e-05, "loss": 0.2264, "step": 11100 }, { "epoch": 1.3163761413494606, "grad_norm": 0.8502604719882291, "learning_rate": 3.91594965157109e-05, "loss": 0.2165, "step": 11101 }, { "epoch": 1.3164947231115853, "grad_norm": 1.0935678469581984, "learning_rate": 3.9157518206461694e-05, "loss": 0.2414, "step": 11102 }, { "epoch": 1.3166133048737105, "grad_norm": 1.0069150189229674, "learning_rate": 3.915553976669754e-05, "loss": 0.1997, "step": 11103 }, { "epoch": 1.3167318866358353, "grad_norm": 1.0710528384740567, "learning_rate": 3.9153561196436655e-05, "loss": 0.202, "step": 11104 }, { "epoch": 1.3168504683979605, "grad_norm": 1.166389040839692, "learning_rate": 3.9151582495697305e-05, "loss": 0.2152, "step": 11105 }, { "epoch": 1.3169690501600853, "grad_norm": 0.9925540734786864, "learning_rate": 3.91496036644977e-05, "loss": 0.2319, "step": 11106 }, { "epoch": 1.3170876319222105, "grad_norm": 1.117108087192255, "learning_rate": 3.9147624702856125e-05, "loss": 0.2334, "step": 11107 }, { "epoch": 1.3172062136843352, "grad_norm": 1.190714820906956, "learning_rate": 3.914564561079078e-05, "loss": 0.2508, "step": 11108 }, { "epoch": 1.3173247954464604, "grad_norm": 1.1961034923263305, "learning_rate": 3.9143666388319933e-05, "loss": 0.2563, "step": 11109 }, { "epoch": 1.3174433772085854, "grad_norm": 1.4239545831995282, "learning_rate": 3.914168703546183e-05, "loss": 0.3119, "step": 11110 }, { "epoch": 1.3175619589707104, "grad_norm": 0.8067246450069098, "learning_rate": 3.913970755223472e-05, "loss": 0.1634, "step": 11111 }, { "epoch": 1.3176805407328354, "grad_norm": 1.0418585295439347, "learning_rate": 3.913772793865683e-05, "loss": 0.2279, "step": 11112 }, { "epoch": 1.3177991224949603, "grad_norm": 1.2580135676688358, "learning_rate": 3.913574819474644e-05, "loss": 0.2971, "step": 11113 }, { "epoch": 1.3179177042570853, "grad_norm": 1.2206671450237274, "learning_rate": 3.913376832052178e-05, "loss": 0.2903, "step": 11114 }, { "epoch": 1.3180362860192103, "grad_norm": 0.8259670550928198, "learning_rate": 3.913178831600111e-05, "loss": 0.1669, "step": 11115 }, { "epoch": 1.3181548677813353, "grad_norm": 1.154410999065161, "learning_rate": 3.9129808181202676e-05, "loss": 0.2217, "step": 11116 }, { "epoch": 1.3182734495434603, "grad_norm": 1.1027419717510611, "learning_rate": 3.912782791614473e-05, "loss": 0.2292, "step": 11117 }, { "epoch": 1.3183920313055852, "grad_norm": 1.1118561356501484, "learning_rate": 3.912584752084555e-05, "loss": 0.2497, "step": 11118 }, { "epoch": 1.3185106130677102, "grad_norm": 1.168217443821604, "learning_rate": 3.912386699532338e-05, "loss": 0.2019, "step": 11119 }, { "epoch": 1.3186291948298352, "grad_norm": 1.0279788244783217, "learning_rate": 3.912188633959647e-05, "loss": 0.2118, "step": 11120 }, { "epoch": 1.3187477765919602, "grad_norm": 1.065350736431159, "learning_rate": 3.911990555368308e-05, "loss": 0.2389, "step": 11121 }, { "epoch": 1.3188663583540852, "grad_norm": 1.7740945195203377, "learning_rate": 3.911792463760149e-05, "loss": 0.3953, "step": 11122 }, { "epoch": 1.3189849401162101, "grad_norm": 1.2900500047263377, "learning_rate": 3.911594359136993e-05, "loss": 0.3559, "step": 11123 }, { "epoch": 1.3191035218783351, "grad_norm": 1.1239349192456733, "learning_rate": 3.911396241500669e-05, "loss": 0.2205, "step": 11124 }, { "epoch": 1.31922210364046, "grad_norm": 0.851071550010116, "learning_rate": 3.911198110853003e-05, "loss": 0.2023, "step": 11125 }, { "epoch": 1.319340685402585, "grad_norm": 1.1291811921484072, "learning_rate": 3.91099996719582e-05, "loss": 0.246, "step": 11126 }, { "epoch": 1.31945926716471, "grad_norm": 1.3932749920944223, "learning_rate": 3.910801810530947e-05, "loss": 0.2198, "step": 11127 }, { "epoch": 1.319577848926835, "grad_norm": 1.182963377053831, "learning_rate": 3.910603640860213e-05, "loss": 0.2414, "step": 11128 }, { "epoch": 1.31969643068896, "grad_norm": 1.3252350794263683, "learning_rate": 3.910405458185442e-05, "loss": 0.24, "step": 11129 }, { "epoch": 1.319815012451085, "grad_norm": 0.8419268955558591, "learning_rate": 3.910207262508463e-05, "loss": 0.1393, "step": 11130 }, { "epoch": 1.31993359421321, "grad_norm": 1.0711879766345491, "learning_rate": 3.910009053831102e-05, "loss": 0.2844, "step": 11131 }, { "epoch": 1.320052175975335, "grad_norm": 1.1714430922191352, "learning_rate": 3.909810832155186e-05, "loss": 0.2655, "step": 11132 }, { "epoch": 1.32017075773746, "grad_norm": 0.9792247598536512, "learning_rate": 3.909612597482544e-05, "loss": 0.2163, "step": 11133 }, { "epoch": 1.320289339499585, "grad_norm": 0.8938720946758849, "learning_rate": 3.9094143498150024e-05, "loss": 0.2095, "step": 11134 }, { "epoch": 1.32040792126171, "grad_norm": 1.0121530705308068, "learning_rate": 3.909216089154388e-05, "loss": 0.206, "step": 11135 }, { "epoch": 1.320526503023835, "grad_norm": 1.6050876195324195, "learning_rate": 3.90901781550253e-05, "loss": 0.311, "step": 11136 }, { "epoch": 1.3206450847859599, "grad_norm": 0.9355379678853118, "learning_rate": 3.908819528861255e-05, "loss": 0.2077, "step": 11137 }, { "epoch": 1.3207636665480849, "grad_norm": 0.9297858434421828, "learning_rate": 3.908621229232392e-05, "loss": 0.1914, "step": 11138 }, { "epoch": 1.3208822483102098, "grad_norm": 0.8550043052466346, "learning_rate": 3.908422916617768e-05, "loss": 0.1857, "step": 11139 }, { "epoch": 1.3210008300723348, "grad_norm": 1.2616102303168062, "learning_rate": 3.908224591019213e-05, "loss": 0.2665, "step": 11140 }, { "epoch": 1.3211194118344598, "grad_norm": 0.8969710874401429, "learning_rate": 3.9080262524385536e-05, "loss": 0.162, "step": 11141 }, { "epoch": 1.3212379935965848, "grad_norm": 1.868253852967507, "learning_rate": 3.907827900877619e-05, "loss": 0.3202, "step": 11142 }, { "epoch": 1.3213565753587098, "grad_norm": 1.0402102381527065, "learning_rate": 3.907629536338236e-05, "loss": 0.2029, "step": 11143 }, { "epoch": 1.3214751571208347, "grad_norm": 1.1418412596259881, "learning_rate": 3.9074311588222365e-05, "loss": 0.1954, "step": 11144 }, { "epoch": 1.3215937388829597, "grad_norm": 1.6388374811549664, "learning_rate": 3.907232768331448e-05, "loss": 0.3523, "step": 11145 }, { "epoch": 1.3217123206450847, "grad_norm": 0.9468571963888505, "learning_rate": 3.9070343648676976e-05, "loss": 0.1764, "step": 11146 }, { "epoch": 1.3218309024072097, "grad_norm": 1.043918059069733, "learning_rate": 3.906835948432816e-05, "loss": 0.22, "step": 11147 }, { "epoch": 1.3219494841693349, "grad_norm": 1.1679314105015715, "learning_rate": 3.906637519028633e-05, "loss": 0.3077, "step": 11148 }, { "epoch": 1.3220680659314596, "grad_norm": 1.1833998830447887, "learning_rate": 3.906439076656977e-05, "loss": 0.3058, "step": 11149 }, { "epoch": 1.3221866476935848, "grad_norm": 0.966930902337545, "learning_rate": 3.906240621319677e-05, "loss": 0.2242, "step": 11150 }, { "epoch": 1.3223052294557096, "grad_norm": 1.2967596647394277, "learning_rate": 3.9060421530185623e-05, "loss": 0.2763, "step": 11151 }, { "epoch": 1.3224238112178348, "grad_norm": 1.4658769922684316, "learning_rate": 3.9058436717554634e-05, "loss": 0.3028, "step": 11152 }, { "epoch": 1.3225423929799596, "grad_norm": 0.8857357137773915, "learning_rate": 3.905645177532211e-05, "loss": 0.1549, "step": 11153 }, { "epoch": 1.3226609747420848, "grad_norm": 1.1049731273922976, "learning_rate": 3.905446670350633e-05, "loss": 0.2055, "step": 11154 }, { "epoch": 1.3227795565042095, "grad_norm": 1.5346513620353337, "learning_rate": 3.905248150212559e-05, "loss": 0.4317, "step": 11155 }, { "epoch": 1.3228981382663347, "grad_norm": 1.1371586792297637, "learning_rate": 3.905049617119822e-05, "loss": 0.1905, "step": 11156 }, { "epoch": 1.3230167200284595, "grad_norm": 1.1571624989369873, "learning_rate": 3.9048510710742505e-05, "loss": 0.3097, "step": 11157 }, { "epoch": 1.3231353017905847, "grad_norm": 1.1201197341147762, "learning_rate": 3.9046525120776735e-05, "loss": 0.2724, "step": 11158 }, { "epoch": 1.3232538835527097, "grad_norm": 1.3189291942202692, "learning_rate": 3.904453940131924e-05, "loss": 0.2274, "step": 11159 }, { "epoch": 1.3233724653148347, "grad_norm": 1.1363257131888023, "learning_rate": 3.9042553552388304e-05, "loss": 0.2552, "step": 11160 }, { "epoch": 1.3234910470769596, "grad_norm": 1.0371309303330907, "learning_rate": 3.9040567574002255e-05, "loss": 0.2771, "step": 11161 }, { "epoch": 1.3236096288390846, "grad_norm": 1.5136333042803067, "learning_rate": 3.9038581466179386e-05, "loss": 0.3229, "step": 11162 }, { "epoch": 1.3237282106012096, "grad_norm": 0.8992216782241905, "learning_rate": 3.903659522893801e-05, "loss": 0.1991, "step": 11163 }, { "epoch": 1.3238467923633346, "grad_norm": 1.2322927338098717, "learning_rate": 3.903460886229644e-05, "loss": 0.2677, "step": 11164 }, { "epoch": 1.3239653741254596, "grad_norm": 0.9856368743401103, "learning_rate": 3.903262236627299e-05, "loss": 0.1896, "step": 11165 }, { "epoch": 1.3240839558875845, "grad_norm": 1.1980670440785075, "learning_rate": 3.9030635740885964e-05, "loss": 0.2463, "step": 11166 }, { "epoch": 1.3242025376497095, "grad_norm": 0.8476944387448164, "learning_rate": 3.902864898615368e-05, "loss": 0.1982, "step": 11167 }, { "epoch": 1.3243211194118345, "grad_norm": 1.47158841706548, "learning_rate": 3.9026662102094466e-05, "loss": 0.3217, "step": 11168 }, { "epoch": 1.3244397011739595, "grad_norm": 1.0298730310411413, "learning_rate": 3.902467508872662e-05, "loss": 0.2318, "step": 11169 }, { "epoch": 1.3245582829360845, "grad_norm": 0.8706070593077753, "learning_rate": 3.902268794606847e-05, "loss": 0.1781, "step": 11170 }, { "epoch": 1.3246768646982094, "grad_norm": 1.4207262526605546, "learning_rate": 3.9020700674138334e-05, "loss": 0.2787, "step": 11171 }, { "epoch": 1.3247954464603344, "grad_norm": 1.0686297896799677, "learning_rate": 3.901871327295453e-05, "loss": 0.302, "step": 11172 }, { "epoch": 1.3249140282224594, "grad_norm": 1.1692507743382499, "learning_rate": 3.901672574253538e-05, "loss": 0.3112, "step": 11173 }, { "epoch": 1.3250326099845844, "grad_norm": 0.9730206222842813, "learning_rate": 3.9014738082899204e-05, "loss": 0.2001, "step": 11174 }, { "epoch": 1.3251511917467094, "grad_norm": 1.1668338114185135, "learning_rate": 3.901275029406434e-05, "loss": 0.2391, "step": 11175 }, { "epoch": 1.3252697735088343, "grad_norm": 0.9980371856304229, "learning_rate": 3.901076237604909e-05, "loss": 0.2374, "step": 11176 }, { "epoch": 1.3253883552709593, "grad_norm": 1.1058211183288547, "learning_rate": 3.90087743288718e-05, "loss": 0.2514, "step": 11177 }, { "epoch": 1.3255069370330843, "grad_norm": 0.814542366525708, "learning_rate": 3.900678615255078e-05, "loss": 0.1891, "step": 11178 }, { "epoch": 1.3256255187952093, "grad_norm": 0.98966923127372, "learning_rate": 3.900479784710438e-05, "loss": 0.2742, "step": 11179 }, { "epoch": 1.3257441005573343, "grad_norm": 0.7462805746393683, "learning_rate": 3.9002809412550904e-05, "loss": 0.1607, "step": 11180 }, { "epoch": 1.3258626823194593, "grad_norm": 1.2398633190978576, "learning_rate": 3.90008208489087e-05, "loss": 0.2815, "step": 11181 }, { "epoch": 1.3259812640815842, "grad_norm": 1.232846255983191, "learning_rate": 3.8998832156196105e-05, "loss": 0.283, "step": 11182 }, { "epoch": 1.3260998458437092, "grad_norm": 0.8954867942803527, "learning_rate": 3.899684333443144e-05, "loss": 0.2049, "step": 11183 }, { "epoch": 1.3262184276058342, "grad_norm": 1.0221004531487041, "learning_rate": 3.899485438363304e-05, "loss": 0.1974, "step": 11184 }, { "epoch": 1.3263370093679592, "grad_norm": 0.93661717740525, "learning_rate": 3.899286530381925e-05, "loss": 0.2107, "step": 11185 }, { "epoch": 1.3264555911300842, "grad_norm": 1.2537972839120064, "learning_rate": 3.89908760950084e-05, "loss": 0.3179, "step": 11186 }, { "epoch": 1.3265741728922091, "grad_norm": 1.5728288947230045, "learning_rate": 3.898888675721882e-05, "loss": 0.2736, "step": 11187 }, { "epoch": 1.3266927546543341, "grad_norm": 1.4314146265329413, "learning_rate": 3.898689729046887e-05, "loss": 0.3051, "step": 11188 }, { "epoch": 1.326811336416459, "grad_norm": 1.3162707797449178, "learning_rate": 3.898490769477688e-05, "loss": 0.3067, "step": 11189 }, { "epoch": 1.326929918178584, "grad_norm": 0.9717003695123547, "learning_rate": 3.898291797016118e-05, "loss": 0.2164, "step": 11190 }, { "epoch": 1.327048499940709, "grad_norm": 1.0948849456320158, "learning_rate": 3.898092811664013e-05, "loss": 0.3064, "step": 11191 }, { "epoch": 1.327167081702834, "grad_norm": 1.3771509115857439, "learning_rate": 3.897893813423207e-05, "loss": 0.292, "step": 11192 }, { "epoch": 1.327285663464959, "grad_norm": 1.0252068657863302, "learning_rate": 3.897694802295533e-05, "loss": 0.2133, "step": 11193 }, { "epoch": 1.327404245227084, "grad_norm": 0.9909389666011352, "learning_rate": 3.897495778282828e-05, "loss": 0.2067, "step": 11194 }, { "epoch": 1.327522826989209, "grad_norm": 1.2635780433384007, "learning_rate": 3.8972967413869254e-05, "loss": 0.2325, "step": 11195 }, { "epoch": 1.327641408751334, "grad_norm": 1.125877656536, "learning_rate": 3.89709769160966e-05, "loss": 0.25, "step": 11196 }, { "epoch": 1.3277599905134592, "grad_norm": 1.0371067657512503, "learning_rate": 3.8968986289528676e-05, "loss": 0.2666, "step": 11197 }, { "epoch": 1.327878572275584, "grad_norm": 0.9289754136664076, "learning_rate": 3.896699553418382e-05, "loss": 0.2024, "step": 11198 }, { "epoch": 1.3279971540377091, "grad_norm": 1.58602006852646, "learning_rate": 3.896500465008041e-05, "loss": 0.2498, "step": 11199 }, { "epoch": 1.3281157357998339, "grad_norm": 1.3338355380105875, "learning_rate": 3.8963013637236766e-05, "loss": 0.2906, "step": 11200 }, { "epoch": 1.328234317561959, "grad_norm": 1.2582563349818527, "learning_rate": 3.896102249567126e-05, "loss": 0.2398, "step": 11201 }, { "epoch": 1.3283528993240838, "grad_norm": 1.3798226878058262, "learning_rate": 3.895903122540225e-05, "loss": 0.2738, "step": 11202 }, { "epoch": 1.328471481086209, "grad_norm": 1.2658459017053347, "learning_rate": 3.8957039826448095e-05, "loss": 0.2158, "step": 11203 }, { "epoch": 1.3285900628483338, "grad_norm": 0.8943762040453472, "learning_rate": 3.8955048298827146e-05, "loss": 0.1746, "step": 11204 }, { "epoch": 1.328708644610459, "grad_norm": 1.7916217925231084, "learning_rate": 3.895305664255776e-05, "loss": 0.4186, "step": 11205 }, { "epoch": 1.3288272263725838, "grad_norm": 1.4504184329701393, "learning_rate": 3.895106485765831e-05, "loss": 0.3588, "step": 11206 }, { "epoch": 1.328945808134709, "grad_norm": 1.063908695127009, "learning_rate": 3.894907294414714e-05, "loss": 0.1859, "step": 11207 }, { "epoch": 1.3290643898968337, "grad_norm": 1.152374106520925, "learning_rate": 3.894708090204263e-05, "loss": 0.2801, "step": 11208 }, { "epoch": 1.329182971658959, "grad_norm": 1.0283576636217397, "learning_rate": 3.894508873136313e-05, "loss": 0.2404, "step": 11209 }, { "epoch": 1.329301553421084, "grad_norm": 1.2286602507556899, "learning_rate": 3.894309643212701e-05, "loss": 0.2117, "step": 11210 }, { "epoch": 1.329420135183209, "grad_norm": 1.3524100330640447, "learning_rate": 3.8941104004352655e-05, "loss": 0.2606, "step": 11211 }, { "epoch": 1.3295387169453339, "grad_norm": 1.1824870501575757, "learning_rate": 3.8939111448058404e-05, "loss": 0.224, "step": 11212 }, { "epoch": 1.3296572987074589, "grad_norm": 0.9078572172656802, "learning_rate": 3.893711876326265e-05, "loss": 0.1746, "step": 11213 }, { "epoch": 1.3297758804695838, "grad_norm": 1.0956395918222634, "learning_rate": 3.893512594998374e-05, "loss": 0.2466, "step": 11214 }, { "epoch": 1.3298944622317088, "grad_norm": 0.9727696491368, "learning_rate": 3.893313300824006e-05, "loss": 0.2101, "step": 11215 }, { "epoch": 1.3300130439938338, "grad_norm": 0.9793900576167454, "learning_rate": 3.8931139938049975e-05, "loss": 0.2394, "step": 11216 }, { "epoch": 1.3301316257559588, "grad_norm": 1.4516921665387146, "learning_rate": 3.892914673943188e-05, "loss": 0.4469, "step": 11217 }, { "epoch": 1.3302502075180838, "grad_norm": 0.9181860852591295, "learning_rate": 3.892715341240411e-05, "loss": 0.2238, "step": 11218 }, { "epoch": 1.3303687892802087, "grad_norm": 1.0330185531287335, "learning_rate": 3.892515995698508e-05, "loss": 0.2292, "step": 11219 }, { "epoch": 1.3304873710423337, "grad_norm": 1.0152625107201747, "learning_rate": 3.892316637319315e-05, "loss": 0.204, "step": 11220 }, { "epoch": 1.3306059528044587, "grad_norm": 1.1931067144123002, "learning_rate": 3.8921172661046704e-05, "loss": 0.2631, "step": 11221 }, { "epoch": 1.3307245345665837, "grad_norm": 1.1448001865217976, "learning_rate": 3.891917882056411e-05, "loss": 0.2791, "step": 11222 }, { "epoch": 1.3308431163287087, "grad_norm": 1.1847539294720653, "learning_rate": 3.891718485176375e-05, "loss": 0.1951, "step": 11223 }, { "epoch": 1.3309616980908336, "grad_norm": 1.0216674629133529, "learning_rate": 3.891519075466402e-05, "loss": 0.2178, "step": 11224 }, { "epoch": 1.3310802798529586, "grad_norm": 1.009929664738147, "learning_rate": 3.891319652928329e-05, "loss": 0.2413, "step": 11225 }, { "epoch": 1.3311988616150836, "grad_norm": 1.0409785535547231, "learning_rate": 3.891120217563996e-05, "loss": 0.1977, "step": 11226 }, { "epoch": 1.3313174433772086, "grad_norm": 1.1472355774891425, "learning_rate": 3.890920769375239e-05, "loss": 0.2539, "step": 11227 }, { "epoch": 1.3314360251393336, "grad_norm": 1.3276148382084325, "learning_rate": 3.8907213083638996e-05, "loss": 0.2639, "step": 11228 }, { "epoch": 1.3315546069014585, "grad_norm": 1.3518018683738118, "learning_rate": 3.8905218345318145e-05, "loss": 0.3331, "step": 11229 }, { "epoch": 1.3316731886635835, "grad_norm": 0.8234501011890035, "learning_rate": 3.890322347880824e-05, "loss": 0.182, "step": 11230 }, { "epoch": 1.3317917704257085, "grad_norm": 0.9277818973713268, "learning_rate": 3.890122848412765e-05, "loss": 0.2058, "step": 11231 }, { "epoch": 1.3319103521878335, "grad_norm": 1.29952062849528, "learning_rate": 3.889923336129479e-05, "loss": 0.3209, "step": 11232 }, { "epoch": 1.3320289339499585, "grad_norm": 1.1602620814774403, "learning_rate": 3.8897238110328035e-05, "loss": 0.2327, "step": 11233 }, { "epoch": 1.3321475157120835, "grad_norm": 1.053312910796751, "learning_rate": 3.8895242731245795e-05, "loss": 0.2567, "step": 11234 }, { "epoch": 1.3322660974742084, "grad_norm": 0.830245580918566, "learning_rate": 3.8893247224066456e-05, "loss": 0.1902, "step": 11235 }, { "epoch": 1.3323846792363334, "grad_norm": 0.9868038362806985, "learning_rate": 3.889125158880841e-05, "loss": 0.2272, "step": 11236 }, { "epoch": 1.3325032609984584, "grad_norm": 1.083798719160613, "learning_rate": 3.888925582549006e-05, "loss": 0.2216, "step": 11237 }, { "epoch": 1.3326218427605834, "grad_norm": 0.8347503057780409, "learning_rate": 3.88872599341298e-05, "loss": 0.177, "step": 11238 }, { "epoch": 1.3327404245227084, "grad_norm": 0.9595527061280946, "learning_rate": 3.888526391474604e-05, "loss": 0.2268, "step": 11239 }, { "epoch": 1.3328590062848333, "grad_norm": 1.3962729285315243, "learning_rate": 3.888326776735717e-05, "loss": 0.3504, "step": 11240 }, { "epoch": 1.3329775880469583, "grad_norm": 1.3932683405037798, "learning_rate": 3.8881271491981595e-05, "loss": 0.3092, "step": 11241 }, { "epoch": 1.3330961698090833, "grad_norm": 0.8335247694979706, "learning_rate": 3.8879275088637726e-05, "loss": 0.1445, "step": 11242 }, { "epoch": 1.3332147515712083, "grad_norm": 0.7837249590522793, "learning_rate": 3.8877278557343953e-05, "loss": 0.1632, "step": 11243 }, { "epoch": 1.3333333333333333, "grad_norm": 1.3284391978526873, "learning_rate": 3.8875281898118695e-05, "loss": 0.2674, "step": 11244 }, { "epoch": 1.3334519150954582, "grad_norm": 0.992701857047241, "learning_rate": 3.887328511098035e-05, "loss": 0.2044, "step": 11245 }, { "epoch": 1.3335704968575832, "grad_norm": 1.2452310236846582, "learning_rate": 3.8871288195947326e-05, "loss": 0.2911, "step": 11246 }, { "epoch": 1.3336890786197082, "grad_norm": 0.7335653048920354, "learning_rate": 3.886929115303804e-05, "loss": 0.1407, "step": 11247 }, { "epoch": 1.3338076603818334, "grad_norm": 1.7904225999663235, "learning_rate": 3.886729398227089e-05, "loss": 0.3361, "step": 11248 }, { "epoch": 1.3339262421439582, "grad_norm": 0.8677102444193976, "learning_rate": 3.88652966836643e-05, "loss": 0.1532, "step": 11249 }, { "epoch": 1.3340448239060834, "grad_norm": 0.970815483022368, "learning_rate": 3.886329925723667e-05, "loss": 0.1918, "step": 11250 }, { "epoch": 1.3341634056682081, "grad_norm": 1.012658791404795, "learning_rate": 3.8861301703006436e-05, "loss": 0.1698, "step": 11251 }, { "epoch": 1.3342819874303333, "grad_norm": 1.1246213908167446, "learning_rate": 3.885930402099199e-05, "loss": 0.1729, "step": 11252 }, { "epoch": 1.334400569192458, "grad_norm": 0.9725757240337546, "learning_rate": 3.885730621121175e-05, "loss": 0.1859, "step": 11253 }, { "epoch": 1.3345191509545833, "grad_norm": 1.5709184863805237, "learning_rate": 3.8855308273684154e-05, "loss": 0.3373, "step": 11254 }, { "epoch": 1.334637732716708, "grad_norm": 1.253627156440161, "learning_rate": 3.88533102084276e-05, "loss": 0.2681, "step": 11255 }, { "epoch": 1.3347563144788333, "grad_norm": 1.6703355699729674, "learning_rate": 3.8851312015460504e-05, "loss": 0.4387, "step": 11256 }, { "epoch": 1.334874896240958, "grad_norm": 0.8692713520733102, "learning_rate": 3.884931369480131e-05, "loss": 0.2175, "step": 11257 }, { "epoch": 1.3349934780030832, "grad_norm": 1.0531562053632904, "learning_rate": 3.8847315246468425e-05, "loss": 0.1719, "step": 11258 }, { "epoch": 1.3351120597652082, "grad_norm": 1.188939968258312, "learning_rate": 3.884531667048027e-05, "loss": 0.2454, "step": 11259 }, { "epoch": 1.3352306415273332, "grad_norm": 1.1542499281706509, "learning_rate": 3.884331796685527e-05, "loss": 0.2549, "step": 11260 }, { "epoch": 1.3353492232894582, "grad_norm": 1.2036655887981451, "learning_rate": 3.884131913561187e-05, "loss": 0.2418, "step": 11261 }, { "epoch": 1.3354678050515831, "grad_norm": 2.2302916028728417, "learning_rate": 3.8839320176768465e-05, "loss": 0.4717, "step": 11262 }, { "epoch": 1.3355863868137081, "grad_norm": 0.8781422164635321, "learning_rate": 3.88373210903435e-05, "loss": 0.2021, "step": 11263 }, { "epoch": 1.335704968575833, "grad_norm": 0.9116620045616166, "learning_rate": 3.8835321876355407e-05, "loss": 0.1792, "step": 11264 }, { "epoch": 1.335823550337958, "grad_norm": 0.8887350135988367, "learning_rate": 3.883332253482261e-05, "loss": 0.2056, "step": 11265 }, { "epoch": 1.335942132100083, "grad_norm": 1.1686091102705327, "learning_rate": 3.883132306576355e-05, "loss": 0.2434, "step": 11266 }, { "epoch": 1.336060713862208, "grad_norm": 1.4135044068997948, "learning_rate": 3.882932346919664e-05, "loss": 0.3566, "step": 11267 }, { "epoch": 1.336179295624333, "grad_norm": 1.1352004610235988, "learning_rate": 3.8827323745140335e-05, "loss": 0.2483, "step": 11268 }, { "epoch": 1.336297877386458, "grad_norm": 0.76470348283328, "learning_rate": 3.882532389361306e-05, "loss": 0.2041, "step": 11269 }, { "epoch": 1.336416459148583, "grad_norm": 0.9922064509909124, "learning_rate": 3.882332391463325e-05, "loss": 0.213, "step": 11270 }, { "epoch": 1.336535040910708, "grad_norm": 1.3803752073725748, "learning_rate": 3.882132380821935e-05, "loss": 0.2415, "step": 11271 }, { "epoch": 1.336653622672833, "grad_norm": 1.1780003722908097, "learning_rate": 3.8819323574389785e-05, "loss": 0.2849, "step": 11272 }, { "epoch": 1.336772204434958, "grad_norm": 1.1499848683718805, "learning_rate": 3.881732321316301e-05, "loss": 0.2097, "step": 11273 }, { "epoch": 1.336890786197083, "grad_norm": 1.080363129310601, "learning_rate": 3.881532272455746e-05, "loss": 0.2452, "step": 11274 }, { "epoch": 1.3370093679592079, "grad_norm": 1.0365344969424422, "learning_rate": 3.881332210859158e-05, "loss": 0.218, "step": 11275 }, { "epoch": 1.3371279497213329, "grad_norm": 1.0682448782830036, "learning_rate": 3.8811321365283795e-05, "loss": 0.1961, "step": 11276 }, { "epoch": 1.3372465314834578, "grad_norm": 1.2393598476928143, "learning_rate": 3.880932049465257e-05, "loss": 0.2748, "step": 11277 }, { "epoch": 1.3373651132455828, "grad_norm": 0.9513710483996061, "learning_rate": 3.8807319496716345e-05, "loss": 0.2445, "step": 11278 }, { "epoch": 1.3374836950077078, "grad_norm": 1.111162744198963, "learning_rate": 3.880531837149357e-05, "loss": 0.2858, "step": 11279 }, { "epoch": 1.3376022767698328, "grad_norm": 1.005747160062515, "learning_rate": 3.880331711900268e-05, "loss": 0.2245, "step": 11280 }, { "epoch": 1.3377208585319578, "grad_norm": 0.8062097509414904, "learning_rate": 3.880131573926213e-05, "loss": 0.166, "step": 11281 }, { "epoch": 1.3378394402940827, "grad_norm": 0.9556260969318162, "learning_rate": 3.879931423229039e-05, "loss": 0.1947, "step": 11282 }, { "epoch": 1.3379580220562077, "grad_norm": 1.1212506081955949, "learning_rate": 3.879731259810588e-05, "loss": 0.2737, "step": 11283 }, { "epoch": 1.3380766038183327, "grad_norm": 1.739173350783522, "learning_rate": 3.8795310836727075e-05, "loss": 0.3432, "step": 11284 }, { "epoch": 1.3381951855804577, "grad_norm": 0.9166752738505088, "learning_rate": 3.879330894817242e-05, "loss": 0.193, "step": 11285 }, { "epoch": 1.3383137673425827, "grad_norm": 0.9765640424530158, "learning_rate": 3.879130693246037e-05, "loss": 0.2067, "step": 11286 }, { "epoch": 1.3384323491047077, "grad_norm": 0.901567343393729, "learning_rate": 3.878930478960938e-05, "loss": 0.1536, "step": 11287 }, { "epoch": 1.3385509308668326, "grad_norm": 1.5418684006745853, "learning_rate": 3.8787302519637914e-05, "loss": 0.3063, "step": 11288 }, { "epoch": 1.3386695126289576, "grad_norm": 0.9845392278434172, "learning_rate": 3.8785300122564423e-05, "loss": 0.1894, "step": 11289 }, { "epoch": 1.3387880943910826, "grad_norm": 1.148671952840746, "learning_rate": 3.878329759840736e-05, "loss": 0.2334, "step": 11290 }, { "epoch": 1.3389066761532076, "grad_norm": 1.371663308902444, "learning_rate": 3.878129494718521e-05, "loss": 0.2517, "step": 11291 }, { "epoch": 1.3390252579153326, "grad_norm": 0.9274019524173648, "learning_rate": 3.877929216891642e-05, "loss": 0.1688, "step": 11292 }, { "epoch": 1.3391438396774575, "grad_norm": 0.9408543554200137, "learning_rate": 3.8777289263619444e-05, "loss": 0.1895, "step": 11293 }, { "epoch": 1.3392624214395825, "grad_norm": 1.2408909932500836, "learning_rate": 3.877528623131276e-05, "loss": 0.2811, "step": 11294 }, { "epoch": 1.3393810032017075, "grad_norm": 0.8675798781545533, "learning_rate": 3.877328307201483e-05, "loss": 0.168, "step": 11295 }, { "epoch": 1.3394995849638325, "grad_norm": 0.9478996748363286, "learning_rate": 3.877127978574412e-05, "loss": 0.2533, "step": 11296 }, { "epoch": 1.3396181667259577, "grad_norm": 1.4484043257380494, "learning_rate": 3.8769276372519096e-05, "loss": 0.2429, "step": 11297 }, { "epoch": 1.3397367484880824, "grad_norm": 1.0754606199095988, "learning_rate": 3.876727283235823e-05, "loss": 0.1772, "step": 11298 }, { "epoch": 1.3398553302502076, "grad_norm": 1.1222928923049134, "learning_rate": 3.8765269165279996e-05, "loss": 0.2131, "step": 11299 }, { "epoch": 1.3399739120123324, "grad_norm": 0.8496479001579185, "learning_rate": 3.876326537130285e-05, "loss": 0.1566, "step": 11300 }, { "epoch": 1.3400924937744576, "grad_norm": 1.3498337021953544, "learning_rate": 3.876126145044529e-05, "loss": 0.2472, "step": 11301 }, { "epoch": 1.3402110755365824, "grad_norm": 0.993948007357881, "learning_rate": 3.8759257402725754e-05, "loss": 0.2336, "step": 11302 }, { "epoch": 1.3403296572987076, "grad_norm": 1.0710542913698144, "learning_rate": 3.875725322816276e-05, "loss": 0.2273, "step": 11303 }, { "epoch": 1.3404482390608323, "grad_norm": 1.3599400447407781, "learning_rate": 3.8755248926774746e-05, "loss": 0.2475, "step": 11304 }, { "epoch": 1.3405668208229575, "grad_norm": 1.1222051854444417, "learning_rate": 3.875324449858021e-05, "loss": 0.2136, "step": 11305 }, { "epoch": 1.3406854025850823, "grad_norm": 1.060425263752431, "learning_rate": 3.875123994359762e-05, "loss": 0.2757, "step": 11306 }, { "epoch": 1.3408039843472075, "grad_norm": 1.0919499683526668, "learning_rate": 3.874923526184547e-05, "loss": 0.2002, "step": 11307 }, { "epoch": 1.3409225661093322, "grad_norm": 1.0642931248160736, "learning_rate": 3.874723045334222e-05, "loss": 0.2324, "step": 11308 }, { "epoch": 1.3410411478714575, "grad_norm": 1.124702511248524, "learning_rate": 3.874522551810637e-05, "loss": 0.2085, "step": 11309 }, { "epoch": 1.3411597296335824, "grad_norm": 1.202544248780369, "learning_rate": 3.8743220456156394e-05, "loss": 0.2283, "step": 11310 }, { "epoch": 1.3412783113957074, "grad_norm": 1.1278686163628868, "learning_rate": 3.8741215267510774e-05, "loss": 0.2649, "step": 11311 }, { "epoch": 1.3413968931578324, "grad_norm": 1.1755367294967765, "learning_rate": 3.8739209952188016e-05, "loss": 0.2057, "step": 11312 }, { "epoch": 1.3415154749199574, "grad_norm": 1.3943350174542728, "learning_rate": 3.873720451020658e-05, "loss": 0.3089, "step": 11313 }, { "epoch": 1.3416340566820824, "grad_norm": 1.0356761183256773, "learning_rate": 3.873519894158496e-05, "loss": 0.1948, "step": 11314 }, { "epoch": 1.3417526384442073, "grad_norm": 0.9850707570593819, "learning_rate": 3.873319324634165e-05, "loss": 0.248, "step": 11315 }, { "epoch": 1.3418712202063323, "grad_norm": 1.0574554017676447, "learning_rate": 3.8731187424495146e-05, "loss": 0.2225, "step": 11316 }, { "epoch": 1.3419898019684573, "grad_norm": 1.3256186803482177, "learning_rate": 3.872918147606393e-05, "loss": 0.3169, "step": 11317 }, { "epoch": 1.3421083837305823, "grad_norm": 0.9716412614070841, "learning_rate": 3.87271754010665e-05, "loss": 0.2113, "step": 11318 }, { "epoch": 1.3422269654927073, "grad_norm": 1.2149523214277407, "learning_rate": 3.8725169199521337e-05, "loss": 0.2758, "step": 11319 }, { "epoch": 1.3423455472548322, "grad_norm": 1.1028203157003447, "learning_rate": 3.872316287144695e-05, "loss": 0.2388, "step": 11320 }, { "epoch": 1.3424641290169572, "grad_norm": 0.7664476288888664, "learning_rate": 3.872115641686183e-05, "loss": 0.2028, "step": 11321 }, { "epoch": 1.3425827107790822, "grad_norm": 1.221894212915627, "learning_rate": 3.871914983578447e-05, "loss": 0.263, "step": 11322 }, { "epoch": 1.3427012925412072, "grad_norm": 1.0800877832243465, "learning_rate": 3.8717143128233375e-05, "loss": 0.1931, "step": 11323 }, { "epoch": 1.3428198743033322, "grad_norm": 1.4751327660435922, "learning_rate": 3.871513629422704e-05, "loss": 0.3222, "step": 11324 }, { "epoch": 1.3429384560654571, "grad_norm": 1.2998631384232902, "learning_rate": 3.8713129333783965e-05, "loss": 0.3299, "step": 11325 }, { "epoch": 1.3430570378275821, "grad_norm": 1.209338552274968, "learning_rate": 3.871112224692266e-05, "loss": 0.2394, "step": 11326 }, { "epoch": 1.343175619589707, "grad_norm": 1.0163696750028253, "learning_rate": 3.870911503366161e-05, "loss": 0.1864, "step": 11327 }, { "epoch": 1.343294201351832, "grad_norm": 0.9901762702210827, "learning_rate": 3.8707107694019346e-05, "loss": 0.2212, "step": 11328 }, { "epoch": 1.343412783113957, "grad_norm": 1.099552960998955, "learning_rate": 3.870510022801435e-05, "loss": 0.2792, "step": 11329 }, { "epoch": 1.343531364876082, "grad_norm": 1.0047563598139304, "learning_rate": 3.870309263566514e-05, "loss": 0.2313, "step": 11330 }, { "epoch": 1.343649946638207, "grad_norm": 1.1638245034528119, "learning_rate": 3.8701084916990216e-05, "loss": 0.2668, "step": 11331 }, { "epoch": 1.343768528400332, "grad_norm": 0.9574047059617741, "learning_rate": 3.869907707200809e-05, "loss": 0.1951, "step": 11332 }, { "epoch": 1.343887110162457, "grad_norm": 1.063473166655854, "learning_rate": 3.869706910073727e-05, "loss": 0.173, "step": 11333 }, { "epoch": 1.344005691924582, "grad_norm": 1.6178873368123503, "learning_rate": 3.869506100319628e-05, "loss": 0.316, "step": 11334 }, { "epoch": 1.344124273686707, "grad_norm": 1.1821358700715576, "learning_rate": 3.8693052779403613e-05, "loss": 0.2505, "step": 11335 }, { "epoch": 1.344242855448832, "grad_norm": 1.0610453122840684, "learning_rate": 3.8691044429377786e-05, "loss": 0.2024, "step": 11336 }, { "epoch": 1.344361437210957, "grad_norm": 1.2133089250425866, "learning_rate": 3.868903595313733e-05, "loss": 0.2575, "step": 11337 }, { "epoch": 1.344480018973082, "grad_norm": 1.3509699021809551, "learning_rate": 3.868702735070074e-05, "loss": 0.2935, "step": 11338 }, { "epoch": 1.3445986007352069, "grad_norm": 1.303693410970034, "learning_rate": 3.8685018622086545e-05, "loss": 0.2496, "step": 11339 }, { "epoch": 1.3447171824973319, "grad_norm": 0.9019047192998477, "learning_rate": 3.868300976731326e-05, "loss": 0.2097, "step": 11340 }, { "epoch": 1.3448357642594568, "grad_norm": 0.9753806188134191, "learning_rate": 3.86810007863994e-05, "loss": 0.2016, "step": 11341 }, { "epoch": 1.3449543460215818, "grad_norm": 0.9469060395657887, "learning_rate": 3.86789916793635e-05, "loss": 0.211, "step": 11342 }, { "epoch": 1.3450729277837068, "grad_norm": 1.1572520794034677, "learning_rate": 3.8676982446224065e-05, "loss": 0.2879, "step": 11343 }, { "epoch": 1.3451915095458318, "grad_norm": 1.2405378200255515, "learning_rate": 3.867497308699962e-05, "loss": 0.2758, "step": 11344 }, { "epoch": 1.3453100913079568, "grad_norm": 0.8999392253151615, "learning_rate": 3.86729636017087e-05, "loss": 0.1871, "step": 11345 }, { "epoch": 1.345428673070082, "grad_norm": 1.6378741959606367, "learning_rate": 3.867095399036982e-05, "loss": 0.3584, "step": 11346 }, { "epoch": 1.3455472548322067, "grad_norm": 1.3443030618956133, "learning_rate": 3.866894425300151e-05, "loss": 0.2694, "step": 11347 }, { "epoch": 1.345665836594332, "grad_norm": 0.9166398372519783, "learning_rate": 3.866693438962229e-05, "loss": 0.1568, "step": 11348 }, { "epoch": 1.3457844183564567, "grad_norm": 1.355685940404418, "learning_rate": 3.86649244002507e-05, "loss": 0.221, "step": 11349 }, { "epoch": 1.3459030001185819, "grad_norm": 0.9832628473411721, "learning_rate": 3.866291428490526e-05, "loss": 0.2008, "step": 11350 }, { "epoch": 1.3460215818807066, "grad_norm": 1.1189094352556128, "learning_rate": 3.866090404360451e-05, "loss": 0.2125, "step": 11351 }, { "epoch": 1.3461401636428318, "grad_norm": 1.0890232148020365, "learning_rate": 3.8658893676366967e-05, "loss": 0.2521, "step": 11352 }, { "epoch": 1.3462587454049566, "grad_norm": 1.1745467162633203, "learning_rate": 3.865688318321119e-05, "loss": 0.2452, "step": 11353 }, { "epoch": 1.3463773271670818, "grad_norm": 1.3025485627765956, "learning_rate": 3.865487256415569e-05, "loss": 0.326, "step": 11354 }, { "epoch": 1.3464959089292066, "grad_norm": 1.0396327064829547, "learning_rate": 3.8652861819219e-05, "loss": 0.2477, "step": 11355 }, { "epoch": 1.3466144906913318, "grad_norm": 1.1944055820533044, "learning_rate": 3.865085094841968e-05, "loss": 0.2793, "step": 11356 }, { "epoch": 1.3467330724534565, "grad_norm": 1.097262654238483, "learning_rate": 3.864883995177625e-05, "loss": 0.2502, "step": 11357 }, { "epoch": 1.3468516542155817, "grad_norm": 1.1176841114775684, "learning_rate": 3.8646828829307245e-05, "loss": 0.2505, "step": 11358 }, { "epoch": 1.3469702359777067, "grad_norm": 1.4079513930277667, "learning_rate": 3.864481758103122e-05, "loss": 0.2878, "step": 11359 }, { "epoch": 1.3470888177398317, "grad_norm": 1.7026719016718004, "learning_rate": 3.8642806206966705e-05, "loss": 0.4218, "step": 11360 }, { "epoch": 1.3472073995019567, "grad_norm": 1.1766599164811282, "learning_rate": 3.864079470713226e-05, "loss": 0.2466, "step": 11361 }, { "epoch": 1.3473259812640817, "grad_norm": 1.3158027699156127, "learning_rate": 3.8638783081546396e-05, "loss": 0.3565, "step": 11362 }, { "epoch": 1.3474445630262066, "grad_norm": 1.513043263900365, "learning_rate": 3.863677133022769e-05, "loss": 0.32, "step": 11363 }, { "epoch": 1.3475631447883316, "grad_norm": 1.273552397767931, "learning_rate": 3.863475945319467e-05, "loss": 0.2669, "step": 11364 }, { "epoch": 1.3476817265504566, "grad_norm": 1.1142239243212528, "learning_rate": 3.8632747450465886e-05, "loss": 0.2699, "step": 11365 }, { "epoch": 1.3478003083125816, "grad_norm": 0.7984180621583492, "learning_rate": 3.8630735322059896e-05, "loss": 0.1732, "step": 11366 }, { "epoch": 1.3479188900747066, "grad_norm": 0.9098685216599087, "learning_rate": 3.862872306799523e-05, "loss": 0.1916, "step": 11367 }, { "epoch": 1.3480374718368315, "grad_norm": 1.2101692578184067, "learning_rate": 3.862671068829046e-05, "loss": 0.2378, "step": 11368 }, { "epoch": 1.3481560535989565, "grad_norm": 1.1230849396548137, "learning_rate": 3.862469818296413e-05, "loss": 0.2779, "step": 11369 }, { "epoch": 1.3482746353610815, "grad_norm": 1.150992440136119, "learning_rate": 3.8622685552034775e-05, "loss": 0.2212, "step": 11370 }, { "epoch": 1.3483932171232065, "grad_norm": 1.084377620769417, "learning_rate": 3.8620672795520975e-05, "loss": 0.2845, "step": 11371 }, { "epoch": 1.3485117988853315, "grad_norm": 1.2633831808677862, "learning_rate": 3.861865991344128e-05, "loss": 0.2135, "step": 11372 }, { "epoch": 1.3486303806474564, "grad_norm": 1.0637690717644257, "learning_rate": 3.861664690581423e-05, "loss": 0.2256, "step": 11373 }, { "epoch": 1.3487489624095814, "grad_norm": 1.1129911926959992, "learning_rate": 3.86146337726584e-05, "loss": 0.2639, "step": 11374 }, { "epoch": 1.3488675441717064, "grad_norm": 1.1531771686187675, "learning_rate": 3.861262051399234e-05, "loss": 0.3201, "step": 11375 }, { "epoch": 1.3489861259338314, "grad_norm": 0.9055265714674992, "learning_rate": 3.861060712983461e-05, "loss": 0.2725, "step": 11376 }, { "epoch": 1.3491047076959564, "grad_norm": 1.1716273964328419, "learning_rate": 3.860859362020377e-05, "loss": 0.2554, "step": 11377 }, { "epoch": 1.3492232894580813, "grad_norm": 0.9134539562475308, "learning_rate": 3.86065799851184e-05, "loss": 0.2136, "step": 11378 }, { "epoch": 1.3493418712202063, "grad_norm": 1.6427361047093187, "learning_rate": 3.860456622459703e-05, "loss": 0.4836, "step": 11379 }, { "epoch": 1.3494604529823313, "grad_norm": 1.2976628892276372, "learning_rate": 3.860255233865825e-05, "loss": 0.3006, "step": 11380 }, { "epoch": 1.3495790347444563, "grad_norm": 0.9517775640458085, "learning_rate": 3.860053832732062e-05, "loss": 0.2244, "step": 11381 }, { "epoch": 1.3496976165065813, "grad_norm": 1.2441614374894716, "learning_rate": 3.85985241906027e-05, "loss": 0.212, "step": 11382 }, { "epoch": 1.3498161982687062, "grad_norm": 1.2780967276346673, "learning_rate": 3.859650992852306e-05, "loss": 0.2544, "step": 11383 }, { "epoch": 1.3499347800308312, "grad_norm": 1.047859594729622, "learning_rate": 3.8594495541100284e-05, "loss": 0.2207, "step": 11384 }, { "epoch": 1.3500533617929562, "grad_norm": 1.1028029192838678, "learning_rate": 3.8592481028352925e-05, "loss": 0.2796, "step": 11385 }, { "epoch": 1.3501719435550812, "grad_norm": 1.5292838791949017, "learning_rate": 3.859046639029955e-05, "loss": 0.291, "step": 11386 }, { "epoch": 1.3502905253172062, "grad_norm": 0.9107623643634299, "learning_rate": 3.858845162695875e-05, "loss": 0.1913, "step": 11387 }, { "epoch": 1.3504091070793312, "grad_norm": 1.0288877012352384, "learning_rate": 3.8586436738349085e-05, "loss": 0.249, "step": 11388 }, { "epoch": 1.3505276888414561, "grad_norm": 1.5006707839421543, "learning_rate": 3.8584421724489133e-05, "loss": 0.3838, "step": 11389 }, { "epoch": 1.3506462706035811, "grad_norm": 0.7504197158820977, "learning_rate": 3.858240658539747e-05, "loss": 0.1602, "step": 11390 }, { "epoch": 1.350764852365706, "grad_norm": 1.140577413657662, "learning_rate": 3.8580391321092677e-05, "loss": 0.2087, "step": 11391 }, { "epoch": 1.350883434127831, "grad_norm": 1.3040168860826156, "learning_rate": 3.857837593159334e-05, "loss": 0.3368, "step": 11392 }, { "epoch": 1.351002015889956, "grad_norm": 1.0446807976651733, "learning_rate": 3.857636041691801e-05, "loss": 0.2482, "step": 11393 }, { "epoch": 1.351120597652081, "grad_norm": 0.9951716259451728, "learning_rate": 3.857434477708529e-05, "loss": 0.2663, "step": 11394 }, { "epoch": 1.351239179414206, "grad_norm": 0.9788369625422153, "learning_rate": 3.857232901211376e-05, "loss": 0.2551, "step": 11395 }, { "epoch": 1.351357761176331, "grad_norm": 0.9907211688186556, "learning_rate": 3.857031312202201e-05, "loss": 0.2622, "step": 11396 }, { "epoch": 1.3514763429384562, "grad_norm": 0.7347940306863251, "learning_rate": 3.85682971068286e-05, "loss": 0.2129, "step": 11397 }, { "epoch": 1.351594924700581, "grad_norm": 1.1894439322544605, "learning_rate": 3.856628096655213e-05, "loss": 0.2705, "step": 11398 }, { "epoch": 1.3517135064627062, "grad_norm": 0.9316688260721552, "learning_rate": 3.856426470121119e-05, "loss": 0.2016, "step": 11399 }, { "epoch": 1.351832088224831, "grad_norm": 0.8100558185461376, "learning_rate": 3.8562248310824364e-05, "loss": 0.1823, "step": 11400 }, { "epoch": 1.3519506699869561, "grad_norm": 0.9245203688113711, "learning_rate": 3.8560231795410235e-05, "loss": 0.2065, "step": 11401 }, { "epoch": 1.3520692517490809, "grad_norm": 1.0173902926773999, "learning_rate": 3.85582151549874e-05, "loss": 0.2115, "step": 11402 }, { "epoch": 1.352187833511206, "grad_norm": 1.357252232103516, "learning_rate": 3.855619838957445e-05, "loss": 0.2949, "step": 11403 }, { "epoch": 1.3523064152733308, "grad_norm": 0.997860482678172, "learning_rate": 3.855418149918997e-05, "loss": 0.1918, "step": 11404 }, { "epoch": 1.352424997035456, "grad_norm": 1.144264533259226, "learning_rate": 3.8552164483852556e-05, "loss": 0.256, "step": 11405 }, { "epoch": 1.3525435787975808, "grad_norm": 1.1206608728417224, "learning_rate": 3.855014734358081e-05, "loss": 0.2218, "step": 11406 }, { "epoch": 1.352662160559706, "grad_norm": 1.3682389552199388, "learning_rate": 3.854813007839332e-05, "loss": 0.2574, "step": 11407 }, { "epoch": 1.3527807423218308, "grad_norm": 1.1305745854554832, "learning_rate": 3.854611268830869e-05, "loss": 0.306, "step": 11408 }, { "epoch": 1.352899324083956, "grad_norm": 1.318054436467911, "learning_rate": 3.85440951733455e-05, "loss": 0.239, "step": 11409 }, { "epoch": 1.353017905846081, "grad_norm": 1.0033563126130476, "learning_rate": 3.854207753352237e-05, "loss": 0.2536, "step": 11410 }, { "epoch": 1.353136487608206, "grad_norm": 0.7125593617735019, "learning_rate": 3.854005976885789e-05, "loss": 0.1806, "step": 11411 }, { "epoch": 1.353255069370331, "grad_norm": 1.3974778399922243, "learning_rate": 3.853804187937066e-05, "loss": 0.3547, "step": 11412 }, { "epoch": 1.353373651132456, "grad_norm": 1.1104395110232534, "learning_rate": 3.853602386507929e-05, "loss": 0.2493, "step": 11413 }, { "epoch": 1.3534922328945809, "grad_norm": 1.263538284543217, "learning_rate": 3.8534005726002376e-05, "loss": 0.2669, "step": 11414 }, { "epoch": 1.3536108146567059, "grad_norm": 1.2049415415286113, "learning_rate": 3.8531987462158524e-05, "loss": 0.274, "step": 11415 }, { "epoch": 1.3537293964188308, "grad_norm": 1.1146059623406104, "learning_rate": 3.8529969073566344e-05, "loss": 0.255, "step": 11416 }, { "epoch": 1.3538479781809558, "grad_norm": 1.010098811693673, "learning_rate": 3.852795056024445e-05, "loss": 0.2466, "step": 11417 }, { "epoch": 1.3539665599430808, "grad_norm": 1.1992444672807792, "learning_rate": 3.8525931922211426e-05, "loss": 0.2359, "step": 11418 }, { "epoch": 1.3540851417052058, "grad_norm": 0.8510258760655327, "learning_rate": 3.8523913159485904e-05, "loss": 0.1981, "step": 11419 }, { "epoch": 1.3542037234673308, "grad_norm": 0.906450223969736, "learning_rate": 3.852189427208648e-05, "loss": 0.201, "step": 11420 }, { "epoch": 1.3543223052294557, "grad_norm": 1.1278762662824224, "learning_rate": 3.851987526003178e-05, "loss": 0.2281, "step": 11421 }, { "epoch": 1.3544408869915807, "grad_norm": 1.0047371649560812, "learning_rate": 3.85178561233404e-05, "loss": 0.2413, "step": 11422 }, { "epoch": 1.3545594687537057, "grad_norm": 1.2722109638708838, "learning_rate": 3.851583686203097e-05, "loss": 0.3338, "step": 11423 }, { "epoch": 1.3546780505158307, "grad_norm": 1.1748380733464143, "learning_rate": 3.85138174761221e-05, "loss": 0.2291, "step": 11424 }, { "epoch": 1.3547966322779557, "grad_norm": 1.2066028118976486, "learning_rate": 3.8511797965632405e-05, "loss": 0.2925, "step": 11425 }, { "epoch": 1.3549152140400806, "grad_norm": 1.3245818944903474, "learning_rate": 3.85097783305805e-05, "loss": 0.2233, "step": 11426 }, { "epoch": 1.3550337958022056, "grad_norm": 0.9354892351774108, "learning_rate": 3.850775857098501e-05, "loss": 0.1735, "step": 11427 }, { "epoch": 1.3551523775643306, "grad_norm": 0.8693213181997481, "learning_rate": 3.850573868686454e-05, "loss": 0.2126, "step": 11428 }, { "epoch": 1.3552709593264556, "grad_norm": 1.1131390248192903, "learning_rate": 3.850371867823773e-05, "loss": 0.2133, "step": 11429 }, { "epoch": 1.3553895410885806, "grad_norm": 1.2837820255641308, "learning_rate": 3.85016985451232e-05, "loss": 0.2621, "step": 11430 }, { "epoch": 1.3555081228507055, "grad_norm": 1.1926858099527637, "learning_rate": 3.849967828753955e-05, "loss": 0.2202, "step": 11431 }, { "epoch": 1.3556267046128305, "grad_norm": 1.1856603051927073, "learning_rate": 3.8497657905505425e-05, "loss": 0.2476, "step": 11432 }, { "epoch": 1.3557452863749555, "grad_norm": 1.0296567531718979, "learning_rate": 3.849563739903944e-05, "loss": 0.2513, "step": 11433 }, { "epoch": 1.3558638681370805, "grad_norm": 1.0617623246765036, "learning_rate": 3.8493616768160245e-05, "loss": 0.1896, "step": 11434 }, { "epoch": 1.3559824498992055, "grad_norm": 1.4600344102096678, "learning_rate": 3.849159601288643e-05, "loss": 0.2537, "step": 11435 }, { "epoch": 1.3561010316613304, "grad_norm": 1.7070587640269037, "learning_rate": 3.8489575133236666e-05, "loss": 0.4201, "step": 11436 }, { "epoch": 1.3562196134234554, "grad_norm": 1.4783749561369266, "learning_rate": 3.848755412922955e-05, "loss": 0.2688, "step": 11437 }, { "epoch": 1.3563381951855804, "grad_norm": 0.9654320979982584, "learning_rate": 3.848553300088372e-05, "loss": 0.1826, "step": 11438 }, { "epoch": 1.3564567769477054, "grad_norm": 1.219432606481704, "learning_rate": 3.8483511748217814e-05, "loss": 0.2474, "step": 11439 }, { "epoch": 1.3565753587098304, "grad_norm": 0.7289999967829813, "learning_rate": 3.8481490371250475e-05, "loss": 0.1249, "step": 11440 }, { "epoch": 1.3566939404719554, "grad_norm": 1.2263985525783696, "learning_rate": 3.8479468870000316e-05, "loss": 0.2651, "step": 11441 }, { "epoch": 1.3568125222340803, "grad_norm": 1.0052616128333935, "learning_rate": 3.8477447244485984e-05, "loss": 0.2072, "step": 11442 }, { "epoch": 1.3569311039962053, "grad_norm": 1.0048199491247325, "learning_rate": 3.847542549472612e-05, "loss": 0.168, "step": 11443 }, { "epoch": 1.3570496857583303, "grad_norm": 1.2621016803257394, "learning_rate": 3.847340362073936e-05, "loss": 0.2765, "step": 11444 }, { "epoch": 1.3571682675204553, "grad_norm": 1.0924651932739444, "learning_rate": 3.8471381622544334e-05, "loss": 0.2425, "step": 11445 }, { "epoch": 1.3572868492825805, "grad_norm": 0.8015895859765464, "learning_rate": 3.84693595001597e-05, "loss": 0.1887, "step": 11446 }, { "epoch": 1.3574054310447052, "grad_norm": 1.005734182431909, "learning_rate": 3.8467337253604075e-05, "loss": 0.1423, "step": 11447 }, { "epoch": 1.3575240128068304, "grad_norm": 1.3060679015396828, "learning_rate": 3.8465314882896116e-05, "loss": 0.2507, "step": 11448 }, { "epoch": 1.3576425945689552, "grad_norm": 1.0049221637216814, "learning_rate": 3.8463292388054483e-05, "loss": 0.1895, "step": 11449 }, { "epoch": 1.3577611763310804, "grad_norm": 0.9269149054986429, "learning_rate": 3.8461269769097784e-05, "loss": 0.2073, "step": 11450 }, { "epoch": 1.3578797580932052, "grad_norm": 1.197734536741664, "learning_rate": 3.84592470260447e-05, "loss": 0.2147, "step": 11451 }, { "epoch": 1.3579983398553304, "grad_norm": 0.7225249640486606, "learning_rate": 3.845722415891385e-05, "loss": 0.1632, "step": 11452 }, { "epoch": 1.3581169216174551, "grad_norm": 1.171865478296089, "learning_rate": 3.84552011677239e-05, "loss": 0.275, "step": 11453 }, { "epoch": 1.3582355033795803, "grad_norm": 1.0862442512134167, "learning_rate": 3.84531780524935e-05, "loss": 0.2094, "step": 11454 }, { "epoch": 1.358354085141705, "grad_norm": 0.8161204653052915, "learning_rate": 3.845115481324129e-05, "loss": 0.1788, "step": 11455 }, { "epoch": 1.3584726669038303, "grad_norm": 1.2225481205543498, "learning_rate": 3.844913144998593e-05, "loss": 0.2468, "step": 11456 }, { "epoch": 1.358591248665955, "grad_norm": 1.3136179162515014, "learning_rate": 3.8447107962746077e-05, "loss": 0.2373, "step": 11457 }, { "epoch": 1.3587098304280802, "grad_norm": 0.898385227629991, "learning_rate": 3.844508435154037e-05, "loss": 0.1921, "step": 11458 }, { "epoch": 1.3588284121902052, "grad_norm": 1.3606633751763424, "learning_rate": 3.844306061638747e-05, "loss": 0.3157, "step": 11459 }, { "epoch": 1.3589469939523302, "grad_norm": 1.2579571844195148, "learning_rate": 3.844103675730605e-05, "loss": 0.2832, "step": 11460 }, { "epoch": 1.3590655757144552, "grad_norm": 1.0408158775680338, "learning_rate": 3.8439012774314744e-05, "loss": 0.26, "step": 11461 }, { "epoch": 1.3591841574765802, "grad_norm": 1.0095491466523432, "learning_rate": 3.843698866743221e-05, "loss": 0.2029, "step": 11462 }, { "epoch": 1.3593027392387051, "grad_norm": 1.0058870186760083, "learning_rate": 3.843496443667713e-05, "loss": 0.2253, "step": 11463 }, { "epoch": 1.3594213210008301, "grad_norm": 1.107608745386719, "learning_rate": 3.843294008206816e-05, "loss": 0.2284, "step": 11464 }, { "epoch": 1.359539902762955, "grad_norm": 0.8224455272525313, "learning_rate": 3.843091560362395e-05, "loss": 0.2049, "step": 11465 }, { "epoch": 1.35965848452508, "grad_norm": 1.1190316814987422, "learning_rate": 3.8428891001363157e-05, "loss": 0.215, "step": 11466 }, { "epoch": 1.359777066287205, "grad_norm": 0.9114033992956317, "learning_rate": 3.8426866275304475e-05, "loss": 0.2165, "step": 11467 }, { "epoch": 1.35989564804933, "grad_norm": 0.977401549705111, "learning_rate": 3.842484142546654e-05, "loss": 0.2453, "step": 11468 }, { "epoch": 1.360014229811455, "grad_norm": 0.976894446755083, "learning_rate": 3.8422816451868026e-05, "loss": 0.1721, "step": 11469 }, { "epoch": 1.36013281157358, "grad_norm": 0.8630101727429302, "learning_rate": 3.842079135452761e-05, "loss": 0.198, "step": 11470 }, { "epoch": 1.360251393335705, "grad_norm": 1.434368217782574, "learning_rate": 3.841876613346395e-05, "loss": 0.2354, "step": 11471 }, { "epoch": 1.36036997509783, "grad_norm": 1.3024601317440734, "learning_rate": 3.8416740788695725e-05, "loss": 0.3054, "step": 11472 }, { "epoch": 1.360488556859955, "grad_norm": 1.6649248820599774, "learning_rate": 3.8414715320241607e-05, "loss": 0.3261, "step": 11473 }, { "epoch": 1.36060713862208, "grad_norm": 0.9175847447753697, "learning_rate": 3.841268972812026e-05, "loss": 0.1727, "step": 11474 }, { "epoch": 1.360725720384205, "grad_norm": 0.7431815482127834, "learning_rate": 3.841066401235036e-05, "loss": 0.1701, "step": 11475 }, { "epoch": 1.36084430214633, "grad_norm": 0.900434844073702, "learning_rate": 3.840863817295058e-05, "loss": 0.1682, "step": 11476 }, { "epoch": 1.3609628839084549, "grad_norm": 0.773803422724477, "learning_rate": 3.8406612209939615e-05, "loss": 0.1528, "step": 11477 }, { "epoch": 1.3610814656705799, "grad_norm": 1.256355294037502, "learning_rate": 3.840458612333611e-05, "loss": 0.2643, "step": 11478 }, { "epoch": 1.3612000474327048, "grad_norm": 0.9112005880448195, "learning_rate": 3.840255991315876e-05, "loss": 0.21, "step": 11479 }, { "epoch": 1.3613186291948298, "grad_norm": 1.0700169181612262, "learning_rate": 3.840053357942625e-05, "loss": 0.2439, "step": 11480 }, { "epoch": 1.3614372109569548, "grad_norm": 0.9452277840257948, "learning_rate": 3.839850712215724e-05, "loss": 0.1754, "step": 11481 }, { "epoch": 1.3615557927190798, "grad_norm": 0.7380744725673793, "learning_rate": 3.839648054137044e-05, "loss": 0.1918, "step": 11482 }, { "epoch": 1.3616743744812048, "grad_norm": 0.953591297228943, "learning_rate": 3.839445383708451e-05, "loss": 0.1652, "step": 11483 }, { "epoch": 1.3617929562433297, "grad_norm": 0.8149213630635902, "learning_rate": 3.8392427009318144e-05, "loss": 0.1637, "step": 11484 }, { "epoch": 1.3619115380054547, "grad_norm": 0.9999671125470065, "learning_rate": 3.839040005809002e-05, "loss": 0.1996, "step": 11485 }, { "epoch": 1.3620301197675797, "grad_norm": 1.6573318953247655, "learning_rate": 3.838837298341883e-05, "loss": 0.3543, "step": 11486 }, { "epoch": 1.3621487015297047, "grad_norm": 0.9747477288308484, "learning_rate": 3.838634578532326e-05, "loss": 0.2647, "step": 11487 }, { "epoch": 1.3622672832918297, "grad_norm": 1.0588165656150592, "learning_rate": 3.8384318463822e-05, "loss": 0.2237, "step": 11488 }, { "epoch": 1.3623858650539546, "grad_norm": 1.4264621898063805, "learning_rate": 3.838229101893373e-05, "loss": 0.2617, "step": 11489 }, { "epoch": 1.3625044468160796, "grad_norm": 1.2926945236393466, "learning_rate": 3.838026345067714e-05, "loss": 0.2908, "step": 11490 }, { "epoch": 1.3626230285782046, "grad_norm": 1.2289080478488237, "learning_rate": 3.837823575907095e-05, "loss": 0.2753, "step": 11491 }, { "epoch": 1.3627416103403296, "grad_norm": 0.8101018912421671, "learning_rate": 3.8376207944133815e-05, "loss": 0.1868, "step": 11492 }, { "epoch": 1.3628601921024546, "grad_norm": 0.8769091064184533, "learning_rate": 3.8374180005884454e-05, "loss": 0.2225, "step": 11493 }, { "epoch": 1.3629787738645796, "grad_norm": 1.195922243412925, "learning_rate": 3.8372151944341553e-05, "loss": 0.2523, "step": 11494 }, { "epoch": 1.3630973556267045, "grad_norm": 0.9095187598873063, "learning_rate": 3.837012375952381e-05, "loss": 0.1529, "step": 11495 }, { "epoch": 1.3632159373888295, "grad_norm": 0.9448403451574178, "learning_rate": 3.8368095451449915e-05, "loss": 0.194, "step": 11496 }, { "epoch": 1.3633345191509547, "grad_norm": 1.1431082287109404, "learning_rate": 3.836606702013858e-05, "loss": 0.2198, "step": 11497 }, { "epoch": 1.3634531009130795, "grad_norm": 1.1404967249509663, "learning_rate": 3.836403846560849e-05, "loss": 0.24, "step": 11498 }, { "epoch": 1.3635716826752047, "grad_norm": 1.36666311170755, "learning_rate": 3.836200978787836e-05, "loss": 0.2736, "step": 11499 }, { "epoch": 1.3636902644373294, "grad_norm": 1.025543368466899, "learning_rate": 3.835998098696688e-05, "loss": 0.2328, "step": 11500 }, { "epoch": 1.3638088461994546, "grad_norm": 0.8847607228113296, "learning_rate": 3.835795206289277e-05, "loss": 0.1679, "step": 11501 }, { "epoch": 1.3639274279615794, "grad_norm": 0.7970909242100421, "learning_rate": 3.835592301567471e-05, "loss": 0.1229, "step": 11502 }, { "epoch": 1.3640460097237046, "grad_norm": 1.49047387835025, "learning_rate": 3.835389384533142e-05, "loss": 0.2874, "step": 11503 }, { "epoch": 1.3641645914858294, "grad_norm": 0.912482209221664, "learning_rate": 3.835186455188161e-05, "loss": 0.2111, "step": 11504 }, { "epoch": 1.3642831732479546, "grad_norm": 1.1077398766680024, "learning_rate": 3.834983513534398e-05, "loss": 0.228, "step": 11505 }, { "epoch": 1.3644017550100793, "grad_norm": 2.052197205717078, "learning_rate": 3.834780559573724e-05, "loss": 0.4021, "step": 11506 }, { "epoch": 1.3645203367722045, "grad_norm": 1.063067780351459, "learning_rate": 3.83457759330801e-05, "loss": 0.2013, "step": 11507 }, { "epoch": 1.3646389185343293, "grad_norm": 1.0036823501656926, "learning_rate": 3.834374614739127e-05, "loss": 0.242, "step": 11508 }, { "epoch": 1.3647575002964545, "grad_norm": 1.1326788997355022, "learning_rate": 3.834171623868946e-05, "loss": 0.2513, "step": 11509 }, { "epoch": 1.3648760820585795, "grad_norm": 0.8493611847781761, "learning_rate": 3.833968620699339e-05, "loss": 0.1966, "step": 11510 }, { "epoch": 1.3649946638207044, "grad_norm": 0.9890119229361736, "learning_rate": 3.833765605232178e-05, "loss": 0.2178, "step": 11511 }, { "epoch": 1.3651132455828294, "grad_norm": 1.1343872961005985, "learning_rate": 3.8335625774693326e-05, "loss": 0.2626, "step": 11512 }, { "epoch": 1.3652318273449544, "grad_norm": 1.2166273732383015, "learning_rate": 3.833359537412676e-05, "loss": 0.3019, "step": 11513 }, { "epoch": 1.3653504091070794, "grad_norm": 1.1402305228464653, "learning_rate": 3.83315648506408e-05, "loss": 0.2164, "step": 11514 }, { "epoch": 1.3654689908692044, "grad_norm": 1.503306693888745, "learning_rate": 3.832953420425415e-05, "loss": 0.3534, "step": 11515 }, { "epoch": 1.3655875726313293, "grad_norm": 1.7253554392617687, "learning_rate": 3.8327503434985554e-05, "loss": 0.3209, "step": 11516 }, { "epoch": 1.3657061543934543, "grad_norm": 1.093432528015903, "learning_rate": 3.83254725428537e-05, "loss": 0.2201, "step": 11517 }, { "epoch": 1.3658247361555793, "grad_norm": 0.8079654338126313, "learning_rate": 3.832344152787735e-05, "loss": 0.1727, "step": 11518 }, { "epoch": 1.3659433179177043, "grad_norm": 1.1586627624400723, "learning_rate": 3.8321410390075196e-05, "loss": 0.2257, "step": 11519 }, { "epoch": 1.3660618996798293, "grad_norm": 1.1831525800020468, "learning_rate": 3.831937912946598e-05, "loss": 0.2179, "step": 11520 }, { "epoch": 1.3661804814419543, "grad_norm": 1.0330196534882996, "learning_rate": 3.831734774606841e-05, "loss": 0.2669, "step": 11521 }, { "epoch": 1.3662990632040792, "grad_norm": 1.1053626875582552, "learning_rate": 3.831531623990124e-05, "loss": 0.1981, "step": 11522 }, { "epoch": 1.3664176449662042, "grad_norm": 1.340344268892906, "learning_rate": 3.8313284610983175e-05, "loss": 0.2764, "step": 11523 }, { "epoch": 1.3665362267283292, "grad_norm": 1.3839486056738226, "learning_rate": 3.8311252859332945e-05, "loss": 0.3521, "step": 11524 }, { "epoch": 1.3666548084904542, "grad_norm": 1.824123001805835, "learning_rate": 3.83092209849693e-05, "loss": 0.4044, "step": 11525 }, { "epoch": 1.3667733902525792, "grad_norm": 0.8807458330066636, "learning_rate": 3.830718898791095e-05, "loss": 0.1763, "step": 11526 }, { "epoch": 1.3668919720147041, "grad_norm": 1.1126973618387652, "learning_rate": 3.8305156868176645e-05, "loss": 0.2552, "step": 11527 }, { "epoch": 1.3670105537768291, "grad_norm": 1.0036934910623472, "learning_rate": 3.8303124625785104e-05, "loss": 0.2477, "step": 11528 }, { "epoch": 1.367129135538954, "grad_norm": 0.8512367144386453, "learning_rate": 3.8301092260755064e-05, "loss": 0.1729, "step": 11529 }, { "epoch": 1.367247717301079, "grad_norm": 0.918008725142799, "learning_rate": 3.829905977310527e-05, "loss": 0.2528, "step": 11530 }, { "epoch": 1.367366299063204, "grad_norm": 0.9827566146622403, "learning_rate": 3.829702716285445e-05, "loss": 0.2203, "step": 11531 }, { "epoch": 1.367484880825329, "grad_norm": 1.309830675658511, "learning_rate": 3.829499443002134e-05, "loss": 0.2768, "step": 11532 }, { "epoch": 1.367603462587454, "grad_norm": 1.017628792982879, "learning_rate": 3.82929615746247e-05, "loss": 0.2241, "step": 11533 }, { "epoch": 1.367722044349579, "grad_norm": 1.2112757510268062, "learning_rate": 3.829092859668324e-05, "loss": 0.2735, "step": 11534 }, { "epoch": 1.367840626111704, "grad_norm": 0.9531134267394311, "learning_rate": 3.8288895496215724e-05, "loss": 0.191, "step": 11535 }, { "epoch": 1.367959207873829, "grad_norm": 1.7717897626513173, "learning_rate": 3.828686227324088e-05, "loss": 0.504, "step": 11536 }, { "epoch": 1.368077789635954, "grad_norm": 1.1539688548838405, "learning_rate": 3.8284828927777474e-05, "loss": 0.2758, "step": 11537 }, { "epoch": 1.368196371398079, "grad_norm": 1.0438547459125327, "learning_rate": 3.8282795459844224e-05, "loss": 0.2184, "step": 11538 }, { "epoch": 1.368314953160204, "grad_norm": 1.0848696191484088, "learning_rate": 3.8280761869459894e-05, "loss": 0.2216, "step": 11539 }, { "epoch": 1.3684335349223289, "grad_norm": 1.127010423394401, "learning_rate": 3.827872815664322e-05, "loss": 0.2748, "step": 11540 }, { "epoch": 1.3685521166844539, "grad_norm": 1.1294223157916825, "learning_rate": 3.8276694321412956e-05, "loss": 0.241, "step": 11541 }, { "epoch": 1.3686706984465788, "grad_norm": 0.9698102749287434, "learning_rate": 3.827466036378785e-05, "loss": 0.2347, "step": 11542 }, { "epoch": 1.3687892802087038, "grad_norm": 1.0640896817871968, "learning_rate": 3.827262628378666e-05, "loss": 0.1931, "step": 11543 }, { "epoch": 1.3689078619708288, "grad_norm": 0.9317988244678225, "learning_rate": 3.8270592081428134e-05, "loss": 0.2018, "step": 11544 }, { "epoch": 1.3690264437329538, "grad_norm": 0.9241003050253291, "learning_rate": 3.826855775673102e-05, "loss": 0.2148, "step": 11545 }, { "epoch": 1.369145025495079, "grad_norm": 0.8968866785751012, "learning_rate": 3.8266523309714067e-05, "loss": 0.2259, "step": 11546 }, { "epoch": 1.3692636072572038, "grad_norm": 0.9649333983262626, "learning_rate": 3.8264488740396046e-05, "loss": 0.1959, "step": 11547 }, { "epoch": 1.369382189019329, "grad_norm": 0.9273549362921337, "learning_rate": 3.826245404879571e-05, "loss": 0.1734, "step": 11548 }, { "epoch": 1.3695007707814537, "grad_norm": 0.9587931340697953, "learning_rate": 3.8260419234931794e-05, "loss": 0.1631, "step": 11549 }, { "epoch": 1.369619352543579, "grad_norm": 1.100654722733813, "learning_rate": 3.825838429882308e-05, "loss": 0.2539, "step": 11550 }, { "epoch": 1.3697379343057037, "grad_norm": 1.1756270371914692, "learning_rate": 3.825634924048832e-05, "loss": 0.2464, "step": 11551 }, { "epoch": 1.3698565160678289, "grad_norm": 0.9665802287563825, "learning_rate": 3.8254314059946286e-05, "loss": 0.2167, "step": 11552 }, { "epoch": 1.3699750978299536, "grad_norm": 0.8383600202020025, "learning_rate": 3.825227875721572e-05, "loss": 0.1653, "step": 11553 }, { "epoch": 1.3700936795920788, "grad_norm": 0.7593284772416189, "learning_rate": 3.82502433323154e-05, "loss": 0.1445, "step": 11554 }, { "epoch": 1.3702122613542036, "grad_norm": 1.3742819439173186, "learning_rate": 3.824820778526409e-05, "loss": 0.3419, "step": 11555 }, { "epoch": 1.3703308431163288, "grad_norm": 1.093982319973032, "learning_rate": 3.8246172116080536e-05, "loss": 0.2115, "step": 11556 }, { "epoch": 1.3704494248784536, "grad_norm": 0.8894848345571955, "learning_rate": 3.824413632478353e-05, "loss": 0.2066, "step": 11557 }, { "epoch": 1.3705680066405788, "grad_norm": 1.1068800778365078, "learning_rate": 3.8242100411391825e-05, "loss": 0.2436, "step": 11558 }, { "epoch": 1.3706865884027037, "grad_norm": 1.2381972366975642, "learning_rate": 3.82400643759242e-05, "loss": 0.2716, "step": 11559 }, { "epoch": 1.3708051701648287, "grad_norm": 1.1203800997929338, "learning_rate": 3.82380282183994e-05, "loss": 0.2267, "step": 11560 }, { "epoch": 1.3709237519269537, "grad_norm": 1.0173883597786517, "learning_rate": 3.823599193883622e-05, "loss": 0.1802, "step": 11561 }, { "epoch": 1.3710423336890787, "grad_norm": 1.2049314011661474, "learning_rate": 3.823395553725343e-05, "loss": 0.2087, "step": 11562 }, { "epoch": 1.3711609154512037, "grad_norm": 1.1506471826278661, "learning_rate": 3.8231919013669805e-05, "loss": 0.2158, "step": 11563 }, { "epoch": 1.3712794972133286, "grad_norm": 1.5351631341909235, "learning_rate": 3.82298823681041e-05, "loss": 0.2089, "step": 11564 }, { "epoch": 1.3713980789754536, "grad_norm": 1.0754374741018191, "learning_rate": 3.822784560057512e-05, "loss": 0.2687, "step": 11565 }, { "epoch": 1.3715166607375786, "grad_norm": 1.181780986573648, "learning_rate": 3.8225808711101606e-05, "loss": 0.2546, "step": 11566 }, { "epoch": 1.3716352424997036, "grad_norm": 1.0863857071217833, "learning_rate": 3.822377169970236e-05, "loss": 0.2276, "step": 11567 }, { "epoch": 1.3717538242618286, "grad_norm": 1.1557923505945404, "learning_rate": 3.822173456639616e-05, "loss": 0.2198, "step": 11568 }, { "epoch": 1.3718724060239536, "grad_norm": 0.8602494473676461, "learning_rate": 3.8219697311201776e-05, "loss": 0.2225, "step": 11569 }, { "epoch": 1.3719909877860785, "grad_norm": 0.9471861373492907, "learning_rate": 3.8217659934137986e-05, "loss": 0.1714, "step": 11570 }, { "epoch": 1.3721095695482035, "grad_norm": 1.2191310002122917, "learning_rate": 3.8215622435223595e-05, "loss": 0.2694, "step": 11571 }, { "epoch": 1.3722281513103285, "grad_norm": 0.9498621765567019, "learning_rate": 3.8213584814477364e-05, "loss": 0.2492, "step": 11572 }, { "epoch": 1.3723467330724535, "grad_norm": 1.1840802267198383, "learning_rate": 3.8211547071918085e-05, "loss": 0.2118, "step": 11573 }, { "epoch": 1.3724653148345785, "grad_norm": 0.9708518021083982, "learning_rate": 3.820950920756454e-05, "loss": 0.2494, "step": 11574 }, { "epoch": 1.3725838965967034, "grad_norm": 0.9255620189134793, "learning_rate": 3.8207471221435524e-05, "loss": 0.2241, "step": 11575 }, { "epoch": 1.3727024783588284, "grad_norm": 1.1768253505733437, "learning_rate": 3.8205433113549824e-05, "loss": 0.2146, "step": 11576 }, { "epoch": 1.3728210601209534, "grad_norm": 0.8830450319762738, "learning_rate": 3.820339488392621e-05, "loss": 0.1895, "step": 11577 }, { "epoch": 1.3729396418830784, "grad_norm": 1.6767068833757066, "learning_rate": 3.82013565325835e-05, "loss": 0.3324, "step": 11578 }, { "epoch": 1.3730582236452034, "grad_norm": 1.136085427530583, "learning_rate": 3.819931805954047e-05, "loss": 0.2611, "step": 11579 }, { "epoch": 1.3731768054073283, "grad_norm": 1.141521663536434, "learning_rate": 3.8197279464815906e-05, "loss": 0.2312, "step": 11580 }, { "epoch": 1.3732953871694533, "grad_norm": 1.1129458080629637, "learning_rate": 3.819524074842861e-05, "loss": 0.2197, "step": 11581 }, { "epoch": 1.3734139689315783, "grad_norm": 1.0712925399265956, "learning_rate": 3.819320191039737e-05, "loss": 0.2008, "step": 11582 }, { "epoch": 1.3735325506937033, "grad_norm": 0.9896909329715283, "learning_rate": 3.8191162950741e-05, "loss": 0.2692, "step": 11583 }, { "epoch": 1.3736511324558283, "grad_norm": 1.015216987280327, "learning_rate": 3.8189123869478276e-05, "loss": 0.2214, "step": 11584 }, { "epoch": 1.3737697142179532, "grad_norm": 0.9429739214065641, "learning_rate": 3.818708466662801e-05, "loss": 0.1657, "step": 11585 }, { "epoch": 1.3738882959800782, "grad_norm": 1.4891212577670847, "learning_rate": 3.8185045342208986e-05, "loss": 0.2929, "step": 11586 }, { "epoch": 1.3740068777422032, "grad_norm": 1.5069521992734345, "learning_rate": 3.818300589624002e-05, "loss": 0.3735, "step": 11587 }, { "epoch": 1.3741254595043282, "grad_norm": 1.2264119528591744, "learning_rate": 3.81809663287399e-05, "loss": 0.2318, "step": 11588 }, { "epoch": 1.3742440412664532, "grad_norm": 1.0675630651723664, "learning_rate": 3.817892663972743e-05, "loss": 0.2338, "step": 11589 }, { "epoch": 1.3743626230285781, "grad_norm": 0.9458422756799072, "learning_rate": 3.817688682922143e-05, "loss": 0.2484, "step": 11590 }, { "epoch": 1.3744812047907031, "grad_norm": 0.9475420898628014, "learning_rate": 3.817484689724069e-05, "loss": 0.2186, "step": 11591 }, { "epoch": 1.374599786552828, "grad_norm": 1.0410249974497925, "learning_rate": 3.817280684380401e-05, "loss": 0.2179, "step": 11592 }, { "epoch": 1.374718368314953, "grad_norm": 0.9529085605981507, "learning_rate": 3.817076666893021e-05, "loss": 0.2341, "step": 11593 }, { "epoch": 1.374836950077078, "grad_norm": 0.9936687355267575, "learning_rate": 3.816872637263809e-05, "loss": 0.2282, "step": 11594 }, { "epoch": 1.374955531839203, "grad_norm": 1.0346597098881127, "learning_rate": 3.8166685954946456e-05, "loss": 0.2294, "step": 11595 }, { "epoch": 1.375074113601328, "grad_norm": 1.0148193278832296, "learning_rate": 3.8164645415874136e-05, "loss": 0.2002, "step": 11596 }, { "epoch": 1.3751926953634532, "grad_norm": 1.1855932846075419, "learning_rate": 3.816260475543991e-05, "loss": 0.2413, "step": 11597 }, { "epoch": 1.375311277125578, "grad_norm": 0.9187724556456782, "learning_rate": 3.816056397366263e-05, "loss": 0.1612, "step": 11598 }, { "epoch": 1.3754298588877032, "grad_norm": 1.0141195498137883, "learning_rate": 3.8158523070561084e-05, "loss": 0.1624, "step": 11599 }, { "epoch": 1.375548440649828, "grad_norm": 0.8879388542556891, "learning_rate": 3.815648204615408e-05, "loss": 0.1734, "step": 11600 }, { "epoch": 1.3756670224119532, "grad_norm": 0.99433493475632, "learning_rate": 3.8154440900460456e-05, "loss": 0.2118, "step": 11601 }, { "epoch": 1.375785604174078, "grad_norm": 0.8045154958643767, "learning_rate": 3.815239963349901e-05, "loss": 0.1331, "step": 11602 }, { "epoch": 1.3759041859362031, "grad_norm": 0.8049675780375013, "learning_rate": 3.8150358245288576e-05, "loss": 0.1671, "step": 11603 }, { "epoch": 1.3760227676983279, "grad_norm": 1.1362749011551458, "learning_rate": 3.814831673584796e-05, "loss": 0.2832, "step": 11604 }, { "epoch": 1.376141349460453, "grad_norm": 0.9426112946406048, "learning_rate": 3.8146275105195986e-05, "loss": 0.2094, "step": 11605 }, { "epoch": 1.3762599312225778, "grad_norm": 1.0271942317580842, "learning_rate": 3.814423335335148e-05, "loss": 0.2218, "step": 11606 }, { "epoch": 1.376378512984703, "grad_norm": 0.9351871789873166, "learning_rate": 3.814219148033326e-05, "loss": 0.2355, "step": 11607 }, { "epoch": 1.376497094746828, "grad_norm": 0.8841298083946295, "learning_rate": 3.8140149486160156e-05, "loss": 0.2, "step": 11608 }, { "epoch": 1.376615676508953, "grad_norm": 1.3034162019656979, "learning_rate": 3.813810737085098e-05, "loss": 0.2954, "step": 11609 }, { "epoch": 1.376734258271078, "grad_norm": 0.851182922106267, "learning_rate": 3.813606513442456e-05, "loss": 0.1462, "step": 11610 }, { "epoch": 1.376852840033203, "grad_norm": 1.0758896158256905, "learning_rate": 3.8134022776899735e-05, "loss": 0.2417, "step": 11611 }, { "epoch": 1.376971421795328, "grad_norm": 0.8121975138912816, "learning_rate": 3.813198029829532e-05, "loss": 0.1669, "step": 11612 }, { "epoch": 1.377090003557453, "grad_norm": 1.633042659954068, "learning_rate": 3.812993769863016e-05, "loss": 0.2659, "step": 11613 }, { "epoch": 1.377208585319578, "grad_norm": 1.0667632526822566, "learning_rate": 3.812789497792307e-05, "loss": 0.2201, "step": 11614 }, { "epoch": 1.3773271670817029, "grad_norm": 1.2622523259623089, "learning_rate": 3.81258521361929e-05, "loss": 0.2976, "step": 11615 }, { "epoch": 1.3774457488438279, "grad_norm": 1.0675948294172282, "learning_rate": 3.812380917345845e-05, "loss": 0.2095, "step": 11616 }, { "epoch": 1.3775643306059528, "grad_norm": 0.7717286231292465, "learning_rate": 3.8121766089738586e-05, "loss": 0.1719, "step": 11617 }, { "epoch": 1.3776829123680778, "grad_norm": 1.109007351821086, "learning_rate": 3.811972288505212e-05, "loss": 0.2306, "step": 11618 }, { "epoch": 1.3778014941302028, "grad_norm": 1.0634386313111028, "learning_rate": 3.8117679559417894e-05, "loss": 0.2265, "step": 11619 }, { "epoch": 1.3779200758923278, "grad_norm": 1.7928681841100762, "learning_rate": 3.8115636112854757e-05, "loss": 0.3998, "step": 11620 }, { "epoch": 1.3780386576544528, "grad_norm": 1.079541693607142, "learning_rate": 3.811359254538154e-05, "loss": 0.2202, "step": 11621 }, { "epoch": 1.3781572394165778, "grad_norm": 1.2273901826041866, "learning_rate": 3.8111548857017074e-05, "loss": 0.2429, "step": 11622 }, { "epoch": 1.3782758211787027, "grad_norm": 1.396203445518278, "learning_rate": 3.810950504778021e-05, "loss": 0.3112, "step": 11623 }, { "epoch": 1.3783944029408277, "grad_norm": 0.9264357221236016, "learning_rate": 3.8107461117689794e-05, "loss": 0.2397, "step": 11624 }, { "epoch": 1.3785129847029527, "grad_norm": 1.146154163362731, "learning_rate": 3.810541706676465e-05, "loss": 0.2221, "step": 11625 }, { "epoch": 1.3786315664650777, "grad_norm": 1.1765154749311542, "learning_rate": 3.8103372895023634e-05, "loss": 0.2548, "step": 11626 }, { "epoch": 1.3787501482272027, "grad_norm": 1.0137171693162335, "learning_rate": 3.810132860248559e-05, "loss": 0.2036, "step": 11627 }, { "epoch": 1.3788687299893276, "grad_norm": 1.1747356694680524, "learning_rate": 3.809928418916936e-05, "loss": 0.2546, "step": 11628 }, { "epoch": 1.3789873117514526, "grad_norm": 0.8178394483600226, "learning_rate": 3.809723965509379e-05, "loss": 0.171, "step": 11629 }, { "epoch": 1.3791058935135776, "grad_norm": 1.0185428391802518, "learning_rate": 3.8095195000277726e-05, "loss": 0.239, "step": 11630 }, { "epoch": 1.3792244752757026, "grad_norm": 1.518794680485174, "learning_rate": 3.809315022474003e-05, "loss": 0.3, "step": 11631 }, { "epoch": 1.3793430570378276, "grad_norm": 1.1779802847201954, "learning_rate": 3.809110532849955e-05, "loss": 0.2482, "step": 11632 }, { "epoch": 1.3794616387999525, "grad_norm": 1.16300185382879, "learning_rate": 3.8089060311575125e-05, "loss": 0.2317, "step": 11633 }, { "epoch": 1.3795802205620775, "grad_norm": 0.9136232371871255, "learning_rate": 3.808701517398562e-05, "loss": 0.1528, "step": 11634 }, { "epoch": 1.3796988023242025, "grad_norm": 0.9047472022175441, "learning_rate": 3.808496991574988e-05, "loss": 0.2689, "step": 11635 }, { "epoch": 1.3798173840863275, "grad_norm": 1.1227952552202538, "learning_rate": 3.8082924536886765e-05, "loss": 0.2256, "step": 11636 }, { "epoch": 1.3799359658484525, "grad_norm": 1.480725492619134, "learning_rate": 3.808087903741513e-05, "loss": 0.3263, "step": 11637 }, { "epoch": 1.3800545476105774, "grad_norm": 1.33639984186706, "learning_rate": 3.8078833417353823e-05, "loss": 0.2109, "step": 11638 }, { "epoch": 1.3801731293727024, "grad_norm": 0.9780719283877892, "learning_rate": 3.807678767672171e-05, "loss": 0.218, "step": 11639 }, { "epoch": 1.3802917111348274, "grad_norm": 0.9316156148818309, "learning_rate": 3.807474181553766e-05, "loss": 0.2025, "step": 11640 }, { "epoch": 1.3804102928969524, "grad_norm": 1.4107972915352154, "learning_rate": 3.8072695833820526e-05, "loss": 0.2474, "step": 11641 }, { "epoch": 1.3805288746590774, "grad_norm": 0.9223849617949521, "learning_rate": 3.807064973158916e-05, "loss": 0.1914, "step": 11642 }, { "epoch": 1.3806474564212023, "grad_norm": 1.1148120253183535, "learning_rate": 3.8068603508862434e-05, "loss": 0.2177, "step": 11643 }, { "epoch": 1.3807660381833273, "grad_norm": 1.5213213522216134, "learning_rate": 3.806655716565921e-05, "loss": 0.3332, "step": 11644 }, { "epoch": 1.3808846199454523, "grad_norm": 0.9529819526746945, "learning_rate": 3.806451070199835e-05, "loss": 0.2316, "step": 11645 }, { "epoch": 1.3810032017075775, "grad_norm": 1.126099537681736, "learning_rate": 3.8062464117898724e-05, "loss": 0.266, "step": 11646 }, { "epoch": 1.3811217834697023, "grad_norm": 1.1656469345886902, "learning_rate": 3.80604174133792e-05, "loss": 0.2484, "step": 11647 }, { "epoch": 1.3812403652318275, "grad_norm": 0.8726092315288855, "learning_rate": 3.8058370588458636e-05, "loss": 0.1724, "step": 11648 }, { "epoch": 1.3813589469939522, "grad_norm": 1.2099201150480496, "learning_rate": 3.805632364315591e-05, "loss": 0.2263, "step": 11649 }, { "epoch": 1.3814775287560774, "grad_norm": 2.072644380056955, "learning_rate": 3.8054276577489886e-05, "loss": 0.5645, "step": 11650 }, { "epoch": 1.3815961105182022, "grad_norm": 2.3265894384322734, "learning_rate": 3.805222939147945e-05, "loss": 0.6096, "step": 11651 }, { "epoch": 1.3817146922803274, "grad_norm": 0.9084863985400732, "learning_rate": 3.805018208514347e-05, "loss": 0.1949, "step": 11652 }, { "epoch": 1.3818332740424522, "grad_norm": 1.1455596802559262, "learning_rate": 3.80481346585008e-05, "loss": 0.2712, "step": 11653 }, { "epoch": 1.3819518558045774, "grad_norm": 1.3016917926864515, "learning_rate": 3.804608711157034e-05, "loss": 0.2982, "step": 11654 }, { "epoch": 1.3820704375667021, "grad_norm": 1.1178173485500835, "learning_rate": 3.804403944437095e-05, "loss": 0.2413, "step": 11655 }, { "epoch": 1.3821890193288273, "grad_norm": 1.0399181061615257, "learning_rate": 3.804199165692151e-05, "loss": 0.2305, "step": 11656 }, { "epoch": 1.382307601090952, "grad_norm": 1.152040362215518, "learning_rate": 3.8039943749240905e-05, "loss": 0.3089, "step": 11657 }, { "epoch": 1.3824261828530773, "grad_norm": 1.286512207853283, "learning_rate": 3.8037895721348007e-05, "loss": 0.2987, "step": 11658 }, { "epoch": 1.3825447646152023, "grad_norm": 1.1408209034155599, "learning_rate": 3.8035847573261695e-05, "loss": 0.2888, "step": 11659 }, { "epoch": 1.3826633463773272, "grad_norm": 1.3056536833291719, "learning_rate": 3.8033799305000856e-05, "loss": 0.3922, "step": 11660 }, { "epoch": 1.3827819281394522, "grad_norm": 1.0222787632008368, "learning_rate": 3.803175091658437e-05, "loss": 0.243, "step": 11661 }, { "epoch": 1.3829005099015772, "grad_norm": 0.8423498880318245, "learning_rate": 3.802970240803112e-05, "loss": 0.1834, "step": 11662 }, { "epoch": 1.3830190916637022, "grad_norm": 0.9798343266481615, "learning_rate": 3.8027653779359995e-05, "loss": 0.264, "step": 11663 }, { "epoch": 1.3831376734258272, "grad_norm": 1.515512063306586, "learning_rate": 3.802560503058988e-05, "loss": 0.4355, "step": 11664 }, { "epoch": 1.3832562551879521, "grad_norm": 1.3241622408159397, "learning_rate": 3.802355616173966e-05, "loss": 0.2996, "step": 11665 }, { "epoch": 1.3833748369500771, "grad_norm": 1.027519187556859, "learning_rate": 3.802150717282822e-05, "loss": 0.2413, "step": 11666 }, { "epoch": 1.383493418712202, "grad_norm": 1.3940291603035448, "learning_rate": 3.8019458063874445e-05, "loss": 0.2797, "step": 11667 }, { "epoch": 1.383612000474327, "grad_norm": 1.1145297411824455, "learning_rate": 3.801740883489724e-05, "loss": 0.241, "step": 11668 }, { "epoch": 1.383730582236452, "grad_norm": 1.9317419340792936, "learning_rate": 3.801535948591548e-05, "loss": 0.2815, "step": 11669 }, { "epoch": 1.383849163998577, "grad_norm": 1.336011361524933, "learning_rate": 3.801331001694808e-05, "loss": 0.3236, "step": 11670 }, { "epoch": 1.383967745760702, "grad_norm": 0.8520568292283144, "learning_rate": 3.801126042801391e-05, "loss": 0.1993, "step": 11671 }, { "epoch": 1.384086327522827, "grad_norm": 1.1248951978316697, "learning_rate": 3.8009210719131876e-05, "loss": 0.3045, "step": 11672 }, { "epoch": 1.384204909284952, "grad_norm": 0.9817534650911878, "learning_rate": 3.800716089032088e-05, "loss": 0.2435, "step": 11673 }, { "epoch": 1.384323491047077, "grad_norm": 1.0523897454457578, "learning_rate": 3.80051109415998e-05, "loss": 0.2522, "step": 11674 }, { "epoch": 1.384442072809202, "grad_norm": 0.950664754110988, "learning_rate": 3.800306087298755e-05, "loss": 0.1879, "step": 11675 }, { "epoch": 1.384560654571327, "grad_norm": 0.8518874958355376, "learning_rate": 3.8001010684503023e-05, "loss": 0.1772, "step": 11676 }, { "epoch": 1.384679236333452, "grad_norm": 0.9346029666852868, "learning_rate": 3.799896037616513e-05, "loss": 0.2269, "step": 11677 }, { "epoch": 1.384797818095577, "grad_norm": 0.9342471304717896, "learning_rate": 3.799690994799275e-05, "loss": 0.2379, "step": 11678 }, { "epoch": 1.3849163998577019, "grad_norm": 0.7594883705202069, "learning_rate": 3.79948594000048e-05, "loss": 0.1857, "step": 11679 }, { "epoch": 1.3850349816198269, "grad_norm": 1.1956819616438479, "learning_rate": 3.7992808732220174e-05, "loss": 0.2236, "step": 11680 }, { "epoch": 1.3851535633819518, "grad_norm": 0.9501790191938247, "learning_rate": 3.7990757944657795e-05, "loss": 0.2102, "step": 11681 }, { "epoch": 1.3852721451440768, "grad_norm": 1.0479365000232284, "learning_rate": 3.798870703733656e-05, "loss": 0.2162, "step": 11682 }, { "epoch": 1.3853907269062018, "grad_norm": 0.9181600166156222, "learning_rate": 3.798665601027537e-05, "loss": 0.2081, "step": 11683 }, { "epoch": 1.3855093086683268, "grad_norm": 1.1384317201913023, "learning_rate": 3.798460486349314e-05, "loss": 0.2052, "step": 11684 }, { "epoch": 1.3856278904304518, "grad_norm": 1.375226648540397, "learning_rate": 3.798255359700877e-05, "loss": 0.3145, "step": 11685 }, { "epoch": 1.3857464721925767, "grad_norm": 0.9720490622329074, "learning_rate": 3.7980502210841184e-05, "loss": 0.1543, "step": 11686 }, { "epoch": 1.3858650539547017, "grad_norm": 1.0974139392926017, "learning_rate": 3.7978450705009285e-05, "loss": 0.2259, "step": 11687 }, { "epoch": 1.3859836357168267, "grad_norm": 1.1655074874349016, "learning_rate": 3.797639907953198e-05, "loss": 0.2282, "step": 11688 }, { "epoch": 1.3861022174789517, "grad_norm": 1.1676214107590557, "learning_rate": 3.7974347334428186e-05, "loss": 0.2547, "step": 11689 }, { "epoch": 1.3862207992410767, "grad_norm": 0.9811833613767394, "learning_rate": 3.7972295469716824e-05, "loss": 0.1722, "step": 11690 }, { "epoch": 1.3863393810032016, "grad_norm": 0.9217234419740634, "learning_rate": 3.79702434854168e-05, "loss": 0.2052, "step": 11691 }, { "epoch": 1.3864579627653266, "grad_norm": 1.249622425796674, "learning_rate": 3.7968191381547044e-05, "loss": 0.2479, "step": 11692 }, { "epoch": 1.3865765445274516, "grad_norm": 0.9470408896613219, "learning_rate": 3.796613915812647e-05, "loss": 0.1989, "step": 11693 }, { "epoch": 1.3866951262895766, "grad_norm": 1.0287463204310414, "learning_rate": 3.7964086815173985e-05, "loss": 0.2309, "step": 11694 }, { "epoch": 1.3868137080517016, "grad_norm": 1.2883314885896133, "learning_rate": 3.796203435270852e-05, "loss": 0.3235, "step": 11695 }, { "epoch": 1.3869322898138265, "grad_norm": 1.1902605012635654, "learning_rate": 3.795998177074899e-05, "loss": 0.2782, "step": 11696 }, { "epoch": 1.3870508715759517, "grad_norm": 0.8943886225828693, "learning_rate": 3.795792906931432e-05, "loss": 0.217, "step": 11697 }, { "epoch": 1.3871694533380765, "grad_norm": 1.0353547084222592, "learning_rate": 3.795587624842344e-05, "loss": 0.208, "step": 11698 }, { "epoch": 1.3872880351002017, "grad_norm": 0.9753218361465616, "learning_rate": 3.795382330809526e-05, "loss": 0.2515, "step": 11699 }, { "epoch": 1.3874066168623265, "grad_norm": 1.2240788452078855, "learning_rate": 3.7951770248348714e-05, "loss": 0.2029, "step": 11700 }, { "epoch": 1.3875251986244517, "grad_norm": 0.849879815525532, "learning_rate": 3.794971706920274e-05, "loss": 0.1781, "step": 11701 }, { "epoch": 1.3876437803865764, "grad_norm": 1.2670248028831297, "learning_rate": 3.794766377067624e-05, "loss": 0.2519, "step": 11702 }, { "epoch": 1.3877623621487016, "grad_norm": 0.870006188856422, "learning_rate": 3.794561035278818e-05, "loss": 0.1735, "step": 11703 }, { "epoch": 1.3878809439108264, "grad_norm": 1.1249852039877075, "learning_rate": 3.794355681555745e-05, "loss": 0.2285, "step": 11704 }, { "epoch": 1.3879995256729516, "grad_norm": 1.236465061605593, "learning_rate": 3.7941503159003e-05, "loss": 0.2502, "step": 11705 }, { "epoch": 1.3881181074350764, "grad_norm": 1.3198892442342647, "learning_rate": 3.7939449383143765e-05, "loss": 0.2455, "step": 11706 }, { "epoch": 1.3882366891972016, "grad_norm": 0.8376962412763824, "learning_rate": 3.793739548799867e-05, "loss": 0.1525, "step": 11707 }, { "epoch": 1.3883552709593265, "grad_norm": 0.8736679623474605, "learning_rate": 3.793534147358666e-05, "loss": 0.2194, "step": 11708 }, { "epoch": 1.3884738527214515, "grad_norm": 0.9773442219153157, "learning_rate": 3.793328733992666e-05, "loss": 0.2265, "step": 11709 }, { "epoch": 1.3885924344835765, "grad_norm": 0.9678286249321079, "learning_rate": 3.793123308703761e-05, "loss": 0.1702, "step": 11710 }, { "epoch": 1.3887110162457015, "grad_norm": 1.3757003915674009, "learning_rate": 3.792917871493844e-05, "loss": 0.3329, "step": 11711 }, { "epoch": 1.3888295980078265, "grad_norm": 1.307952078222463, "learning_rate": 3.792712422364812e-05, "loss": 0.2832, "step": 11712 }, { "epoch": 1.3889481797699514, "grad_norm": 1.1528172371530734, "learning_rate": 3.7925069613185544e-05, "loss": 0.299, "step": 11713 }, { "epoch": 1.3890667615320764, "grad_norm": 1.33519536498042, "learning_rate": 3.792301488356969e-05, "loss": 0.2431, "step": 11714 }, { "epoch": 1.3891853432942014, "grad_norm": 1.3736881808250359, "learning_rate": 3.7920960034819474e-05, "loss": 0.2634, "step": 11715 }, { "epoch": 1.3893039250563264, "grad_norm": 0.8250222084118504, "learning_rate": 3.7918905066953856e-05, "loss": 0.1841, "step": 11716 }, { "epoch": 1.3894225068184514, "grad_norm": 1.1041381911855892, "learning_rate": 3.791684997999178e-05, "loss": 0.2795, "step": 11717 }, { "epoch": 1.3895410885805763, "grad_norm": 1.04829278744971, "learning_rate": 3.7914794773952186e-05, "loss": 0.2013, "step": 11718 }, { "epoch": 1.3896596703427013, "grad_norm": 0.9036315199865541, "learning_rate": 3.7912739448854015e-05, "loss": 0.211, "step": 11719 }, { "epoch": 1.3897782521048263, "grad_norm": 1.2388078387566788, "learning_rate": 3.791068400471623e-05, "loss": 0.3346, "step": 11720 }, { "epoch": 1.3898968338669513, "grad_norm": 1.0278791585679479, "learning_rate": 3.790862844155776e-05, "loss": 0.2372, "step": 11721 }, { "epoch": 1.3900154156290763, "grad_norm": 1.1933092327746997, "learning_rate": 3.7906572759397574e-05, "loss": 0.2939, "step": 11722 }, { "epoch": 1.3901339973912012, "grad_norm": 0.8871064739745295, "learning_rate": 3.790451695825461e-05, "loss": 0.1589, "step": 11723 }, { "epoch": 1.3902525791533262, "grad_norm": 0.8868960492789336, "learning_rate": 3.790246103814783e-05, "loss": 0.1856, "step": 11724 }, { "epoch": 1.3903711609154512, "grad_norm": 0.9453466696642369, "learning_rate": 3.7900404999096176e-05, "loss": 0.1974, "step": 11725 }, { "epoch": 1.3904897426775762, "grad_norm": 1.146836015019331, "learning_rate": 3.789834884111861e-05, "loss": 0.319, "step": 11726 }, { "epoch": 1.3906083244397012, "grad_norm": 0.7689444107893225, "learning_rate": 3.789629256423408e-05, "loss": 0.1597, "step": 11727 }, { "epoch": 1.3907269062018262, "grad_norm": 0.8697322990585152, "learning_rate": 3.789423616846155e-05, "loss": 0.1964, "step": 11728 }, { "epoch": 1.3908454879639511, "grad_norm": 1.4327668193771204, "learning_rate": 3.7892179653819974e-05, "loss": 0.2773, "step": 11729 }, { "epoch": 1.3909640697260761, "grad_norm": 1.0005031257388581, "learning_rate": 3.789012302032831e-05, "loss": 0.2074, "step": 11730 }, { "epoch": 1.391082651488201, "grad_norm": 0.8623139045757928, "learning_rate": 3.788806626800553e-05, "loss": 0.1771, "step": 11731 }, { "epoch": 1.391201233250326, "grad_norm": 1.07298305362667, "learning_rate": 3.788600939687057e-05, "loss": 0.2378, "step": 11732 }, { "epoch": 1.391319815012451, "grad_norm": 1.1404889360853088, "learning_rate": 3.788395240694241e-05, "loss": 0.2385, "step": 11733 }, { "epoch": 1.391438396774576, "grad_norm": 1.2103331975844942, "learning_rate": 3.788189529824e-05, "loss": 0.2908, "step": 11734 }, { "epoch": 1.391556978536701, "grad_norm": 2.7156189389018635, "learning_rate": 3.787983807078233e-05, "loss": 0.1935, "step": 11735 }, { "epoch": 1.391675560298826, "grad_norm": 1.5896943841687383, "learning_rate": 3.787778072458833e-05, "loss": 0.2962, "step": 11736 }, { "epoch": 1.391794142060951, "grad_norm": 1.4440622629811657, "learning_rate": 3.7875723259677e-05, "loss": 0.3253, "step": 11737 }, { "epoch": 1.391912723823076, "grad_norm": 0.8190177231102774, "learning_rate": 3.787366567606727e-05, "loss": 0.1758, "step": 11738 }, { "epoch": 1.392031305585201, "grad_norm": 1.6331976136890054, "learning_rate": 3.7871607973778144e-05, "loss": 0.4214, "step": 11739 }, { "epoch": 1.392149887347326, "grad_norm": 1.2860629235938787, "learning_rate": 3.786955015282857e-05, "loss": 0.223, "step": 11740 }, { "epoch": 1.392268469109451, "grad_norm": 1.1303607993400593, "learning_rate": 3.786749221323752e-05, "loss": 0.2812, "step": 11741 }, { "epoch": 1.3923870508715759, "grad_norm": 1.3279355384013456, "learning_rate": 3.786543415502399e-05, "loss": 0.2755, "step": 11742 }, { "epoch": 1.3925056326337009, "grad_norm": 0.9986038301181903, "learning_rate": 3.7863375978206915e-05, "loss": 0.2531, "step": 11743 }, { "epoch": 1.3926242143958258, "grad_norm": 0.8311163384039943, "learning_rate": 3.78613176828053e-05, "loss": 0.1935, "step": 11744 }, { "epoch": 1.3927427961579508, "grad_norm": 0.8308046324562159, "learning_rate": 3.7859259268838106e-05, "loss": 0.1863, "step": 11745 }, { "epoch": 1.392861377920076, "grad_norm": 2.5046081212450813, "learning_rate": 3.7857200736324305e-05, "loss": 0.271, "step": 11746 }, { "epoch": 1.3929799596822008, "grad_norm": 0.8575847534885975, "learning_rate": 3.7855142085282887e-05, "loss": 0.18, "step": 11747 }, { "epoch": 1.393098541444326, "grad_norm": 0.8984090271342068, "learning_rate": 3.785308331573282e-05, "loss": 0.1848, "step": 11748 }, { "epoch": 1.3932171232064507, "grad_norm": 1.2230106267321248, "learning_rate": 3.785102442769308e-05, "loss": 0.2375, "step": 11749 }, { "epoch": 1.393335704968576, "grad_norm": 1.1171287686649092, "learning_rate": 3.784896542118266e-05, "loss": 0.2653, "step": 11750 }, { "epoch": 1.3934542867307007, "grad_norm": 1.063427377403751, "learning_rate": 3.784690629622053e-05, "loss": 0.206, "step": 11751 }, { "epoch": 1.393572868492826, "grad_norm": 0.8617395483586597, "learning_rate": 3.784484705282568e-05, "loss": 0.1761, "step": 11752 }, { "epoch": 1.3936914502549507, "grad_norm": 1.0298999241052507, "learning_rate": 3.7842787691017096e-05, "loss": 0.2664, "step": 11753 }, { "epoch": 1.3938100320170759, "grad_norm": 1.0384370806660737, "learning_rate": 3.7840728210813756e-05, "loss": 0.2322, "step": 11754 }, { "epoch": 1.3939286137792006, "grad_norm": 1.1372328081606993, "learning_rate": 3.783866861223465e-05, "loss": 0.2598, "step": 11755 }, { "epoch": 1.3940471955413258, "grad_norm": 0.9977061737362231, "learning_rate": 3.783660889529876e-05, "loss": 0.1986, "step": 11756 }, { "epoch": 1.3941657773034506, "grad_norm": 1.4764942900584554, "learning_rate": 3.7834549060025084e-05, "loss": 0.2102, "step": 11757 }, { "epoch": 1.3942843590655758, "grad_norm": 1.0221256093147484, "learning_rate": 3.783248910643259e-05, "loss": 0.1585, "step": 11758 }, { "epoch": 1.3944029408277008, "grad_norm": 0.797291166634366, "learning_rate": 3.783042903454029e-05, "loss": 0.1783, "step": 11759 }, { "epoch": 1.3945215225898258, "grad_norm": 0.8529992128847337, "learning_rate": 3.782836884436717e-05, "loss": 0.1711, "step": 11760 }, { "epoch": 1.3946401043519507, "grad_norm": 1.3322098087604564, "learning_rate": 3.782630853593222e-05, "loss": 0.3112, "step": 11761 }, { "epoch": 1.3947586861140757, "grad_norm": 1.273044098823193, "learning_rate": 3.782424810925444e-05, "loss": 0.1832, "step": 11762 }, { "epoch": 1.3948772678762007, "grad_norm": 1.6608952704991797, "learning_rate": 3.782218756435281e-05, "loss": 0.3393, "step": 11763 }, { "epoch": 1.3949958496383257, "grad_norm": 0.9104161438772452, "learning_rate": 3.7820126901246334e-05, "loss": 0.1875, "step": 11764 }, { "epoch": 1.3951144314004507, "grad_norm": 1.1481563404347932, "learning_rate": 3.781806611995401e-05, "loss": 0.2359, "step": 11765 }, { "epoch": 1.3952330131625756, "grad_norm": 1.173275331474507, "learning_rate": 3.781600522049484e-05, "loss": 0.2571, "step": 11766 }, { "epoch": 1.3953515949247006, "grad_norm": 1.1470768540722223, "learning_rate": 3.781394420288781e-05, "loss": 0.2518, "step": 11767 }, { "epoch": 1.3954701766868256, "grad_norm": 1.1107885629881789, "learning_rate": 3.7811883067151935e-05, "loss": 0.2457, "step": 11768 }, { "epoch": 1.3955887584489506, "grad_norm": 2.490921613395685, "learning_rate": 3.78098218133062e-05, "loss": 0.2939, "step": 11769 }, { "epoch": 1.3957073402110756, "grad_norm": 1.0231487679148645, "learning_rate": 3.780776044136963e-05, "loss": 0.2085, "step": 11770 }, { "epoch": 1.3958259219732005, "grad_norm": 0.9406222292479104, "learning_rate": 3.78056989513612e-05, "loss": 0.1853, "step": 11771 }, { "epoch": 1.3959445037353255, "grad_norm": 1.0062257216473427, "learning_rate": 3.780363734329994e-05, "loss": 0.1847, "step": 11772 }, { "epoch": 1.3960630854974505, "grad_norm": 1.3313921377081395, "learning_rate": 3.780157561720484e-05, "loss": 0.2355, "step": 11773 }, { "epoch": 1.3961816672595755, "grad_norm": 1.011055802173935, "learning_rate": 3.779951377309492e-05, "loss": 0.1834, "step": 11774 }, { "epoch": 1.3963002490217005, "grad_norm": 1.3306998606902443, "learning_rate": 3.7797451810989166e-05, "loss": 0.2434, "step": 11775 }, { "epoch": 1.3964188307838254, "grad_norm": 1.0363478591591835, "learning_rate": 3.7795389730906604e-05, "loss": 0.2588, "step": 11776 }, { "epoch": 1.3965374125459504, "grad_norm": 0.9159301178298277, "learning_rate": 3.779332753286624e-05, "loss": 0.2019, "step": 11777 }, { "epoch": 1.3966559943080754, "grad_norm": 1.1827818095373892, "learning_rate": 3.7791265216887085e-05, "loss": 0.2138, "step": 11778 }, { "epoch": 1.3967745760702004, "grad_norm": 0.9246945868409026, "learning_rate": 3.778920278298814e-05, "loss": 0.1631, "step": 11779 }, { "epoch": 1.3968931578323254, "grad_norm": 0.9405347278233707, "learning_rate": 3.7787140231188446e-05, "loss": 0.2274, "step": 11780 }, { "epoch": 1.3970117395944504, "grad_norm": 0.9645836219093814, "learning_rate": 3.7785077561506986e-05, "loss": 0.2364, "step": 11781 }, { "epoch": 1.3971303213565753, "grad_norm": 1.0131436963429135, "learning_rate": 3.77830147739628e-05, "loss": 0.2385, "step": 11782 }, { "epoch": 1.3972489031187003, "grad_norm": 1.2703098196935425, "learning_rate": 3.778095186857489e-05, "loss": 0.2502, "step": 11783 }, { "epoch": 1.3973674848808253, "grad_norm": 1.2852045851020282, "learning_rate": 3.777888884536227e-05, "loss": 0.2135, "step": 11784 }, { "epoch": 1.3974860666429503, "grad_norm": 0.8306602826284123, "learning_rate": 3.7776825704343975e-05, "loss": 0.1908, "step": 11785 }, { "epoch": 1.3976046484050753, "grad_norm": 1.0203139471756995, "learning_rate": 3.777476244553901e-05, "loss": 0.1976, "step": 11786 }, { "epoch": 1.3977232301672002, "grad_norm": 1.4870112607633188, "learning_rate": 3.777269906896641e-05, "loss": 0.2798, "step": 11787 }, { "epoch": 1.3978418119293252, "grad_norm": 1.0975671781888432, "learning_rate": 3.777063557464517e-05, "loss": 0.2967, "step": 11788 }, { "epoch": 1.3979603936914502, "grad_norm": 1.2504346582059775, "learning_rate": 3.7768571962594333e-05, "loss": 0.2741, "step": 11789 }, { "epoch": 1.3980789754535752, "grad_norm": 1.3340061862501758, "learning_rate": 3.776650823283293e-05, "loss": 0.3024, "step": 11790 }, { "epoch": 1.3981975572157002, "grad_norm": 1.047190534672841, "learning_rate": 3.776444438537997e-05, "loss": 0.2514, "step": 11791 }, { "epoch": 1.3983161389778251, "grad_norm": 0.9404263242038233, "learning_rate": 3.77623804202545e-05, "loss": 0.149, "step": 11792 }, { "epoch": 1.3984347207399501, "grad_norm": 1.7352635522962636, "learning_rate": 3.776031633747551e-05, "loss": 0.301, "step": 11793 }, { "epoch": 1.398553302502075, "grad_norm": 1.6666972031718896, "learning_rate": 3.775825213706207e-05, "loss": 0.4312, "step": 11794 }, { "epoch": 1.3986718842642003, "grad_norm": 0.7878124719931893, "learning_rate": 3.7756187819033176e-05, "loss": 0.1575, "step": 11795 }, { "epoch": 1.398790466026325, "grad_norm": 1.0766199308433646, "learning_rate": 3.775412338340788e-05, "loss": 0.2398, "step": 11796 }, { "epoch": 1.3989090477884503, "grad_norm": 1.0052987778744344, "learning_rate": 3.775205883020521e-05, "loss": 0.199, "step": 11797 }, { "epoch": 1.399027629550575, "grad_norm": 0.8259400668321436, "learning_rate": 3.7749994159444194e-05, "loss": 0.1459, "step": 11798 }, { "epoch": 1.3991462113127002, "grad_norm": 0.9589804964011711, "learning_rate": 3.7747929371143855e-05, "loss": 0.1789, "step": 11799 }, { "epoch": 1.399264793074825, "grad_norm": 0.852149364912768, "learning_rate": 3.774586446532326e-05, "loss": 0.1561, "step": 11800 }, { "epoch": 1.3993833748369502, "grad_norm": 1.4971129761212456, "learning_rate": 3.77437994420014e-05, "loss": 0.2937, "step": 11801 }, { "epoch": 1.399501956599075, "grad_norm": 0.8262874223727408, "learning_rate": 3.7741734301197365e-05, "loss": 0.1406, "step": 11802 }, { "epoch": 1.3996205383612002, "grad_norm": 0.680667999385187, "learning_rate": 3.773966904293014e-05, "loss": 0.1435, "step": 11803 }, { "epoch": 1.399739120123325, "grad_norm": 1.18648017920351, "learning_rate": 3.77376036672188e-05, "loss": 0.2545, "step": 11804 }, { "epoch": 1.3998577018854501, "grad_norm": 1.0060454719375114, "learning_rate": 3.7735538174082376e-05, "loss": 0.1967, "step": 11805 }, { "epoch": 1.3999762836475749, "grad_norm": 1.0444755755816888, "learning_rate": 3.773347256353991e-05, "loss": 0.2311, "step": 11806 }, { "epoch": 1.4000948654097, "grad_norm": 1.1484352785090235, "learning_rate": 3.773140683561043e-05, "loss": 0.2349, "step": 11807 }, { "epoch": 1.400213447171825, "grad_norm": 0.8217323013419425, "learning_rate": 3.7729340990312996e-05, "loss": 0.1687, "step": 11808 }, { "epoch": 1.40033202893395, "grad_norm": 1.0959543801613512, "learning_rate": 3.772727502766665e-05, "loss": 0.187, "step": 11809 }, { "epoch": 1.400450610696075, "grad_norm": 1.0823676514694107, "learning_rate": 3.7725208947690424e-05, "loss": 0.2184, "step": 11810 }, { "epoch": 1.4005691924582, "grad_norm": 1.044664026383668, "learning_rate": 3.7723142750403394e-05, "loss": 0.2033, "step": 11811 }, { "epoch": 1.400687774220325, "grad_norm": 1.2886690514722077, "learning_rate": 3.772107643582459e-05, "loss": 0.2838, "step": 11812 }, { "epoch": 1.40080635598245, "grad_norm": 0.9545228452033405, "learning_rate": 3.771901000397305e-05, "loss": 0.2544, "step": 11813 }, { "epoch": 1.400924937744575, "grad_norm": 1.4028537881836198, "learning_rate": 3.7716943454867835e-05, "loss": 0.3332, "step": 11814 }, { "epoch": 1.4010435195067, "grad_norm": 1.7008168368856453, "learning_rate": 3.7714876788528004e-05, "loss": 0.3563, "step": 11815 }, { "epoch": 1.401162101268825, "grad_norm": 0.9842667966036264, "learning_rate": 3.77128100049726e-05, "loss": 0.2259, "step": 11816 }, { "epoch": 1.4012806830309499, "grad_norm": 1.5080300995092297, "learning_rate": 3.771074310422067e-05, "loss": 0.3236, "step": 11817 }, { "epoch": 1.4013992647930749, "grad_norm": 1.1789850316031516, "learning_rate": 3.770867608629128e-05, "loss": 0.222, "step": 11818 }, { "epoch": 1.4015178465551998, "grad_norm": 1.2054065142076724, "learning_rate": 3.7706608951203476e-05, "loss": 0.2333, "step": 11819 }, { "epoch": 1.4016364283173248, "grad_norm": 0.8048241079781335, "learning_rate": 3.770454169897633e-05, "loss": 0.1812, "step": 11820 }, { "epoch": 1.4017550100794498, "grad_norm": 1.12448350324929, "learning_rate": 3.770247432962888e-05, "loss": 0.2011, "step": 11821 }, { "epoch": 1.4018735918415748, "grad_norm": 0.9458123167953258, "learning_rate": 3.770040684318019e-05, "loss": 0.1709, "step": 11822 }, { "epoch": 1.4019921736036998, "grad_norm": 1.1186694402959132, "learning_rate": 3.7698339239649336e-05, "loss": 0.2494, "step": 11823 }, { "epoch": 1.4021107553658247, "grad_norm": 1.4487726670457945, "learning_rate": 3.7696271519055354e-05, "loss": 0.3046, "step": 11824 }, { "epoch": 1.4022293371279497, "grad_norm": 1.221742077414103, "learning_rate": 3.769420368141732e-05, "loss": 0.2571, "step": 11825 }, { "epoch": 1.4023479188900747, "grad_norm": 0.8406633998141934, "learning_rate": 3.76921357267543e-05, "loss": 0.1916, "step": 11826 }, { "epoch": 1.4024665006521997, "grad_norm": 1.039797084956976, "learning_rate": 3.769006765508535e-05, "loss": 0.2388, "step": 11827 }, { "epoch": 1.4025850824143247, "grad_norm": 0.8710523986047978, "learning_rate": 3.7687999466429534e-05, "loss": 0.1779, "step": 11828 }, { "epoch": 1.4027036641764496, "grad_norm": 1.0326667033584702, "learning_rate": 3.768593116080593e-05, "loss": 0.198, "step": 11829 }, { "epoch": 1.4028222459385746, "grad_norm": 0.8118419291513017, "learning_rate": 3.7683862738233594e-05, "loss": 0.2045, "step": 11830 }, { "epoch": 1.4029408277006996, "grad_norm": 0.9993917635933244, "learning_rate": 3.768179419873159e-05, "loss": 0.1749, "step": 11831 }, { "epoch": 1.4030594094628246, "grad_norm": 1.0837966681869857, "learning_rate": 3.767972554231901e-05, "loss": 0.2317, "step": 11832 }, { "epoch": 1.4031779912249496, "grad_norm": 0.8601307548568242, "learning_rate": 3.7677656769014904e-05, "loss": 0.161, "step": 11833 }, { "epoch": 1.4032965729870746, "grad_norm": 1.1534127262282365, "learning_rate": 3.767558787883834e-05, "loss": 0.2261, "step": 11834 }, { "epoch": 1.4034151547491995, "grad_norm": 1.5879504687784103, "learning_rate": 3.7673518871808404e-05, "loss": 0.3387, "step": 11835 }, { "epoch": 1.4035337365113245, "grad_norm": 1.2978821830014682, "learning_rate": 3.7671449747944174e-05, "loss": 0.2628, "step": 11836 }, { "epoch": 1.4036523182734495, "grad_norm": 0.9078406631796384, "learning_rate": 3.766938050726471e-05, "loss": 0.1979, "step": 11837 }, { "epoch": 1.4037709000355745, "grad_norm": 1.0414034786146722, "learning_rate": 3.7667311149789084e-05, "loss": 0.2022, "step": 11838 }, { "epoch": 1.4038894817976995, "grad_norm": 1.0819858225469132, "learning_rate": 3.766524167553639e-05, "loss": 0.2153, "step": 11839 }, { "epoch": 1.4040080635598244, "grad_norm": 1.0792031994468052, "learning_rate": 3.76631720845257e-05, "loss": 0.2576, "step": 11840 }, { "epoch": 1.4041266453219494, "grad_norm": 1.425698747719983, "learning_rate": 3.7661102376776094e-05, "loss": 0.2905, "step": 11841 }, { "epoch": 1.4042452270840744, "grad_norm": 1.1790534461351472, "learning_rate": 3.765903255230665e-05, "loss": 0.2384, "step": 11842 }, { "epoch": 1.4043638088461994, "grad_norm": 0.9195574251849155, "learning_rate": 3.765696261113645e-05, "loss": 0.2024, "step": 11843 }, { "epoch": 1.4044823906083244, "grad_norm": 1.5577361842521718, "learning_rate": 3.765489255328457e-05, "loss": 0.2465, "step": 11844 }, { "epoch": 1.4046009723704493, "grad_norm": 1.0650532456756472, "learning_rate": 3.765282237877011e-05, "loss": 0.2096, "step": 11845 }, { "epoch": 1.4047195541325745, "grad_norm": 1.3015790703224803, "learning_rate": 3.765075208761213e-05, "loss": 0.2567, "step": 11846 }, { "epoch": 1.4048381358946993, "grad_norm": 1.0246279394618798, "learning_rate": 3.764868167982974e-05, "loss": 0.214, "step": 11847 }, { "epoch": 1.4049567176568245, "grad_norm": 1.0740297637658935, "learning_rate": 3.764661115544201e-05, "loss": 0.2304, "step": 11848 }, { "epoch": 1.4050752994189493, "grad_norm": 1.0723701209678436, "learning_rate": 3.764454051446804e-05, "loss": 0.2377, "step": 11849 }, { "epoch": 1.4051938811810745, "grad_norm": 1.3871986202051383, "learning_rate": 3.76424697569269e-05, "loss": 0.2347, "step": 11850 }, { "epoch": 1.4053124629431992, "grad_norm": 0.7979625622890033, "learning_rate": 3.76403988828377e-05, "loss": 0.137, "step": 11851 }, { "epoch": 1.4054310447053244, "grad_norm": 0.9745408744635045, "learning_rate": 3.763832789221953e-05, "loss": 0.2296, "step": 11852 }, { "epoch": 1.4055496264674492, "grad_norm": 0.92467853672325, "learning_rate": 3.7636256785091465e-05, "loss": 0.213, "step": 11853 }, { "epoch": 1.4056682082295744, "grad_norm": 1.1455798346712767, "learning_rate": 3.7634185561472606e-05, "loss": 0.2676, "step": 11854 }, { "epoch": 1.4057867899916991, "grad_norm": 0.991628055648665, "learning_rate": 3.7632114221382056e-05, "loss": 0.2446, "step": 11855 }, { "epoch": 1.4059053717538244, "grad_norm": 1.5136196999683604, "learning_rate": 3.76300427648389e-05, "loss": 0.3018, "step": 11856 }, { "epoch": 1.406023953515949, "grad_norm": 1.1249043015232225, "learning_rate": 3.762797119186224e-05, "loss": 0.2798, "step": 11857 }, { "epoch": 1.4061425352780743, "grad_norm": 1.1228322416095846, "learning_rate": 3.7625899502471165e-05, "loss": 0.2096, "step": 11858 }, { "epoch": 1.4062611170401993, "grad_norm": 0.9952026795715397, "learning_rate": 3.762382769668478e-05, "loss": 0.1946, "step": 11859 }, { "epoch": 1.4063796988023243, "grad_norm": 0.9824810018765867, "learning_rate": 3.7621755774522194e-05, "loss": 0.1708, "step": 11860 }, { "epoch": 1.4064982805644493, "grad_norm": 0.9032469326634257, "learning_rate": 3.761968373600249e-05, "loss": 0.1902, "step": 11861 }, { "epoch": 1.4066168623265742, "grad_norm": 1.6392592595213498, "learning_rate": 3.7617611581144786e-05, "loss": 0.3493, "step": 11862 }, { "epoch": 1.4067354440886992, "grad_norm": 1.1721221996946984, "learning_rate": 3.761553930996816e-05, "loss": 0.2327, "step": 11863 }, { "epoch": 1.4068540258508242, "grad_norm": 1.0964767356255427, "learning_rate": 3.761346692249175e-05, "loss": 0.2188, "step": 11864 }, { "epoch": 1.4069726076129492, "grad_norm": 1.0652929925046986, "learning_rate": 3.761139441873463e-05, "loss": 0.2284, "step": 11865 }, { "epoch": 1.4070911893750742, "grad_norm": 0.8087170443706976, "learning_rate": 3.760932179871592e-05, "loss": 0.2004, "step": 11866 }, { "epoch": 1.4072097711371991, "grad_norm": 0.8569407398863946, "learning_rate": 3.760724906245473e-05, "loss": 0.2134, "step": 11867 }, { "epoch": 1.4073283528993241, "grad_norm": 0.9533567064763148, "learning_rate": 3.7605176209970155e-05, "loss": 0.2306, "step": 11868 }, { "epoch": 1.407446934661449, "grad_norm": 1.2272651058505897, "learning_rate": 3.760310324128132e-05, "loss": 0.2475, "step": 11869 }, { "epoch": 1.407565516423574, "grad_norm": 1.190124156897787, "learning_rate": 3.760103015640733e-05, "loss": 0.2301, "step": 11870 }, { "epoch": 1.407684098185699, "grad_norm": 1.4695834910751413, "learning_rate": 3.7598956955367285e-05, "loss": 0.2983, "step": 11871 }, { "epoch": 1.407802679947824, "grad_norm": 0.9110986177479906, "learning_rate": 3.759688363818031e-05, "loss": 0.2242, "step": 11872 }, { "epoch": 1.407921261709949, "grad_norm": 1.0757468142947726, "learning_rate": 3.7594810204865525e-05, "loss": 0.2252, "step": 11873 }, { "epoch": 1.408039843472074, "grad_norm": 1.269907369040871, "learning_rate": 3.759273665544203e-05, "loss": 0.2716, "step": 11874 }, { "epoch": 1.408158425234199, "grad_norm": 1.5294149746890948, "learning_rate": 3.7590662989928946e-05, "loss": 0.4076, "step": 11875 }, { "epoch": 1.408277006996324, "grad_norm": 1.3324966677578851, "learning_rate": 3.758858920834538e-05, "loss": 0.2784, "step": 11876 }, { "epoch": 1.408395588758449, "grad_norm": 0.8091879553026329, "learning_rate": 3.758651531071046e-05, "loss": 0.164, "step": 11877 }, { "epoch": 1.408514170520574, "grad_norm": 1.0329661980269718, "learning_rate": 3.758444129704331e-05, "loss": 0.1912, "step": 11878 }, { "epoch": 1.408632752282699, "grad_norm": 1.0661677254857778, "learning_rate": 3.758236716736304e-05, "loss": 0.2375, "step": 11879 }, { "epoch": 1.4087513340448239, "grad_norm": 1.2188916312947056, "learning_rate": 3.758029292168877e-05, "loss": 0.2833, "step": 11880 }, { "epoch": 1.4088699158069489, "grad_norm": 0.6169914746171681, "learning_rate": 3.757821856003963e-05, "loss": 0.1448, "step": 11881 }, { "epoch": 1.4089884975690739, "grad_norm": 1.40751295717724, "learning_rate": 3.757614408243475e-05, "loss": 0.3378, "step": 11882 }, { "epoch": 1.4091070793311988, "grad_norm": 1.11195650803133, "learning_rate": 3.757406948889324e-05, "loss": 0.2343, "step": 11883 }, { "epoch": 1.4092256610933238, "grad_norm": 1.140903046700466, "learning_rate": 3.7571994779434225e-05, "loss": 0.2003, "step": 11884 }, { "epoch": 1.4093442428554488, "grad_norm": 0.9507215302066782, "learning_rate": 3.7569919954076824e-05, "loss": 0.2268, "step": 11885 }, { "epoch": 1.4094628246175738, "grad_norm": 0.7545854341329731, "learning_rate": 3.756784501284019e-05, "loss": 0.1802, "step": 11886 }, { "epoch": 1.4095814063796988, "grad_norm": 1.3528067661654315, "learning_rate": 3.756576995574343e-05, "loss": 0.2973, "step": 11887 }, { "epoch": 1.4096999881418237, "grad_norm": 1.387296608661351, "learning_rate": 3.756369478280568e-05, "loss": 0.2782, "step": 11888 }, { "epoch": 1.4098185699039487, "grad_norm": 0.9626680342901079, "learning_rate": 3.7561619494046076e-05, "loss": 0.203, "step": 11889 }, { "epoch": 1.4099371516660737, "grad_norm": 1.0318387809753797, "learning_rate": 3.755954408948374e-05, "loss": 0.2162, "step": 11890 }, { "epoch": 1.4100557334281987, "grad_norm": 0.9009392023340695, "learning_rate": 3.755746856913781e-05, "loss": 0.1966, "step": 11891 }, { "epoch": 1.4101743151903237, "grad_norm": 1.247204195633211, "learning_rate": 3.755539293302742e-05, "loss": 0.2726, "step": 11892 }, { "epoch": 1.4102928969524486, "grad_norm": 1.6774934494591192, "learning_rate": 3.75533171811717e-05, "loss": 0.3455, "step": 11893 }, { "epoch": 1.4104114787145736, "grad_norm": 1.3083019318388487, "learning_rate": 3.755124131358979e-05, "loss": 0.2398, "step": 11894 }, { "epoch": 1.4105300604766988, "grad_norm": 1.1568666306791122, "learning_rate": 3.754916533030083e-05, "loss": 0.2584, "step": 11895 }, { "epoch": 1.4106486422388236, "grad_norm": 0.8178442916921913, "learning_rate": 3.7547089231323954e-05, "loss": 0.1885, "step": 11896 }, { "epoch": 1.4107672240009488, "grad_norm": 0.9659711051453562, "learning_rate": 3.754501301667829e-05, "loss": 0.2397, "step": 11897 }, { "epoch": 1.4108858057630735, "grad_norm": 0.8020143368167996, "learning_rate": 3.7542936686383e-05, "loss": 0.1823, "step": 11898 }, { "epoch": 1.4110043875251987, "grad_norm": 1.3727905450598366, "learning_rate": 3.754086024045722e-05, "loss": 0.3669, "step": 11899 }, { "epoch": 1.4111229692873235, "grad_norm": 0.9167431816796885, "learning_rate": 3.7538783678920076e-05, "loss": 0.1547, "step": 11900 }, { "epoch": 1.4112415510494487, "grad_norm": 1.137542204077945, "learning_rate": 3.753670700179073e-05, "loss": 0.2106, "step": 11901 }, { "epoch": 1.4113601328115735, "grad_norm": 1.2410342187508858, "learning_rate": 3.7534630209088315e-05, "loss": 0.2904, "step": 11902 }, { "epoch": 1.4114787145736987, "grad_norm": 1.2799913132961949, "learning_rate": 3.753255330083199e-05, "loss": 0.2529, "step": 11903 }, { "epoch": 1.4115972963358234, "grad_norm": 0.8382685704476928, "learning_rate": 3.7530476277040886e-05, "loss": 0.1676, "step": 11904 }, { "epoch": 1.4117158780979486, "grad_norm": 1.146862656076266, "learning_rate": 3.752839913773415e-05, "loss": 0.2436, "step": 11905 }, { "epoch": 1.4118344598600734, "grad_norm": 1.1180565632915234, "learning_rate": 3.7526321882930947e-05, "loss": 0.2535, "step": 11906 }, { "epoch": 1.4119530416221986, "grad_norm": 1.315060130465517, "learning_rate": 3.752424451265041e-05, "loss": 0.2845, "step": 11907 }, { "epoch": 1.4120716233843236, "grad_norm": 0.7935997888963029, "learning_rate": 3.7522167026911695e-05, "loss": 0.1949, "step": 11908 }, { "epoch": 1.4121902051464486, "grad_norm": 1.529640993362518, "learning_rate": 3.752008942573396e-05, "loss": 0.2884, "step": 11909 }, { "epoch": 1.4123087869085735, "grad_norm": 0.896038850539518, "learning_rate": 3.7518011709136355e-05, "loss": 0.1756, "step": 11910 }, { "epoch": 1.4124273686706985, "grad_norm": 0.9648162778003362, "learning_rate": 3.751593387713803e-05, "loss": 0.2435, "step": 11911 }, { "epoch": 1.4125459504328235, "grad_norm": 1.1620885681106663, "learning_rate": 3.7513855929758146e-05, "loss": 0.2758, "step": 11912 }, { "epoch": 1.4126645321949485, "grad_norm": 1.206341713591499, "learning_rate": 3.751177786701585e-05, "loss": 0.288, "step": 11913 }, { "epoch": 1.4127831139570735, "grad_norm": 1.139185567593586, "learning_rate": 3.750969968893032e-05, "loss": 0.2026, "step": 11914 }, { "epoch": 1.4129016957191984, "grad_norm": 0.9162682953899665, "learning_rate": 3.7507621395520683e-05, "loss": 0.2023, "step": 11915 }, { "epoch": 1.4130202774813234, "grad_norm": 0.9828866805489438, "learning_rate": 3.750554298680612e-05, "loss": 0.2539, "step": 11916 }, { "epoch": 1.4131388592434484, "grad_norm": 1.2347286086574232, "learning_rate": 3.7503464462805784e-05, "loss": 0.2748, "step": 11917 }, { "epoch": 1.4132574410055734, "grad_norm": 1.1667994047750778, "learning_rate": 3.7501385823538834e-05, "loss": 0.2748, "step": 11918 }, { "epoch": 1.4133760227676984, "grad_norm": 1.1016760848721174, "learning_rate": 3.7499307069024445e-05, "loss": 0.2101, "step": 11919 }, { "epoch": 1.4134946045298233, "grad_norm": 0.8045703681941118, "learning_rate": 3.7497228199281774e-05, "loss": 0.2198, "step": 11920 }, { "epoch": 1.4136131862919483, "grad_norm": 0.9329928422753266, "learning_rate": 3.7495149214329985e-05, "loss": 0.1958, "step": 11921 }, { "epoch": 1.4137317680540733, "grad_norm": 1.158455948946292, "learning_rate": 3.749307011418824e-05, "loss": 0.2426, "step": 11922 }, { "epoch": 1.4138503498161983, "grad_norm": 0.8894028820745146, "learning_rate": 3.74909908988757e-05, "loss": 0.2332, "step": 11923 }, { "epoch": 1.4139689315783233, "grad_norm": 1.0416727964699128, "learning_rate": 3.748891156841155e-05, "loss": 0.2044, "step": 11924 }, { "epoch": 1.4140875133404482, "grad_norm": 1.024033634639564, "learning_rate": 3.7486832122814955e-05, "loss": 0.1915, "step": 11925 }, { "epoch": 1.4142060951025732, "grad_norm": 1.100056857054474, "learning_rate": 3.748475256210507e-05, "loss": 0.2121, "step": 11926 }, { "epoch": 1.4143246768646982, "grad_norm": 1.1410340384432158, "learning_rate": 3.748267288630107e-05, "loss": 0.2648, "step": 11927 }, { "epoch": 1.4144432586268232, "grad_norm": 1.1831707448580802, "learning_rate": 3.748059309542215e-05, "loss": 0.2625, "step": 11928 }, { "epoch": 1.4145618403889482, "grad_norm": 1.2722999210801806, "learning_rate": 3.747851318948746e-05, "loss": 0.2148, "step": 11929 }, { "epoch": 1.4146804221510731, "grad_norm": 1.3130181242351964, "learning_rate": 3.747643316851618e-05, "loss": 0.2251, "step": 11930 }, { "epoch": 1.4147990039131981, "grad_norm": 1.5799594332966045, "learning_rate": 3.747435303252749e-05, "loss": 0.2117, "step": 11931 }, { "epoch": 1.414917585675323, "grad_norm": 1.3294431029406917, "learning_rate": 3.747227278154055e-05, "loss": 0.2554, "step": 11932 }, { "epoch": 1.415036167437448, "grad_norm": 0.9651666913277549, "learning_rate": 3.747019241557457e-05, "loss": 0.2457, "step": 11933 }, { "epoch": 1.415154749199573, "grad_norm": 1.0835257400081713, "learning_rate": 3.7468111934648685e-05, "loss": 0.284, "step": 11934 }, { "epoch": 1.415273330961698, "grad_norm": 1.45307188083344, "learning_rate": 3.746603133878212e-05, "loss": 0.2681, "step": 11935 }, { "epoch": 1.415391912723823, "grad_norm": 1.1006760954370562, "learning_rate": 3.746395062799402e-05, "loss": 0.2074, "step": 11936 }, { "epoch": 1.415510494485948, "grad_norm": 1.1766132283014097, "learning_rate": 3.746186980230357e-05, "loss": 0.2148, "step": 11937 }, { "epoch": 1.415629076248073, "grad_norm": 1.4735271651734851, "learning_rate": 3.745978886172997e-05, "loss": 0.2929, "step": 11938 }, { "epoch": 1.415747658010198, "grad_norm": 1.1032395719154604, "learning_rate": 3.7457707806292395e-05, "loss": 0.207, "step": 11939 }, { "epoch": 1.415866239772323, "grad_norm": 1.7197056694613981, "learning_rate": 3.745562663601004e-05, "loss": 0.3101, "step": 11940 }, { "epoch": 1.415984821534448, "grad_norm": 0.78304786245339, "learning_rate": 3.745354535090207e-05, "loss": 0.1747, "step": 11941 }, { "epoch": 1.416103403296573, "grad_norm": 1.3011632218647013, "learning_rate": 3.74514639509877e-05, "loss": 0.27, "step": 11942 }, { "epoch": 1.416221985058698, "grad_norm": 1.132827101835751, "learning_rate": 3.744938243628608e-05, "loss": 0.209, "step": 11943 }, { "epoch": 1.4163405668208229, "grad_norm": 1.3512940137015497, "learning_rate": 3.744730080681644e-05, "loss": 0.2804, "step": 11944 }, { "epoch": 1.4164591485829479, "grad_norm": 1.0071277237834186, "learning_rate": 3.744521906259794e-05, "loss": 0.2146, "step": 11945 }, { "epoch": 1.416577730345073, "grad_norm": 1.0701795011719524, "learning_rate": 3.744313720364979e-05, "loss": 0.2152, "step": 11946 }, { "epoch": 1.4166963121071978, "grad_norm": 0.920506465424451, "learning_rate": 3.744105522999116e-05, "loss": 0.1941, "step": 11947 }, { "epoch": 1.416814893869323, "grad_norm": 1.1259767990316916, "learning_rate": 3.743897314164127e-05, "loss": 0.1589, "step": 11948 }, { "epoch": 1.4169334756314478, "grad_norm": 1.3050659813246257, "learning_rate": 3.7436890938619286e-05, "loss": 0.2772, "step": 11949 }, { "epoch": 1.417052057393573, "grad_norm": 0.9982786210944002, "learning_rate": 3.743480862094443e-05, "loss": 0.2131, "step": 11950 }, { "epoch": 1.4171706391556977, "grad_norm": 1.0368172154358368, "learning_rate": 3.743272618863588e-05, "loss": 0.1956, "step": 11951 }, { "epoch": 1.417289220917823, "grad_norm": 1.1878495825113666, "learning_rate": 3.743064364171285e-05, "loss": 0.2009, "step": 11952 }, { "epoch": 1.4174078026799477, "grad_norm": 0.9056630896747744, "learning_rate": 3.742856098019452e-05, "loss": 0.1921, "step": 11953 }, { "epoch": 1.417526384442073, "grad_norm": 1.023166475119506, "learning_rate": 3.74264782041001e-05, "loss": 0.203, "step": 11954 }, { "epoch": 1.4176449662041977, "grad_norm": 1.2991100820457953, "learning_rate": 3.7424395313448804e-05, "loss": 0.3219, "step": 11955 }, { "epoch": 1.4177635479663229, "grad_norm": 1.333929919864602, "learning_rate": 3.74223123082598e-05, "loss": 0.2886, "step": 11956 }, { "epoch": 1.4178821297284476, "grad_norm": 0.7151882655890506, "learning_rate": 3.7420229188552316e-05, "loss": 0.13, "step": 11957 }, { "epoch": 1.4180007114905728, "grad_norm": 1.114094213487098, "learning_rate": 3.7418145954345554e-05, "loss": 0.2773, "step": 11958 }, { "epoch": 1.4181192932526978, "grad_norm": 1.032364871530729, "learning_rate": 3.741606260565871e-05, "loss": 0.2148, "step": 11959 }, { "epoch": 1.4182378750148228, "grad_norm": 1.1149114960402489, "learning_rate": 3.741397914251099e-05, "loss": 0.2025, "step": 11960 }, { "epoch": 1.4183564567769478, "grad_norm": 1.0570946723796142, "learning_rate": 3.741189556492162e-05, "loss": 0.2223, "step": 11961 }, { "epoch": 1.4184750385390728, "grad_norm": 0.961860692020639, "learning_rate": 3.7409811872909776e-05, "loss": 0.164, "step": 11962 }, { "epoch": 1.4185936203011977, "grad_norm": 1.5840539159324694, "learning_rate": 3.7407728066494696e-05, "loss": 0.3376, "step": 11963 }, { "epoch": 1.4187122020633227, "grad_norm": 1.0104171318173516, "learning_rate": 3.7405644145695576e-05, "loss": 0.2121, "step": 11964 }, { "epoch": 1.4188307838254477, "grad_norm": 0.9911416726771245, "learning_rate": 3.740356011053163e-05, "loss": 0.2044, "step": 11965 }, { "epoch": 1.4189493655875727, "grad_norm": 1.1102571499849925, "learning_rate": 3.740147596102207e-05, "loss": 0.2204, "step": 11966 }, { "epoch": 1.4190679473496977, "grad_norm": 1.2333017217894224, "learning_rate": 3.7399391697186105e-05, "loss": 0.2295, "step": 11967 }, { "epoch": 1.4191865291118226, "grad_norm": 1.052269815743381, "learning_rate": 3.739730731904295e-05, "loss": 0.2141, "step": 11968 }, { "epoch": 1.4193051108739476, "grad_norm": 0.8673024693649638, "learning_rate": 3.7395222826611834e-05, "loss": 0.2055, "step": 11969 }, { "epoch": 1.4194236926360726, "grad_norm": 1.389745331681283, "learning_rate": 3.739313821991196e-05, "loss": 0.3222, "step": 11970 }, { "epoch": 1.4195422743981976, "grad_norm": 0.9039733242011868, "learning_rate": 3.739105349896255e-05, "loss": 0.1988, "step": 11971 }, { "epoch": 1.4196608561603226, "grad_norm": 1.4090900881266861, "learning_rate": 3.738896866378283e-05, "loss": 0.3531, "step": 11972 }, { "epoch": 1.4197794379224475, "grad_norm": 1.0602170145630208, "learning_rate": 3.7386883714392e-05, "loss": 0.2274, "step": 11973 }, { "epoch": 1.4198980196845725, "grad_norm": 1.011896978753643, "learning_rate": 3.738479865080929e-05, "loss": 0.2633, "step": 11974 }, { "epoch": 1.4200166014466975, "grad_norm": 0.8951701895706808, "learning_rate": 3.7382713473053934e-05, "loss": 0.1722, "step": 11975 }, { "epoch": 1.4201351832088225, "grad_norm": 1.6032075863526767, "learning_rate": 3.7380628181145134e-05, "loss": 0.3008, "step": 11976 }, { "epoch": 1.4202537649709475, "grad_norm": 1.322252080511777, "learning_rate": 3.7378542775102126e-05, "loss": 0.2216, "step": 11977 }, { "epoch": 1.4203723467330724, "grad_norm": 1.1034763458698946, "learning_rate": 3.737645725494414e-05, "loss": 0.2545, "step": 11978 }, { "epoch": 1.4204909284951974, "grad_norm": 1.2740354777225764, "learning_rate": 3.737437162069039e-05, "loss": 0.2532, "step": 11979 }, { "epoch": 1.4206095102573224, "grad_norm": 0.9794190951265678, "learning_rate": 3.7372285872360114e-05, "loss": 0.1705, "step": 11980 }, { "epoch": 1.4207280920194474, "grad_norm": 0.8449441033256117, "learning_rate": 3.737020000997252e-05, "loss": 0.1796, "step": 11981 }, { "epoch": 1.4208466737815724, "grad_norm": 1.2959549418745546, "learning_rate": 3.7368114033546866e-05, "loss": 0.276, "step": 11982 }, { "epoch": 1.4209652555436973, "grad_norm": 0.9812083671125547, "learning_rate": 3.736602794310237e-05, "loss": 0.1977, "step": 11983 }, { "epoch": 1.4210838373058223, "grad_norm": 1.1658842635744877, "learning_rate": 3.736394173865825e-05, "loss": 0.178, "step": 11984 }, { "epoch": 1.4212024190679473, "grad_norm": 0.8530627784623562, "learning_rate": 3.736185542023375e-05, "loss": 0.1682, "step": 11985 }, { "epoch": 1.4213210008300723, "grad_norm": 1.1984105092870918, "learning_rate": 3.7359768987848096e-05, "loss": 0.2786, "step": 11986 }, { "epoch": 1.4214395825921973, "grad_norm": 0.9211369612815075, "learning_rate": 3.735768244152054e-05, "loss": 0.1786, "step": 11987 }, { "epoch": 1.4215581643543223, "grad_norm": 1.2911479641659251, "learning_rate": 3.7355595781270304e-05, "loss": 0.3608, "step": 11988 }, { "epoch": 1.4216767461164472, "grad_norm": 0.8831923492960647, "learning_rate": 3.735350900711663e-05, "loss": 0.1867, "step": 11989 }, { "epoch": 1.4217953278785722, "grad_norm": 1.2003106211001768, "learning_rate": 3.735142211907874e-05, "loss": 0.2373, "step": 11990 }, { "epoch": 1.4219139096406972, "grad_norm": 1.0991150502179141, "learning_rate": 3.7349335117175896e-05, "loss": 0.2295, "step": 11991 }, { "epoch": 1.4220324914028222, "grad_norm": 1.0859898570880155, "learning_rate": 3.734724800142732e-05, "loss": 0.2548, "step": 11992 }, { "epoch": 1.4221510731649472, "grad_norm": 0.8668081328163169, "learning_rate": 3.7345160771852254e-05, "loss": 0.1905, "step": 11993 }, { "epoch": 1.4222696549270721, "grad_norm": 0.838933825485151, "learning_rate": 3.7343073428469954e-05, "loss": 0.1867, "step": 11994 }, { "epoch": 1.4223882366891973, "grad_norm": 1.3825626654733925, "learning_rate": 3.7340985971299646e-05, "loss": 0.276, "step": 11995 }, { "epoch": 1.422506818451322, "grad_norm": 1.5663639011108836, "learning_rate": 3.733889840036058e-05, "loss": 0.355, "step": 11996 }, { "epoch": 1.4226254002134473, "grad_norm": 1.3382757298278536, "learning_rate": 3.7336810715672014e-05, "loss": 0.2758, "step": 11997 }, { "epoch": 1.422743981975572, "grad_norm": 1.1336223022982024, "learning_rate": 3.7334722917253165e-05, "loss": 0.2366, "step": 11998 }, { "epoch": 1.4228625637376973, "grad_norm": 0.9367522206086788, "learning_rate": 3.73326350051233e-05, "loss": 0.176, "step": 11999 }, { "epoch": 1.422981145499822, "grad_norm": 0.8218561579344483, "learning_rate": 3.733054697930167e-05, "loss": 0.1926, "step": 12000 }, { "epoch": 1.4230997272619472, "grad_norm": 0.951255074023938, "learning_rate": 3.732845883980752e-05, "loss": 0.174, "step": 12001 }, { "epoch": 1.423218309024072, "grad_norm": 1.1343281217573846, "learning_rate": 3.7326370586660095e-05, "loss": 0.2671, "step": 12002 }, { "epoch": 1.4233368907861972, "grad_norm": 0.8652340403318025, "learning_rate": 3.732428221987864e-05, "loss": 0.2146, "step": 12003 }, { "epoch": 1.423455472548322, "grad_norm": 1.0475610249036065, "learning_rate": 3.732219373948243e-05, "loss": 0.2222, "step": 12004 }, { "epoch": 1.4235740543104471, "grad_norm": 1.002841497746369, "learning_rate": 3.73201051454907e-05, "loss": 0.2217, "step": 12005 }, { "epoch": 1.423692636072572, "grad_norm": 1.2139889917722577, "learning_rate": 3.7318016437922696e-05, "loss": 0.2633, "step": 12006 }, { "epoch": 1.423811217834697, "grad_norm": 1.0849538686013969, "learning_rate": 3.7315927616797697e-05, "loss": 0.2029, "step": 12007 }, { "epoch": 1.423929799596822, "grad_norm": 1.1545427513661384, "learning_rate": 3.731383868213495e-05, "loss": 0.2125, "step": 12008 }, { "epoch": 1.424048381358947, "grad_norm": 1.3358855964240732, "learning_rate": 3.73117496339537e-05, "loss": 0.2548, "step": 12009 }, { "epoch": 1.424166963121072, "grad_norm": 1.0149872519199645, "learning_rate": 3.7309660472273225e-05, "loss": 0.1796, "step": 12010 }, { "epoch": 1.424285544883197, "grad_norm": 0.8666149069817426, "learning_rate": 3.7307571197112775e-05, "loss": 0.1978, "step": 12011 }, { "epoch": 1.424404126645322, "grad_norm": 1.4454027333665467, "learning_rate": 3.730548180849161e-05, "loss": 0.2692, "step": 12012 }, { "epoch": 1.424522708407447, "grad_norm": 1.3573374758529448, "learning_rate": 3.7303392306429e-05, "loss": 0.2425, "step": 12013 }, { "epoch": 1.424641290169572, "grad_norm": 0.9218075791379029, "learning_rate": 3.730130269094418e-05, "loss": 0.2105, "step": 12014 }, { "epoch": 1.424759871931697, "grad_norm": 1.4538753403783078, "learning_rate": 3.7299212962056454e-05, "loss": 0.377, "step": 12015 }, { "epoch": 1.424878453693822, "grad_norm": 1.227830273325606, "learning_rate": 3.729712311978506e-05, "loss": 0.2382, "step": 12016 }, { "epoch": 1.424997035455947, "grad_norm": 1.499721348874308, "learning_rate": 3.729503316414927e-05, "loss": 0.3678, "step": 12017 }, { "epoch": 1.425115617218072, "grad_norm": 0.9635882059339691, "learning_rate": 3.729294309516835e-05, "loss": 0.21, "step": 12018 }, { "epoch": 1.4252341989801969, "grad_norm": 1.1989818204752054, "learning_rate": 3.729085291286157e-05, "loss": 0.2697, "step": 12019 }, { "epoch": 1.4253527807423219, "grad_norm": 1.1411600711676084, "learning_rate": 3.728876261724819e-05, "loss": 0.2629, "step": 12020 }, { "epoch": 1.4254713625044468, "grad_norm": 1.2925891537665914, "learning_rate": 3.728667220834751e-05, "loss": 0.2115, "step": 12021 }, { "epoch": 1.4255899442665718, "grad_norm": 0.973266107412103, "learning_rate": 3.728458168617877e-05, "loss": 0.1993, "step": 12022 }, { "epoch": 1.4257085260286968, "grad_norm": 1.5400997653157809, "learning_rate": 3.728249105076125e-05, "loss": 0.3209, "step": 12023 }, { "epoch": 1.4258271077908218, "grad_norm": 1.389016798979837, "learning_rate": 3.728040030211421e-05, "loss": 0.3172, "step": 12024 }, { "epoch": 1.4259456895529468, "grad_norm": 0.9016096773864852, "learning_rate": 3.727830944025695e-05, "loss": 0.1778, "step": 12025 }, { "epoch": 1.4260642713150717, "grad_norm": 1.1155217521462248, "learning_rate": 3.727621846520874e-05, "loss": 0.2371, "step": 12026 }, { "epoch": 1.4261828530771967, "grad_norm": 1.124666981951748, "learning_rate": 3.727412737698884e-05, "loss": 0.2122, "step": 12027 }, { "epoch": 1.4263014348393217, "grad_norm": 1.094608374927019, "learning_rate": 3.7272036175616544e-05, "loss": 0.2329, "step": 12028 }, { "epoch": 1.4264200166014467, "grad_norm": 1.1911518045668308, "learning_rate": 3.726994486111112e-05, "loss": 0.2304, "step": 12029 }, { "epoch": 1.4265385983635717, "grad_norm": 0.915841192879363, "learning_rate": 3.726785343349185e-05, "loss": 0.1748, "step": 12030 }, { "epoch": 1.4266571801256966, "grad_norm": 0.9268160013558113, "learning_rate": 3.726576189277802e-05, "loss": 0.1761, "step": 12031 }, { "epoch": 1.4267757618878216, "grad_norm": 0.871737394083587, "learning_rate": 3.72636702389889e-05, "loss": 0.1965, "step": 12032 }, { "epoch": 1.4268943436499466, "grad_norm": 1.0077043465372557, "learning_rate": 3.7261578472143784e-05, "loss": 0.2044, "step": 12033 }, { "epoch": 1.4270129254120716, "grad_norm": 0.893832525571922, "learning_rate": 3.725948659226195e-05, "loss": 0.2089, "step": 12034 }, { "epoch": 1.4271315071741966, "grad_norm": 1.1716682187387586, "learning_rate": 3.725739459936268e-05, "loss": 0.2721, "step": 12035 }, { "epoch": 1.4272500889363215, "grad_norm": 1.1607246727973193, "learning_rate": 3.725530249346526e-05, "loss": 0.196, "step": 12036 }, { "epoch": 1.4273686706984465, "grad_norm": 1.0237261638849915, "learning_rate": 3.7253210274588984e-05, "loss": 0.1978, "step": 12037 }, { "epoch": 1.4274872524605715, "grad_norm": 1.0387635891096125, "learning_rate": 3.725111794275313e-05, "loss": 0.1784, "step": 12038 }, { "epoch": 1.4276058342226965, "grad_norm": 1.0431852646773652, "learning_rate": 3.7249025497977e-05, "loss": 0.2518, "step": 12039 }, { "epoch": 1.4277244159848215, "grad_norm": 0.850245247116313, "learning_rate": 3.724693294027987e-05, "loss": 0.1832, "step": 12040 }, { "epoch": 1.4278429977469465, "grad_norm": 0.8146333053938954, "learning_rate": 3.724484026968105e-05, "loss": 0.1778, "step": 12041 }, { "epoch": 1.4279615795090714, "grad_norm": 0.8717952922833572, "learning_rate": 3.72427474861998e-05, "loss": 0.1824, "step": 12042 }, { "epoch": 1.4280801612711964, "grad_norm": 0.8932636885357316, "learning_rate": 3.724065458985545e-05, "loss": 0.2092, "step": 12043 }, { "epoch": 1.4281987430333214, "grad_norm": 0.8245061288880984, "learning_rate": 3.7238561580667255e-05, "loss": 0.1652, "step": 12044 }, { "epoch": 1.4283173247954464, "grad_norm": 0.9249864273581121, "learning_rate": 3.7236468458654545e-05, "loss": 0.1654, "step": 12045 }, { "epoch": 1.4284359065575716, "grad_norm": 1.1490678478180048, "learning_rate": 3.723437522383659e-05, "loss": 0.226, "step": 12046 }, { "epoch": 1.4285544883196963, "grad_norm": 0.7477944646145495, "learning_rate": 3.72322818762327e-05, "loss": 0.138, "step": 12047 }, { "epoch": 1.4286730700818215, "grad_norm": 1.1906030203156808, "learning_rate": 3.723018841586218e-05, "loss": 0.2276, "step": 12048 }, { "epoch": 1.4287916518439463, "grad_norm": 1.4780923816958342, "learning_rate": 3.722809484274432e-05, "loss": 0.2644, "step": 12049 }, { "epoch": 1.4289102336060715, "grad_norm": 0.9700998358896614, "learning_rate": 3.72260011568984e-05, "loss": 0.229, "step": 12050 }, { "epoch": 1.4290288153681963, "grad_norm": 1.137778644924611, "learning_rate": 3.722390735834377e-05, "loss": 0.2453, "step": 12051 }, { "epoch": 1.4291473971303215, "grad_norm": 1.179072689979875, "learning_rate": 3.722181344709969e-05, "loss": 0.2413, "step": 12052 }, { "epoch": 1.4292659788924462, "grad_norm": 1.1666626996176908, "learning_rate": 3.721971942318547e-05, "loss": 0.2275, "step": 12053 }, { "epoch": 1.4293845606545714, "grad_norm": 1.5645182550933716, "learning_rate": 3.721762528662044e-05, "loss": 0.2766, "step": 12054 }, { "epoch": 1.4295031424166962, "grad_norm": 0.8312756044786797, "learning_rate": 3.721553103742388e-05, "loss": 0.1641, "step": 12055 }, { "epoch": 1.4296217241788214, "grad_norm": 1.0707763044730485, "learning_rate": 3.72134366756151e-05, "loss": 0.1908, "step": 12056 }, { "epoch": 1.4297403059409464, "grad_norm": 1.6273476537518772, "learning_rate": 3.7211342201213404e-05, "loss": 0.3077, "step": 12057 }, { "epoch": 1.4298588877030713, "grad_norm": 0.9757904487851846, "learning_rate": 3.720924761423812e-05, "loss": 0.2213, "step": 12058 }, { "epoch": 1.4299774694651963, "grad_norm": 1.1808199419205503, "learning_rate": 3.720715291470854e-05, "loss": 0.284, "step": 12059 }, { "epoch": 1.4300960512273213, "grad_norm": 0.9896720965606722, "learning_rate": 3.720505810264399e-05, "loss": 0.2181, "step": 12060 }, { "epoch": 1.4302146329894463, "grad_norm": 0.902656262905738, "learning_rate": 3.7202963178063756e-05, "loss": 0.196, "step": 12061 }, { "epoch": 1.4303332147515713, "grad_norm": 1.5313211884433833, "learning_rate": 3.7200868140987174e-05, "loss": 0.3908, "step": 12062 }, { "epoch": 1.4304517965136962, "grad_norm": 0.8832586795729839, "learning_rate": 3.719877299143354e-05, "loss": 0.1637, "step": 12063 }, { "epoch": 1.4305703782758212, "grad_norm": 0.8820369992393825, "learning_rate": 3.719667772942219e-05, "loss": 0.1685, "step": 12064 }, { "epoch": 1.4306889600379462, "grad_norm": 1.226769331590938, "learning_rate": 3.719458235497242e-05, "loss": 0.2894, "step": 12065 }, { "epoch": 1.4308075418000712, "grad_norm": 0.941602467711519, "learning_rate": 3.719248686810356e-05, "loss": 0.2124, "step": 12066 }, { "epoch": 1.4309261235621962, "grad_norm": 1.0679796915360902, "learning_rate": 3.719039126883492e-05, "loss": 0.2658, "step": 12067 }, { "epoch": 1.4310447053243212, "grad_norm": 0.9874963930411335, "learning_rate": 3.718829555718581e-05, "loss": 0.2346, "step": 12068 }, { "epoch": 1.4311632870864461, "grad_norm": 1.2077980024789088, "learning_rate": 3.718619973317558e-05, "loss": 0.278, "step": 12069 }, { "epoch": 1.4312818688485711, "grad_norm": 1.1387238459889275, "learning_rate": 3.718410379682352e-05, "loss": 0.2823, "step": 12070 }, { "epoch": 1.431400450610696, "grad_norm": 1.1477317644925151, "learning_rate": 3.7182007748148976e-05, "loss": 0.2664, "step": 12071 }, { "epoch": 1.431519032372821, "grad_norm": 0.9758634059137739, "learning_rate": 3.717991158717125e-05, "loss": 0.1765, "step": 12072 }, { "epoch": 1.431637614134946, "grad_norm": 1.1430747434393123, "learning_rate": 3.717781531390968e-05, "loss": 0.1979, "step": 12073 }, { "epoch": 1.431756195897071, "grad_norm": 0.8340528075254353, "learning_rate": 3.7175718928383584e-05, "loss": 0.1669, "step": 12074 }, { "epoch": 1.431874777659196, "grad_norm": 1.4950489133715994, "learning_rate": 3.7173622430612285e-05, "loss": 0.2624, "step": 12075 }, { "epoch": 1.431993359421321, "grad_norm": 0.9523535657562556, "learning_rate": 3.7171525820615124e-05, "loss": 0.2081, "step": 12076 }, { "epoch": 1.432111941183446, "grad_norm": 1.273336048344334, "learning_rate": 3.7169429098411415e-05, "loss": 0.2601, "step": 12077 }, { "epoch": 1.432230522945571, "grad_norm": 1.3605923416599812, "learning_rate": 3.7167332264020495e-05, "loss": 0.2948, "step": 12078 }, { "epoch": 1.432349104707696, "grad_norm": 1.3030253022437912, "learning_rate": 3.716523531746169e-05, "loss": 0.276, "step": 12079 }, { "epoch": 1.432467686469821, "grad_norm": 1.255223187864126, "learning_rate": 3.7163138258754336e-05, "loss": 0.2632, "step": 12080 }, { "epoch": 1.432586268231946, "grad_norm": 0.8071621326685432, "learning_rate": 3.7161041087917755e-05, "loss": 0.171, "step": 12081 }, { "epoch": 1.4327048499940709, "grad_norm": 0.912067898598135, "learning_rate": 3.71589438049713e-05, "loss": 0.1963, "step": 12082 }, { "epoch": 1.4328234317561959, "grad_norm": 0.8013158887921227, "learning_rate": 3.715684640993429e-05, "loss": 0.2068, "step": 12083 }, { "epoch": 1.4329420135183208, "grad_norm": 1.2304630901654887, "learning_rate": 3.715474890282606e-05, "loss": 0.267, "step": 12084 }, { "epoch": 1.4330605952804458, "grad_norm": 1.7256209193966836, "learning_rate": 3.7152651283665954e-05, "loss": 0.2943, "step": 12085 }, { "epoch": 1.4331791770425708, "grad_norm": 0.9879517642608836, "learning_rate": 3.71505535524733e-05, "loss": 0.2043, "step": 12086 }, { "epoch": 1.4332977588046958, "grad_norm": 0.967355083607919, "learning_rate": 3.714845570926745e-05, "loss": 0.175, "step": 12087 }, { "epoch": 1.4334163405668208, "grad_norm": 0.9073698796585046, "learning_rate": 3.714635775406773e-05, "loss": 0.2001, "step": 12088 }, { "epoch": 1.4335349223289457, "grad_norm": 1.2170088208869456, "learning_rate": 3.714425968689349e-05, "loss": 0.2062, "step": 12089 }, { "epoch": 1.4336535040910707, "grad_norm": 0.8362130324932386, "learning_rate": 3.714216150776407e-05, "loss": 0.2138, "step": 12090 }, { "epoch": 1.4337720858531957, "grad_norm": 0.9977982798124712, "learning_rate": 3.71400632166988e-05, "loss": 0.1902, "step": 12091 }, { "epoch": 1.4338906676153207, "grad_norm": 0.9574272309094275, "learning_rate": 3.7137964813717045e-05, "loss": 0.1756, "step": 12092 }, { "epoch": 1.4340092493774457, "grad_norm": 1.076599308778476, "learning_rate": 3.7135866298838135e-05, "loss": 0.2802, "step": 12093 }, { "epoch": 1.4341278311395707, "grad_norm": 0.8566292566126845, "learning_rate": 3.713376767208142e-05, "loss": 0.182, "step": 12094 }, { "epoch": 1.4342464129016959, "grad_norm": 0.8252179342639918, "learning_rate": 3.7131668933466244e-05, "loss": 0.161, "step": 12095 }, { "epoch": 1.4343649946638206, "grad_norm": 0.9479811908598932, "learning_rate": 3.712957008301196e-05, "loss": 0.1987, "step": 12096 }, { "epoch": 1.4344835764259458, "grad_norm": 1.4195736003464925, "learning_rate": 3.712747112073791e-05, "loss": 0.3094, "step": 12097 }, { "epoch": 1.4346021581880706, "grad_norm": 1.0737092730814193, "learning_rate": 3.712537204666345e-05, "loss": 0.2181, "step": 12098 }, { "epoch": 1.4347207399501958, "grad_norm": 0.9343344956956153, "learning_rate": 3.712327286080793e-05, "loss": 0.2673, "step": 12099 }, { "epoch": 1.4348393217123205, "grad_norm": 1.0676425421543592, "learning_rate": 3.71211735631907e-05, "loss": 0.2236, "step": 12100 }, { "epoch": 1.4349579034744457, "grad_norm": 1.0374595375475222, "learning_rate": 3.711907415383112e-05, "loss": 0.2179, "step": 12101 }, { "epoch": 1.4350764852365705, "grad_norm": 1.440455703086093, "learning_rate": 3.711697463274853e-05, "loss": 0.2994, "step": 12102 }, { "epoch": 1.4351950669986957, "grad_norm": 0.9486640997205259, "learning_rate": 3.71148749999623e-05, "loss": 0.1957, "step": 12103 }, { "epoch": 1.4353136487608205, "grad_norm": 1.0335167654517876, "learning_rate": 3.7112775255491774e-05, "loss": 0.2364, "step": 12104 }, { "epoch": 1.4354322305229457, "grad_norm": 1.1417684487106745, "learning_rate": 3.711067539935632e-05, "loss": 0.2824, "step": 12105 }, { "epoch": 1.4355508122850704, "grad_norm": 1.1466950780223135, "learning_rate": 3.710857543157528e-05, "loss": 0.2931, "step": 12106 }, { "epoch": 1.4356693940471956, "grad_norm": 1.099878433542885, "learning_rate": 3.710647535216802e-05, "loss": 0.229, "step": 12107 }, { "epoch": 1.4357879758093206, "grad_norm": 1.0674591836765452, "learning_rate": 3.7104375161153915e-05, "loss": 0.249, "step": 12108 }, { "epoch": 1.4359065575714456, "grad_norm": 1.1641382150649549, "learning_rate": 3.710227485855231e-05, "loss": 0.2275, "step": 12109 }, { "epoch": 1.4360251393335706, "grad_norm": 1.1939079148452676, "learning_rate": 3.710017444438257e-05, "loss": 0.2247, "step": 12110 }, { "epoch": 1.4361437210956955, "grad_norm": 0.9780877309967877, "learning_rate": 3.709807391866406e-05, "loss": 0.2012, "step": 12111 }, { "epoch": 1.4362623028578205, "grad_norm": 1.1015832834922257, "learning_rate": 3.7095973281416144e-05, "loss": 0.2087, "step": 12112 }, { "epoch": 1.4363808846199455, "grad_norm": 0.9589659253628081, "learning_rate": 3.709387253265819e-05, "loss": 0.2327, "step": 12113 }, { "epoch": 1.4364994663820705, "grad_norm": 1.2413148164399888, "learning_rate": 3.709177167240957e-05, "loss": 0.2332, "step": 12114 }, { "epoch": 1.4366180481441955, "grad_norm": 1.243872161695892, "learning_rate": 3.7089670700689625e-05, "loss": 0.2474, "step": 12115 }, { "epoch": 1.4367366299063205, "grad_norm": 0.8438765659915752, "learning_rate": 3.7087569617517744e-05, "loss": 0.1705, "step": 12116 }, { "epoch": 1.4368552116684454, "grad_norm": 2.5432911230684123, "learning_rate": 3.7085468422913307e-05, "loss": 0.2623, "step": 12117 }, { "epoch": 1.4369737934305704, "grad_norm": 0.9779322037913933, "learning_rate": 3.7083367116895664e-05, "loss": 0.187, "step": 12118 }, { "epoch": 1.4370923751926954, "grad_norm": 0.9398953143023279, "learning_rate": 3.708126569948419e-05, "loss": 0.1888, "step": 12119 }, { "epoch": 1.4372109569548204, "grad_norm": 1.1097102363172018, "learning_rate": 3.7079164170698264e-05, "loss": 0.2008, "step": 12120 }, { "epoch": 1.4373295387169454, "grad_norm": 1.2990870377951567, "learning_rate": 3.707706253055726e-05, "loss": 0.2972, "step": 12121 }, { "epoch": 1.4374481204790703, "grad_norm": 0.9153478521513964, "learning_rate": 3.7074960779080546e-05, "loss": 0.2001, "step": 12122 }, { "epoch": 1.4375667022411953, "grad_norm": 1.1701098423406637, "learning_rate": 3.707285891628751e-05, "loss": 0.2556, "step": 12123 }, { "epoch": 1.4376852840033203, "grad_norm": 1.104624130232348, "learning_rate": 3.70707569421975e-05, "loss": 0.2044, "step": 12124 }, { "epoch": 1.4378038657654453, "grad_norm": 0.9976465637280325, "learning_rate": 3.7068654856829934e-05, "loss": 0.2025, "step": 12125 }, { "epoch": 1.4379224475275703, "grad_norm": 1.9406724457452853, "learning_rate": 3.7066552660204154e-05, "loss": 0.4404, "step": 12126 }, { "epoch": 1.4380410292896952, "grad_norm": 1.2948387207019898, "learning_rate": 3.7064450352339564e-05, "loss": 0.2187, "step": 12127 }, { "epoch": 1.4381596110518202, "grad_norm": 1.344198570613, "learning_rate": 3.706234793325553e-05, "loss": 0.2131, "step": 12128 }, { "epoch": 1.4382781928139452, "grad_norm": 0.9461295052049635, "learning_rate": 3.706024540297145e-05, "loss": 0.2024, "step": 12129 }, { "epoch": 1.4383967745760702, "grad_norm": 1.4303000697114605, "learning_rate": 3.705814276150669e-05, "loss": 0.2768, "step": 12130 }, { "epoch": 1.4385153563381952, "grad_norm": 0.9954608709565025, "learning_rate": 3.705604000888064e-05, "loss": 0.2413, "step": 12131 }, { "epoch": 1.4386339381003201, "grad_norm": 1.4898175901826856, "learning_rate": 3.7053937145112684e-05, "loss": 0.3167, "step": 12132 }, { "epoch": 1.4387525198624451, "grad_norm": 0.8421325585217315, "learning_rate": 3.705183417022222e-05, "loss": 0.1846, "step": 12133 }, { "epoch": 1.43887110162457, "grad_norm": 1.6607221510599175, "learning_rate": 3.7049731084228614e-05, "loss": 0.4252, "step": 12134 }, { "epoch": 1.438989683386695, "grad_norm": 1.5244514240362093, "learning_rate": 3.704762788715126e-05, "loss": 0.384, "step": 12135 }, { "epoch": 1.43910826514882, "grad_norm": 1.2518813594184361, "learning_rate": 3.704552457900955e-05, "loss": 0.2263, "step": 12136 }, { "epoch": 1.439226846910945, "grad_norm": 0.9071037063793584, "learning_rate": 3.704342115982288e-05, "loss": 0.2178, "step": 12137 }, { "epoch": 1.43934542867307, "grad_norm": 1.0339193705580532, "learning_rate": 3.704131762961064e-05, "loss": 0.2015, "step": 12138 }, { "epoch": 1.439464010435195, "grad_norm": 1.1621741118449682, "learning_rate": 3.703921398839221e-05, "loss": 0.2267, "step": 12139 }, { "epoch": 1.43958259219732, "grad_norm": 0.8175945400536053, "learning_rate": 3.7037110236187e-05, "loss": 0.1566, "step": 12140 }, { "epoch": 1.439701173959445, "grad_norm": 0.8640591755079933, "learning_rate": 3.703500637301438e-05, "loss": 0.1978, "step": 12141 }, { "epoch": 1.43981975572157, "grad_norm": 1.2049961743000681, "learning_rate": 3.7032902398893776e-05, "loss": 0.2401, "step": 12142 }, { "epoch": 1.439938337483695, "grad_norm": 0.9113250673839405, "learning_rate": 3.703079831384456e-05, "loss": 0.1918, "step": 12143 }, { "epoch": 1.44005691924582, "grad_norm": 1.1132092973719285, "learning_rate": 3.702869411788613e-05, "loss": 0.2279, "step": 12144 }, { "epoch": 1.440175501007945, "grad_norm": 0.9315445828953833, "learning_rate": 3.70265898110379e-05, "loss": 0.2287, "step": 12145 }, { "epoch": 1.44029408277007, "grad_norm": 1.5793747923873895, "learning_rate": 3.702448539331925e-05, "loss": 0.3726, "step": 12146 }, { "epoch": 1.4404126645321949, "grad_norm": 1.0579605258454343, "learning_rate": 3.70223808647496e-05, "loss": 0.2096, "step": 12147 }, { "epoch": 1.44053124629432, "grad_norm": 0.8416604090059927, "learning_rate": 3.702027622534834e-05, "loss": 0.1974, "step": 12148 }, { "epoch": 1.4406498280564448, "grad_norm": 1.6696086177691976, "learning_rate": 3.701817147513487e-05, "loss": 0.2514, "step": 12149 }, { "epoch": 1.44076840981857, "grad_norm": 1.3720970327870996, "learning_rate": 3.70160666141286e-05, "loss": 0.4278, "step": 12150 }, { "epoch": 1.4408869915806948, "grad_norm": 1.2912701658261259, "learning_rate": 3.701396164234893e-05, "loss": 0.3175, "step": 12151 }, { "epoch": 1.44100557334282, "grad_norm": 0.8155265666240815, "learning_rate": 3.7011856559815266e-05, "loss": 0.2086, "step": 12152 }, { "epoch": 1.4411241551049447, "grad_norm": 1.087376244295926, "learning_rate": 3.7009751366547015e-05, "loss": 0.2277, "step": 12153 }, { "epoch": 1.44124273686707, "grad_norm": 1.1233840498441974, "learning_rate": 3.7007646062563575e-05, "loss": 0.264, "step": 12154 }, { "epoch": 1.4413613186291947, "grad_norm": 1.0138217564956953, "learning_rate": 3.7005540647884374e-05, "loss": 0.208, "step": 12155 }, { "epoch": 1.44147990039132, "grad_norm": 0.9935043427247249, "learning_rate": 3.7003435122528806e-05, "loss": 0.2126, "step": 12156 }, { "epoch": 1.4415984821534449, "grad_norm": 1.4618013413565982, "learning_rate": 3.7001329486516284e-05, "loss": 0.3621, "step": 12157 }, { "epoch": 1.4417170639155699, "grad_norm": 1.717423872743809, "learning_rate": 3.699922373986622e-05, "loss": 0.3687, "step": 12158 }, { "epoch": 1.4418356456776948, "grad_norm": 0.8443764757110154, "learning_rate": 3.6997117882598036e-05, "loss": 0.1598, "step": 12159 }, { "epoch": 1.4419542274398198, "grad_norm": 1.3657522143309473, "learning_rate": 3.699501191473113e-05, "loss": 0.1981, "step": 12160 }, { "epoch": 1.4420728092019448, "grad_norm": 1.4337418602248349, "learning_rate": 3.699290583628493e-05, "loss": 0.2523, "step": 12161 }, { "epoch": 1.4421913909640698, "grad_norm": 1.0364865255841373, "learning_rate": 3.6990799647278837e-05, "loss": 0.2144, "step": 12162 }, { "epoch": 1.4423099727261948, "grad_norm": 1.7138358209049196, "learning_rate": 3.698869334773228e-05, "loss": 0.3402, "step": 12163 }, { "epoch": 1.4424285544883197, "grad_norm": 0.9941980350224968, "learning_rate": 3.698658693766467e-05, "loss": 0.2237, "step": 12164 }, { "epoch": 1.4425471362504447, "grad_norm": 1.649175940162588, "learning_rate": 3.698448041709543e-05, "loss": 0.3393, "step": 12165 }, { "epoch": 1.4426657180125697, "grad_norm": 0.9749192857771277, "learning_rate": 3.698237378604397e-05, "loss": 0.2181, "step": 12166 }, { "epoch": 1.4427842997746947, "grad_norm": 1.0291389934138202, "learning_rate": 3.698026704452972e-05, "loss": 0.2583, "step": 12167 }, { "epoch": 1.4429028815368197, "grad_norm": 2.1383302194127616, "learning_rate": 3.697816019257211e-05, "loss": 0.3335, "step": 12168 }, { "epoch": 1.4430214632989447, "grad_norm": 1.0626168768887325, "learning_rate": 3.697605323019054e-05, "loss": 0.2396, "step": 12169 }, { "epoch": 1.4431400450610696, "grad_norm": 1.5120342182606679, "learning_rate": 3.697394615740445e-05, "loss": 0.3272, "step": 12170 }, { "epoch": 1.4432586268231946, "grad_norm": 0.9601011709659812, "learning_rate": 3.6971838974233255e-05, "loss": 0.1828, "step": 12171 }, { "epoch": 1.4433772085853196, "grad_norm": 1.1148842238749943, "learning_rate": 3.696973168069639e-05, "loss": 0.2987, "step": 12172 }, { "epoch": 1.4434957903474446, "grad_norm": 1.2008868772250658, "learning_rate": 3.6967624276813275e-05, "loss": 0.2405, "step": 12173 }, { "epoch": 1.4436143721095696, "grad_norm": 0.922807572795794, "learning_rate": 3.696551676260334e-05, "loss": 0.1468, "step": 12174 }, { "epoch": 1.4437329538716945, "grad_norm": 1.0161915066318543, "learning_rate": 3.696340913808601e-05, "loss": 0.2165, "step": 12175 }, { "epoch": 1.4438515356338195, "grad_norm": 0.9652384465093231, "learning_rate": 3.6961301403280725e-05, "loss": 0.2026, "step": 12176 }, { "epoch": 1.4439701173959445, "grad_norm": 1.2685258524774872, "learning_rate": 3.695919355820691e-05, "loss": 0.2296, "step": 12177 }, { "epoch": 1.4440886991580695, "grad_norm": 1.2943352977546778, "learning_rate": 3.695708560288399e-05, "loss": 0.2132, "step": 12178 }, { "epoch": 1.4442072809201945, "grad_norm": 1.142763323018037, "learning_rate": 3.69549775373314e-05, "loss": 0.2621, "step": 12179 }, { "epoch": 1.4443258626823194, "grad_norm": 0.8225918950128126, "learning_rate": 3.695286936156859e-05, "loss": 0.1497, "step": 12180 }, { "epoch": 1.4444444444444444, "grad_norm": 1.5620511589821982, "learning_rate": 3.695076107561497e-05, "loss": 0.274, "step": 12181 }, { "epoch": 1.4445630262065694, "grad_norm": 1.2069869313018584, "learning_rate": 3.694865267948999e-05, "loss": 0.3309, "step": 12182 }, { "epoch": 1.4446816079686944, "grad_norm": 0.9333597906966883, "learning_rate": 3.694654417321309e-05, "loss": 0.1859, "step": 12183 }, { "epoch": 1.4448001897308194, "grad_norm": 1.055216022500823, "learning_rate": 3.69444355568037e-05, "loss": 0.1933, "step": 12184 }, { "epoch": 1.4449187714929443, "grad_norm": 0.9745454730887769, "learning_rate": 3.694232683028126e-05, "loss": 0.1879, "step": 12185 }, { "epoch": 1.4450373532550693, "grad_norm": 1.512631232716978, "learning_rate": 3.694021799366521e-05, "loss": 0.3394, "step": 12186 }, { "epoch": 1.4451559350171943, "grad_norm": 0.9395034124201209, "learning_rate": 3.6938109046975e-05, "loss": 0.2074, "step": 12187 }, { "epoch": 1.4452745167793193, "grad_norm": 0.9503346359768939, "learning_rate": 3.693599999023006e-05, "loss": 0.2272, "step": 12188 }, { "epoch": 1.4453930985414443, "grad_norm": 1.2195741660527957, "learning_rate": 3.693389082344984e-05, "loss": 0.2158, "step": 12189 }, { "epoch": 1.4455116803035692, "grad_norm": 1.1719409108533285, "learning_rate": 3.693178154665377e-05, "loss": 0.2581, "step": 12190 }, { "epoch": 1.4456302620656942, "grad_norm": 0.9129462030887218, "learning_rate": 3.692967215986132e-05, "loss": 0.2131, "step": 12191 }, { "epoch": 1.4457488438278192, "grad_norm": 1.0619936866245898, "learning_rate": 3.692756266309191e-05, "loss": 0.2615, "step": 12192 }, { "epoch": 1.4458674255899442, "grad_norm": 0.959478265348533, "learning_rate": 3.6925453056365e-05, "loss": 0.1997, "step": 12193 }, { "epoch": 1.4459860073520692, "grad_norm": 1.1172310299964452, "learning_rate": 3.6923343339700046e-05, "loss": 0.2481, "step": 12194 }, { "epoch": 1.4461045891141944, "grad_norm": 0.9074343287218414, "learning_rate": 3.692123351311648e-05, "loss": 0.1866, "step": 12195 }, { "epoch": 1.4462231708763191, "grad_norm": 1.2495599752376991, "learning_rate": 3.691912357663376e-05, "loss": 0.257, "step": 12196 }, { "epoch": 1.4463417526384443, "grad_norm": 0.7445929078642342, "learning_rate": 3.691701353027133e-05, "loss": 0.1257, "step": 12197 }, { "epoch": 1.446460334400569, "grad_norm": 1.1957838421968132, "learning_rate": 3.691490337404867e-05, "loss": 0.2839, "step": 12198 }, { "epoch": 1.4465789161626943, "grad_norm": 1.464660161213013, "learning_rate": 3.6912793107985185e-05, "loss": 0.3276, "step": 12199 }, { "epoch": 1.446697497924819, "grad_norm": 1.5558596614619342, "learning_rate": 3.691068273210038e-05, "loss": 0.3633, "step": 12200 }, { "epoch": 1.4468160796869443, "grad_norm": 0.9478738088155142, "learning_rate": 3.690857224641367e-05, "loss": 0.2722, "step": 12201 }, { "epoch": 1.446934661449069, "grad_norm": 0.8906842293252789, "learning_rate": 3.690646165094454e-05, "loss": 0.1762, "step": 12202 }, { "epoch": 1.4470532432111942, "grad_norm": 0.9707756252811277, "learning_rate": 3.690435094571242e-05, "loss": 0.2148, "step": 12203 }, { "epoch": 1.447171824973319, "grad_norm": 0.9978080648395476, "learning_rate": 3.69022401307368e-05, "loss": 0.2074, "step": 12204 }, { "epoch": 1.4472904067354442, "grad_norm": 1.1277481395581863, "learning_rate": 3.690012920603711e-05, "loss": 0.3017, "step": 12205 }, { "epoch": 1.447408988497569, "grad_norm": 1.0921880466961518, "learning_rate": 3.689801817163282e-05, "loss": 0.2651, "step": 12206 }, { "epoch": 1.4475275702596941, "grad_norm": 0.9383297844217009, "learning_rate": 3.689590702754339e-05, "loss": 0.1757, "step": 12207 }, { "epoch": 1.4476461520218191, "grad_norm": 1.1426513218377106, "learning_rate": 3.6893795773788296e-05, "loss": 0.2813, "step": 12208 }, { "epoch": 1.447764733783944, "grad_norm": 1.4067435698825275, "learning_rate": 3.689168441038698e-05, "loss": 0.222, "step": 12209 }, { "epoch": 1.447883315546069, "grad_norm": 0.9918571098161546, "learning_rate": 3.6889572937358925e-05, "loss": 0.2544, "step": 12210 }, { "epoch": 1.448001897308194, "grad_norm": 1.0255107367985967, "learning_rate": 3.6887461354723584e-05, "loss": 0.1987, "step": 12211 }, { "epoch": 1.448120479070319, "grad_norm": 0.8716597002361176, "learning_rate": 3.688534966250042e-05, "loss": 0.2007, "step": 12212 }, { "epoch": 1.448239060832444, "grad_norm": 1.2248894428232535, "learning_rate": 3.688323786070892e-05, "loss": 0.2144, "step": 12213 }, { "epoch": 1.448357642594569, "grad_norm": 1.2035141977478596, "learning_rate": 3.688112594936853e-05, "loss": 0.2721, "step": 12214 }, { "epoch": 1.448476224356694, "grad_norm": 1.1950343904592842, "learning_rate": 3.687901392849873e-05, "loss": 0.2582, "step": 12215 }, { "epoch": 1.448594806118819, "grad_norm": 0.9655834022501604, "learning_rate": 3.687690179811899e-05, "loss": 0.1977, "step": 12216 }, { "epoch": 1.448713387880944, "grad_norm": 0.895054909691713, "learning_rate": 3.687478955824878e-05, "loss": 0.1688, "step": 12217 }, { "epoch": 1.448831969643069, "grad_norm": 0.9687424792982577, "learning_rate": 3.687267720890757e-05, "loss": 0.2351, "step": 12218 }, { "epoch": 1.448950551405194, "grad_norm": 0.8549193893216772, "learning_rate": 3.687056475011484e-05, "loss": 0.2171, "step": 12219 }, { "epoch": 1.449069133167319, "grad_norm": 1.5131787628528983, "learning_rate": 3.6868452181890056e-05, "loss": 0.3021, "step": 12220 }, { "epoch": 1.4491877149294439, "grad_norm": 0.982503211298969, "learning_rate": 3.68663395042527e-05, "loss": 0.1963, "step": 12221 }, { "epoch": 1.4493062966915689, "grad_norm": 0.9589542043067245, "learning_rate": 3.686422671722224e-05, "loss": 0.2101, "step": 12222 }, { "epoch": 1.4494248784536938, "grad_norm": 0.9420526271178044, "learning_rate": 3.686211382081816e-05, "loss": 0.2201, "step": 12223 }, { "epoch": 1.4495434602158188, "grad_norm": 1.023615929317855, "learning_rate": 3.6860000815059936e-05, "loss": 0.207, "step": 12224 }, { "epoch": 1.4496620419779438, "grad_norm": 1.3800051521562446, "learning_rate": 3.6857887699967044e-05, "loss": 0.286, "step": 12225 }, { "epoch": 1.4497806237400688, "grad_norm": 1.276240733579864, "learning_rate": 3.685577447555898e-05, "loss": 0.3147, "step": 12226 }, { "epoch": 1.4498992055021938, "grad_norm": 0.9455233249549477, "learning_rate": 3.6853661141855206e-05, "loss": 0.2185, "step": 12227 }, { "epoch": 1.4500177872643187, "grad_norm": 1.2079810297883649, "learning_rate": 3.6851547698875216e-05, "loss": 0.2528, "step": 12228 }, { "epoch": 1.4501363690264437, "grad_norm": 1.1117682427973303, "learning_rate": 3.6849434146638486e-05, "loss": 0.2328, "step": 12229 }, { "epoch": 1.4502549507885687, "grad_norm": 1.2394147393109731, "learning_rate": 3.684732048516451e-05, "loss": 0.2592, "step": 12230 }, { "epoch": 1.4503735325506937, "grad_norm": 0.8979086673441969, "learning_rate": 3.684520671447276e-05, "loss": 0.1723, "step": 12231 }, { "epoch": 1.4504921143128187, "grad_norm": 1.0650155506413088, "learning_rate": 3.684309283458274e-05, "loss": 0.257, "step": 12232 }, { "epoch": 1.4506106960749436, "grad_norm": 0.6899012513926602, "learning_rate": 3.6840978845513914e-05, "loss": 0.1704, "step": 12233 }, { "epoch": 1.4507292778370686, "grad_norm": 1.0380791173135553, "learning_rate": 3.683886474728579e-05, "loss": 0.2231, "step": 12234 }, { "epoch": 1.4508478595991936, "grad_norm": 2.2284520112955897, "learning_rate": 3.6836750539917845e-05, "loss": 0.4795, "step": 12235 }, { "epoch": 1.4509664413613186, "grad_norm": 1.0644428517785116, "learning_rate": 3.6834636223429585e-05, "loss": 0.2043, "step": 12236 }, { "epoch": 1.4510850231234436, "grad_norm": 1.1183803323134953, "learning_rate": 3.6832521797840487e-05, "loss": 0.2395, "step": 12237 }, { "epoch": 1.4512036048855685, "grad_norm": 1.4565152055620314, "learning_rate": 3.6830407263170045e-05, "loss": 0.2995, "step": 12238 }, { "epoch": 1.4513221866476935, "grad_norm": 1.2251968985606962, "learning_rate": 3.682829261943776e-05, "loss": 0.334, "step": 12239 }, { "epoch": 1.4514407684098185, "grad_norm": 1.2502312484779226, "learning_rate": 3.682617786666312e-05, "loss": 0.2735, "step": 12240 }, { "epoch": 1.4515593501719435, "grad_norm": 0.8235032361825881, "learning_rate": 3.682406300486562e-05, "loss": 0.2104, "step": 12241 }, { "epoch": 1.4516779319340685, "grad_norm": 1.435215667390115, "learning_rate": 3.6821948034064763e-05, "loss": 0.3475, "step": 12242 }, { "epoch": 1.4517965136961934, "grad_norm": 1.25876237141467, "learning_rate": 3.681983295428004e-05, "loss": 0.2934, "step": 12243 }, { "epoch": 1.4519150954583184, "grad_norm": 1.0084153803362628, "learning_rate": 3.6817717765530954e-05, "loss": 0.2319, "step": 12244 }, { "epoch": 1.4520336772204434, "grad_norm": 0.6738907653291393, "learning_rate": 3.6815602467837e-05, "loss": 0.1469, "step": 12245 }, { "epoch": 1.4521522589825686, "grad_norm": 0.8893729620002367, "learning_rate": 3.681348706121768e-05, "loss": 0.1898, "step": 12246 }, { "epoch": 1.4522708407446934, "grad_norm": 1.0941492495264937, "learning_rate": 3.68113715456925e-05, "loss": 0.2736, "step": 12247 }, { "epoch": 1.4523894225068186, "grad_norm": 0.8828198699332169, "learning_rate": 3.680925592128095e-05, "loss": 0.2152, "step": 12248 }, { "epoch": 1.4525080042689433, "grad_norm": 1.0783744818660763, "learning_rate": 3.6807140188002545e-05, "loss": 0.2719, "step": 12249 }, { "epoch": 1.4526265860310685, "grad_norm": 1.0956960076314506, "learning_rate": 3.680502434587679e-05, "loss": 0.2181, "step": 12250 }, { "epoch": 1.4527451677931933, "grad_norm": 0.8558948218197985, "learning_rate": 3.6802908394923184e-05, "loss": 0.2197, "step": 12251 }, { "epoch": 1.4528637495553185, "grad_norm": 0.9869588028573135, "learning_rate": 3.680079233516124e-05, "loss": 0.2144, "step": 12252 }, { "epoch": 1.4529823313174433, "grad_norm": 1.1830935856323397, "learning_rate": 3.679867616661046e-05, "loss": 0.2112, "step": 12253 }, { "epoch": 1.4531009130795685, "grad_norm": 1.1187869552407637, "learning_rate": 3.679655988929035e-05, "loss": 0.2699, "step": 12254 }, { "epoch": 1.4532194948416932, "grad_norm": 0.9987588997600297, "learning_rate": 3.679444350322043e-05, "loss": 0.243, "step": 12255 }, { "epoch": 1.4533380766038184, "grad_norm": 1.1012858111109607, "learning_rate": 3.6792327008420196e-05, "loss": 0.2859, "step": 12256 }, { "epoch": 1.4534566583659434, "grad_norm": 1.0674056585774692, "learning_rate": 3.679021040490917e-05, "loss": 0.2445, "step": 12257 }, { "epoch": 1.4535752401280684, "grad_norm": 0.8539995164157435, "learning_rate": 3.678809369270687e-05, "loss": 0.1604, "step": 12258 }, { "epoch": 1.4536938218901934, "grad_norm": 0.9160646618215362, "learning_rate": 3.6785976871832795e-05, "loss": 0.1973, "step": 12259 }, { "epoch": 1.4538124036523183, "grad_norm": 1.0344879301339405, "learning_rate": 3.678385994230647e-05, "loss": 0.2065, "step": 12260 }, { "epoch": 1.4539309854144433, "grad_norm": 1.081235154701906, "learning_rate": 3.6781742904147405e-05, "loss": 0.1804, "step": 12261 }, { "epoch": 1.4540495671765683, "grad_norm": 0.8614960220176746, "learning_rate": 3.677962575737512e-05, "loss": 0.1373, "step": 12262 }, { "epoch": 1.4541681489386933, "grad_norm": 0.9684639090837732, "learning_rate": 3.677750850200912e-05, "loss": 0.172, "step": 12263 }, { "epoch": 1.4542867307008183, "grad_norm": 0.8555663981142602, "learning_rate": 3.677539113806894e-05, "loss": 0.1567, "step": 12264 }, { "epoch": 1.4544053124629432, "grad_norm": 1.0638556910569572, "learning_rate": 3.67732736655741e-05, "loss": 0.1807, "step": 12265 }, { "epoch": 1.4545238942250682, "grad_norm": 1.1661544619865754, "learning_rate": 3.677115608454411e-05, "loss": 0.228, "step": 12266 }, { "epoch": 1.4546424759871932, "grad_norm": 1.1994229884248102, "learning_rate": 3.67690383949985e-05, "loss": 0.215, "step": 12267 }, { "epoch": 1.4547610577493182, "grad_norm": 0.9827556850296882, "learning_rate": 3.676692059695678e-05, "loss": 0.2167, "step": 12268 }, { "epoch": 1.4548796395114432, "grad_norm": 2.0313707788650004, "learning_rate": 3.6764802690438486e-05, "loss": 0.4534, "step": 12269 }, { "epoch": 1.4549982212735681, "grad_norm": 1.279833434244065, "learning_rate": 3.676268467546314e-05, "loss": 0.2403, "step": 12270 }, { "epoch": 1.4551168030356931, "grad_norm": 0.8584872107195795, "learning_rate": 3.676056655205026e-05, "loss": 0.1796, "step": 12271 }, { "epoch": 1.455235384797818, "grad_norm": 0.8262855633899494, "learning_rate": 3.6758448320219384e-05, "loss": 0.2049, "step": 12272 }, { "epoch": 1.455353966559943, "grad_norm": 0.745077299437538, "learning_rate": 3.6756329979990034e-05, "loss": 0.1832, "step": 12273 }, { "epoch": 1.455472548322068, "grad_norm": 1.2322572728066494, "learning_rate": 3.675421153138173e-05, "loss": 0.2364, "step": 12274 }, { "epoch": 1.455591130084193, "grad_norm": 1.4030297853775844, "learning_rate": 3.675209297441401e-05, "loss": 0.2384, "step": 12275 }, { "epoch": 1.455709711846318, "grad_norm": 0.8482227892171417, "learning_rate": 3.674997430910641e-05, "loss": 0.1692, "step": 12276 }, { "epoch": 1.455828293608443, "grad_norm": 1.1332947876105621, "learning_rate": 3.674785553547846e-05, "loss": 0.2697, "step": 12277 }, { "epoch": 1.455946875370568, "grad_norm": 1.0615378085733242, "learning_rate": 3.6745736653549685e-05, "loss": 0.2189, "step": 12278 }, { "epoch": 1.456065457132693, "grad_norm": 1.0399396867542103, "learning_rate": 3.674361766333962e-05, "loss": 0.2197, "step": 12279 }, { "epoch": 1.456184038894818, "grad_norm": 0.878744174154464, "learning_rate": 3.67414985648678e-05, "loss": 0.1667, "step": 12280 }, { "epoch": 1.456302620656943, "grad_norm": 1.0191861916385463, "learning_rate": 3.673937935815376e-05, "loss": 0.173, "step": 12281 }, { "epoch": 1.456421202419068, "grad_norm": 0.9447760219365109, "learning_rate": 3.673726004321704e-05, "loss": 0.2254, "step": 12282 }, { "epoch": 1.456539784181193, "grad_norm": 1.1989098086515755, "learning_rate": 3.673514062007718e-05, "loss": 0.2812, "step": 12283 }, { "epoch": 1.4566583659433179, "grad_norm": 1.0240422065439412, "learning_rate": 3.673302108875371e-05, "loss": 0.2159, "step": 12284 }, { "epoch": 1.4567769477054429, "grad_norm": 1.166623398453887, "learning_rate": 3.673090144926617e-05, "loss": 0.2418, "step": 12285 }, { "epoch": 1.4568955294675678, "grad_norm": 1.202779373678851, "learning_rate": 3.6728781701634105e-05, "loss": 0.2003, "step": 12286 }, { "epoch": 1.4570141112296928, "grad_norm": 0.9741255441323444, "learning_rate": 3.672666184587706e-05, "loss": 0.1938, "step": 12287 }, { "epoch": 1.4571326929918178, "grad_norm": 1.0691014345480012, "learning_rate": 3.672454188201457e-05, "loss": 0.17, "step": 12288 }, { "epoch": 1.4572512747539428, "grad_norm": 1.0022702158888146, "learning_rate": 3.672242181006618e-05, "loss": 0.2512, "step": 12289 }, { "epoch": 1.4573698565160678, "grad_norm": 0.9228886681320742, "learning_rate": 3.672030163005145e-05, "loss": 0.2221, "step": 12290 }, { "epoch": 1.4574884382781927, "grad_norm": 0.9365219805134268, "learning_rate": 3.6718181341989896e-05, "loss": 0.1697, "step": 12291 }, { "epoch": 1.4576070200403177, "grad_norm": 0.9647917952510675, "learning_rate": 3.671606094590108e-05, "loss": 0.2032, "step": 12292 }, { "epoch": 1.4577256018024427, "grad_norm": 0.9031688426784392, "learning_rate": 3.671394044180455e-05, "loss": 0.1359, "step": 12293 }, { "epoch": 1.4578441835645677, "grad_norm": 1.0630753365979604, "learning_rate": 3.671181982971986e-05, "loss": 0.2046, "step": 12294 }, { "epoch": 1.4579627653266929, "grad_norm": 1.2857304351284575, "learning_rate": 3.6709699109666544e-05, "loss": 0.2248, "step": 12295 }, { "epoch": 1.4580813470888176, "grad_norm": 1.2261079573542604, "learning_rate": 3.6707578281664165e-05, "loss": 0.2693, "step": 12296 }, { "epoch": 1.4581999288509429, "grad_norm": 1.0895434361857286, "learning_rate": 3.670545734573228e-05, "loss": 0.2157, "step": 12297 }, { "epoch": 1.4583185106130676, "grad_norm": 1.03110542776726, "learning_rate": 3.670333630189042e-05, "loss": 0.2504, "step": 12298 }, { "epoch": 1.4584370923751928, "grad_norm": 0.7865946870586522, "learning_rate": 3.670121515015817e-05, "loss": 0.1974, "step": 12299 }, { "epoch": 1.4585556741373176, "grad_norm": 1.093334756333083, "learning_rate": 3.669909389055504e-05, "loss": 0.2367, "step": 12300 }, { "epoch": 1.4586742558994428, "grad_norm": 1.266400971550583, "learning_rate": 3.669697252310063e-05, "loss": 0.256, "step": 12301 }, { "epoch": 1.4587928376615675, "grad_norm": 1.6086711997025454, "learning_rate": 3.669485104781447e-05, "loss": 0.2698, "step": 12302 }, { "epoch": 1.4589114194236927, "grad_norm": 1.710026487805769, "learning_rate": 3.6692729464716124e-05, "loss": 0.3453, "step": 12303 }, { "epoch": 1.4590300011858175, "grad_norm": 1.1283672705388401, "learning_rate": 3.6690607773825146e-05, "loss": 0.2484, "step": 12304 }, { "epoch": 1.4591485829479427, "grad_norm": 1.5585241653931248, "learning_rate": 3.6688485975161106e-05, "loss": 0.3343, "step": 12305 }, { "epoch": 1.4592671647100675, "grad_norm": 1.1926424195174943, "learning_rate": 3.668636406874356e-05, "loss": 0.2748, "step": 12306 }, { "epoch": 1.4593857464721927, "grad_norm": 1.6112903562850456, "learning_rate": 3.6684242054592065e-05, "loss": 0.3529, "step": 12307 }, { "epoch": 1.4595043282343176, "grad_norm": 0.9594574109864177, "learning_rate": 3.668211993272619e-05, "loss": 0.1908, "step": 12308 }, { "epoch": 1.4596229099964426, "grad_norm": 0.9786564694463649, "learning_rate": 3.667999770316549e-05, "loss": 0.2018, "step": 12309 }, { "epoch": 1.4597414917585676, "grad_norm": 1.0752999349573253, "learning_rate": 3.667787536592954e-05, "loss": 0.2195, "step": 12310 }, { "epoch": 1.4598600735206926, "grad_norm": 0.996378416062834, "learning_rate": 3.6675752921037885e-05, "loss": 0.1789, "step": 12311 }, { "epoch": 1.4599786552828176, "grad_norm": 1.0461935717462898, "learning_rate": 3.667363036851012e-05, "loss": 0.2966, "step": 12312 }, { "epoch": 1.4600972370449425, "grad_norm": 1.1289552026985015, "learning_rate": 3.66715077083658e-05, "loss": 0.261, "step": 12313 }, { "epoch": 1.4602158188070675, "grad_norm": 1.081670782459618, "learning_rate": 3.6669384940624485e-05, "loss": 0.2674, "step": 12314 }, { "epoch": 1.4603344005691925, "grad_norm": 1.0737624611227476, "learning_rate": 3.666726206530575e-05, "loss": 0.1915, "step": 12315 }, { "epoch": 1.4604529823313175, "grad_norm": 0.7112217217361511, "learning_rate": 3.666513908242917e-05, "loss": 0.1848, "step": 12316 }, { "epoch": 1.4605715640934425, "grad_norm": 1.3946090787650036, "learning_rate": 3.6663015992014304e-05, "loss": 0.2832, "step": 12317 }, { "epoch": 1.4606901458555674, "grad_norm": 0.8750720839595114, "learning_rate": 3.666089279408075e-05, "loss": 0.1729, "step": 12318 }, { "epoch": 1.4608087276176924, "grad_norm": 0.9794133992751233, "learning_rate": 3.6658769488648046e-05, "loss": 0.2006, "step": 12319 }, { "epoch": 1.4609273093798174, "grad_norm": 1.0574411839047053, "learning_rate": 3.6656646075735794e-05, "loss": 0.2228, "step": 12320 }, { "epoch": 1.4610458911419424, "grad_norm": 0.9551736295748933, "learning_rate": 3.6654522555363555e-05, "loss": 0.2023, "step": 12321 }, { "epoch": 1.4611644729040674, "grad_norm": 1.0311334964706225, "learning_rate": 3.665239892755092e-05, "loss": 0.2515, "step": 12322 }, { "epoch": 1.4612830546661923, "grad_norm": 0.9440407751634907, "learning_rate": 3.6650275192317444e-05, "loss": 0.2101, "step": 12323 }, { "epoch": 1.4614016364283173, "grad_norm": 1.6382732719086555, "learning_rate": 3.664815134968272e-05, "loss": 0.3546, "step": 12324 }, { "epoch": 1.4615202181904423, "grad_norm": 0.9855442659404148, "learning_rate": 3.6646027399666325e-05, "loss": 0.2102, "step": 12325 }, { "epoch": 1.4616387999525673, "grad_norm": 0.9557453989010988, "learning_rate": 3.6643903342287835e-05, "loss": 0.1812, "step": 12326 }, { "epoch": 1.4617573817146923, "grad_norm": 1.026413728146968, "learning_rate": 3.6641779177566845e-05, "loss": 0.1916, "step": 12327 }, { "epoch": 1.4618759634768173, "grad_norm": 1.2430076907617083, "learning_rate": 3.663965490552292e-05, "loss": 0.2072, "step": 12328 }, { "epoch": 1.4619945452389422, "grad_norm": 0.8484890847817763, "learning_rate": 3.663753052617565e-05, "loss": 0.1851, "step": 12329 }, { "epoch": 1.4621131270010672, "grad_norm": 1.2053906807278507, "learning_rate": 3.6635406039544615e-05, "loss": 0.2421, "step": 12330 }, { "epoch": 1.4622317087631922, "grad_norm": 0.9865983469896434, "learning_rate": 3.663328144564942e-05, "loss": 0.2419, "step": 12331 }, { "epoch": 1.4623502905253172, "grad_norm": 1.371945372084337, "learning_rate": 3.663115674450962e-05, "loss": 0.3365, "step": 12332 }, { "epoch": 1.4624688722874422, "grad_norm": 0.9382855609483034, "learning_rate": 3.662903193614483e-05, "loss": 0.2315, "step": 12333 }, { "epoch": 1.4625874540495671, "grad_norm": 0.9655609681419519, "learning_rate": 3.662690702057462e-05, "loss": 0.1742, "step": 12334 }, { "epoch": 1.4627060358116921, "grad_norm": 1.082640250176452, "learning_rate": 3.662478199781858e-05, "loss": 0.2733, "step": 12335 }, { "epoch": 1.462824617573817, "grad_norm": 1.0180068442918755, "learning_rate": 3.662265686789631e-05, "loss": 0.2141, "step": 12336 }, { "epoch": 1.462943199335942, "grad_norm": 1.2466113593646266, "learning_rate": 3.6620531630827406e-05, "loss": 0.2338, "step": 12337 }, { "epoch": 1.463061781098067, "grad_norm": 1.2288990134975681, "learning_rate": 3.6618406286631434e-05, "loss": 0.2546, "step": 12338 }, { "epoch": 1.463180362860192, "grad_norm": 1.3810247476561341, "learning_rate": 3.661628083532801e-05, "loss": 0.253, "step": 12339 }, { "epoch": 1.463298944622317, "grad_norm": 1.2576701873601877, "learning_rate": 3.661415527693672e-05, "loss": 0.2042, "step": 12340 }, { "epoch": 1.463417526384442, "grad_norm": 1.1644194151180964, "learning_rate": 3.6612029611477164e-05, "loss": 0.2428, "step": 12341 }, { "epoch": 1.463536108146567, "grad_norm": 1.2792061911871222, "learning_rate": 3.660990383896894e-05, "loss": 0.2157, "step": 12342 }, { "epoch": 1.463654689908692, "grad_norm": 0.9887839319547241, "learning_rate": 3.6607777959431627e-05, "loss": 0.2125, "step": 12343 }, { "epoch": 1.4637732716708172, "grad_norm": 1.4860402934677306, "learning_rate": 3.660565197288484e-05, "loss": 0.3007, "step": 12344 }, { "epoch": 1.463891853432942, "grad_norm": 1.1456724446227498, "learning_rate": 3.660352587934818e-05, "loss": 0.2405, "step": 12345 }, { "epoch": 1.4640104351950671, "grad_norm": 0.9784467126715762, "learning_rate": 3.660139967884123e-05, "loss": 0.2057, "step": 12346 }, { "epoch": 1.4641290169571919, "grad_norm": 1.320205151320214, "learning_rate": 3.659927337138361e-05, "loss": 0.2149, "step": 12347 }, { "epoch": 1.464247598719317, "grad_norm": 1.0695481181267392, "learning_rate": 3.659714695699491e-05, "loss": 0.1916, "step": 12348 }, { "epoch": 1.4643661804814418, "grad_norm": 1.1138435085770009, "learning_rate": 3.659502043569474e-05, "loss": 0.2591, "step": 12349 }, { "epoch": 1.464484762243567, "grad_norm": 0.9304644851497409, "learning_rate": 3.65928938075027e-05, "loss": 0.2156, "step": 12350 }, { "epoch": 1.4646033440056918, "grad_norm": 0.8843822234770843, "learning_rate": 3.659076707243839e-05, "loss": 0.214, "step": 12351 }, { "epoch": 1.464721925767817, "grad_norm": 0.9111461652187726, "learning_rate": 3.6588640230521426e-05, "loss": 0.2048, "step": 12352 }, { "epoch": 1.4648405075299418, "grad_norm": 1.0683412783813215, "learning_rate": 3.658651328177141e-05, "loss": 0.1711, "step": 12353 }, { "epoch": 1.464959089292067, "grad_norm": 0.9112118398928412, "learning_rate": 3.6584386226207945e-05, "loss": 0.206, "step": 12354 }, { "epoch": 1.4650776710541917, "grad_norm": 0.8646674524733492, "learning_rate": 3.658225906385064e-05, "loss": 0.1876, "step": 12355 }, { "epoch": 1.465196252816317, "grad_norm": 2.1749456004029994, "learning_rate": 3.658013179471912e-05, "loss": 0.4169, "step": 12356 }, { "epoch": 1.465314834578442, "grad_norm": 1.2208833430020722, "learning_rate": 3.657800441883298e-05, "loss": 0.2355, "step": 12357 }, { "epoch": 1.465433416340567, "grad_norm": 1.0744460969899985, "learning_rate": 3.657587693621184e-05, "loss": 0.2541, "step": 12358 }, { "epoch": 1.4655519981026919, "grad_norm": 1.1294849890644532, "learning_rate": 3.657374934687531e-05, "loss": 0.2452, "step": 12359 }, { "epoch": 1.4656705798648169, "grad_norm": 0.8792809647177741, "learning_rate": 3.6571621650843005e-05, "loss": 0.1944, "step": 12360 }, { "epoch": 1.4657891616269418, "grad_norm": 0.9786865467535502, "learning_rate": 3.656949384813454e-05, "loss": 0.2454, "step": 12361 }, { "epoch": 1.4659077433890668, "grad_norm": 1.0466843105316348, "learning_rate": 3.6567365938769525e-05, "loss": 0.226, "step": 12362 }, { "epoch": 1.4660263251511918, "grad_norm": 0.9066714651993623, "learning_rate": 3.656523792276758e-05, "loss": 0.1787, "step": 12363 }, { "epoch": 1.4661449069133168, "grad_norm": 0.8569475660476885, "learning_rate": 3.656310980014832e-05, "loss": 0.2456, "step": 12364 }, { "epoch": 1.4662634886754418, "grad_norm": 0.9131221658855861, "learning_rate": 3.6560981570931376e-05, "loss": 0.2111, "step": 12365 }, { "epoch": 1.4663820704375667, "grad_norm": 0.7418893424942871, "learning_rate": 3.655885323513635e-05, "loss": 0.1748, "step": 12366 }, { "epoch": 1.4665006521996917, "grad_norm": 0.9936365346245545, "learning_rate": 3.655672479278288e-05, "loss": 0.2099, "step": 12367 }, { "epoch": 1.4666192339618167, "grad_norm": 0.9983592452090637, "learning_rate": 3.655459624389058e-05, "loss": 0.2267, "step": 12368 }, { "epoch": 1.4667378157239417, "grad_norm": 1.1621517879250578, "learning_rate": 3.655246758847907e-05, "loss": 0.2041, "step": 12369 }, { "epoch": 1.4668563974860667, "grad_norm": 1.1130794696195996, "learning_rate": 3.655033882656797e-05, "loss": 0.1886, "step": 12370 }, { "epoch": 1.4669749792481916, "grad_norm": 0.8761831765035717, "learning_rate": 3.654820995817691e-05, "loss": 0.2056, "step": 12371 }, { "epoch": 1.4670935610103166, "grad_norm": 0.8804782275581297, "learning_rate": 3.6546080983325527e-05, "loss": 0.1748, "step": 12372 }, { "epoch": 1.4672121427724416, "grad_norm": 0.9795086321613173, "learning_rate": 3.6543951902033426e-05, "loss": 0.2027, "step": 12373 }, { "epoch": 1.4673307245345666, "grad_norm": 1.5240947843516957, "learning_rate": 3.654182271432024e-05, "loss": 0.2882, "step": 12374 }, { "epoch": 1.4674493062966916, "grad_norm": 1.06657230058152, "learning_rate": 3.653969342020561e-05, "loss": 0.2361, "step": 12375 }, { "epoch": 1.4675678880588165, "grad_norm": 1.242852902688949, "learning_rate": 3.653756401970916e-05, "loss": 0.2716, "step": 12376 }, { "epoch": 1.4676864698209415, "grad_norm": 1.348213375793061, "learning_rate": 3.6535434512850507e-05, "loss": 0.2682, "step": 12377 }, { "epoch": 1.4678050515830665, "grad_norm": 0.8727882389060837, "learning_rate": 3.653330489964931e-05, "loss": 0.268, "step": 12378 }, { "epoch": 1.4679236333451915, "grad_norm": 1.09671254536131, "learning_rate": 3.653117518012517e-05, "loss": 0.2483, "step": 12379 }, { "epoch": 1.4680422151073165, "grad_norm": 0.768070156041988, "learning_rate": 3.6529045354297746e-05, "loss": 0.2137, "step": 12380 }, { "epoch": 1.4681607968694415, "grad_norm": 0.9090849554469114, "learning_rate": 3.652691542218666e-05, "loss": 0.2335, "step": 12381 }, { "epoch": 1.4682793786315664, "grad_norm": 0.9212650772234733, "learning_rate": 3.652478538381154e-05, "loss": 0.1805, "step": 12382 }, { "epoch": 1.4683979603936914, "grad_norm": 1.36235647359699, "learning_rate": 3.652265523919204e-05, "loss": 0.2686, "step": 12383 }, { "epoch": 1.4685165421558164, "grad_norm": 1.3071815422753112, "learning_rate": 3.652052498834778e-05, "loss": 0.2551, "step": 12384 }, { "epoch": 1.4686351239179414, "grad_norm": 0.9676389372007833, "learning_rate": 3.6518394631298415e-05, "loss": 0.2129, "step": 12385 }, { "epoch": 1.4687537056800664, "grad_norm": 0.7869503222819934, "learning_rate": 3.651626416806357e-05, "loss": 0.1679, "step": 12386 }, { "epoch": 1.4688722874421913, "grad_norm": 1.389054288855431, "learning_rate": 3.6514133598662904e-05, "loss": 0.3462, "step": 12387 }, { "epoch": 1.4689908692043163, "grad_norm": 0.7923394330130631, "learning_rate": 3.6512002923116026e-05, "loss": 0.1799, "step": 12388 }, { "epoch": 1.4691094509664413, "grad_norm": 0.9483388408560601, "learning_rate": 3.650987214144262e-05, "loss": 0.1484, "step": 12389 }, { "epoch": 1.4692280327285663, "grad_norm": 0.8931070707398896, "learning_rate": 3.650774125366229e-05, "loss": 0.1873, "step": 12390 }, { "epoch": 1.4693466144906913, "grad_norm": 1.0986234083476598, "learning_rate": 3.650561025979471e-05, "loss": 0.2329, "step": 12391 }, { "epoch": 1.4694651962528162, "grad_norm": 1.5051881187709824, "learning_rate": 3.650347915985951e-05, "loss": 0.2832, "step": 12392 }, { "epoch": 1.4695837780149412, "grad_norm": 1.039350408082825, "learning_rate": 3.650134795387633e-05, "loss": 0.2306, "step": 12393 }, { "epoch": 1.4697023597770662, "grad_norm": 1.0206816627286972, "learning_rate": 3.6499216641864825e-05, "loss": 0.2229, "step": 12394 }, { "epoch": 1.4698209415391914, "grad_norm": 1.4503479416744751, "learning_rate": 3.649708522384465e-05, "loss": 0.3339, "step": 12395 }, { "epoch": 1.4699395233013162, "grad_norm": 1.054467065350489, "learning_rate": 3.649495369983545e-05, "loss": 0.229, "step": 12396 }, { "epoch": 1.4700581050634414, "grad_norm": 0.8074766829708945, "learning_rate": 3.649282206985687e-05, "loss": 0.1415, "step": 12397 }, { "epoch": 1.4701766868255661, "grad_norm": 1.3502874934598594, "learning_rate": 3.649069033392857e-05, "loss": 0.3511, "step": 12398 }, { "epoch": 1.4702952685876913, "grad_norm": 1.4676275054539967, "learning_rate": 3.6488558492070184e-05, "loss": 0.3193, "step": 12399 }, { "epoch": 1.470413850349816, "grad_norm": 0.9936680885892792, "learning_rate": 3.648642654430139e-05, "loss": 0.1672, "step": 12400 }, { "epoch": 1.4705324321119413, "grad_norm": 0.9197420923804648, "learning_rate": 3.648429449064182e-05, "loss": 0.1663, "step": 12401 }, { "epoch": 1.470651013874066, "grad_norm": 1.1302591130788422, "learning_rate": 3.648216233111114e-05, "loss": 0.2338, "step": 12402 }, { "epoch": 1.4707695956361913, "grad_norm": 0.8516339670153439, "learning_rate": 3.6480030065728996e-05, "loss": 0.1557, "step": 12403 }, { "epoch": 1.470888177398316, "grad_norm": 1.0503372138008173, "learning_rate": 3.647789769451506e-05, "loss": 0.2837, "step": 12404 }, { "epoch": 1.4710067591604412, "grad_norm": 1.372206071615678, "learning_rate": 3.647576521748898e-05, "loss": 0.3297, "step": 12405 }, { "epoch": 1.471125340922566, "grad_norm": 1.238263687132589, "learning_rate": 3.647363263467043e-05, "loss": 0.1871, "step": 12406 }, { "epoch": 1.4712439226846912, "grad_norm": 1.4605723713583092, "learning_rate": 3.647149994607904e-05, "loss": 0.2479, "step": 12407 }, { "epoch": 1.4713625044468162, "grad_norm": 1.0635405764908195, "learning_rate": 3.6469367151734504e-05, "loss": 0.2066, "step": 12408 }, { "epoch": 1.4714810862089411, "grad_norm": 0.9274213856134043, "learning_rate": 3.646723425165645e-05, "loss": 0.2299, "step": 12409 }, { "epoch": 1.4715996679710661, "grad_norm": 1.0937411147375538, "learning_rate": 3.646510124586456e-05, "loss": 0.2441, "step": 12410 }, { "epoch": 1.471718249733191, "grad_norm": 0.9969916705772012, "learning_rate": 3.646296813437851e-05, "loss": 0.2307, "step": 12411 }, { "epoch": 1.471836831495316, "grad_norm": 0.9437460272518766, "learning_rate": 3.646083491721794e-05, "loss": 0.192, "step": 12412 }, { "epoch": 1.471955413257441, "grad_norm": 0.8676949075905334, "learning_rate": 3.645870159440253e-05, "loss": 0.1588, "step": 12413 }, { "epoch": 1.472073995019566, "grad_norm": 1.0519203391299206, "learning_rate": 3.645656816595194e-05, "loss": 0.2506, "step": 12414 }, { "epoch": 1.472192576781691, "grad_norm": 1.2645443384930473, "learning_rate": 3.645443463188585e-05, "loss": 0.2917, "step": 12415 }, { "epoch": 1.472311158543816, "grad_norm": 1.3525097065132112, "learning_rate": 3.645230099222391e-05, "loss": 0.36, "step": 12416 }, { "epoch": 1.472429740305941, "grad_norm": 0.754780939932602, "learning_rate": 3.64501672469858e-05, "loss": 0.1455, "step": 12417 }, { "epoch": 1.472548322068066, "grad_norm": 1.2525221686339192, "learning_rate": 3.644803339619118e-05, "loss": 0.2564, "step": 12418 }, { "epoch": 1.472666903830191, "grad_norm": 1.0725574806792075, "learning_rate": 3.644589943985975e-05, "loss": 0.2402, "step": 12419 }, { "epoch": 1.472785485592316, "grad_norm": 1.0028510734988503, "learning_rate": 3.644376537801115e-05, "loss": 0.2399, "step": 12420 }, { "epoch": 1.472904067354441, "grad_norm": 0.9980838774278437, "learning_rate": 3.6441631210665075e-05, "loss": 0.2023, "step": 12421 }, { "epoch": 1.4730226491165659, "grad_norm": 1.1304144794476616, "learning_rate": 3.643949693784118e-05, "loss": 0.2368, "step": 12422 }, { "epoch": 1.4731412308786909, "grad_norm": 1.2179398376159196, "learning_rate": 3.6437362559559154e-05, "loss": 0.2107, "step": 12423 }, { "epoch": 1.4732598126408158, "grad_norm": 0.9697856249318505, "learning_rate": 3.643522807583867e-05, "loss": 0.2347, "step": 12424 }, { "epoch": 1.4733783944029408, "grad_norm": 1.297657178624559, "learning_rate": 3.643309348669941e-05, "loss": 0.2771, "step": 12425 }, { "epoch": 1.4734969761650658, "grad_norm": 1.3128479277369434, "learning_rate": 3.643095879216105e-05, "loss": 0.2614, "step": 12426 }, { "epoch": 1.4736155579271908, "grad_norm": 1.3318712786624136, "learning_rate": 3.6428823992243264e-05, "loss": 0.2586, "step": 12427 }, { "epoch": 1.4737341396893158, "grad_norm": 0.98877584666035, "learning_rate": 3.642668908696574e-05, "loss": 0.2126, "step": 12428 }, { "epoch": 1.4738527214514408, "grad_norm": 0.9073026977961455, "learning_rate": 3.642455407634815e-05, "loss": 0.2114, "step": 12429 }, { "epoch": 1.4739713032135657, "grad_norm": 0.9348074287867645, "learning_rate": 3.6422418960410186e-05, "loss": 0.2542, "step": 12430 }, { "epoch": 1.4740898849756907, "grad_norm": 0.8656236461985849, "learning_rate": 3.6420283739171515e-05, "loss": 0.194, "step": 12431 }, { "epoch": 1.4742084667378157, "grad_norm": 1.0840098412195487, "learning_rate": 3.641814841265185e-05, "loss": 0.2518, "step": 12432 }, { "epoch": 1.4743270484999407, "grad_norm": 0.9228113613391247, "learning_rate": 3.6416012980870846e-05, "loss": 0.2275, "step": 12433 }, { "epoch": 1.4744456302620657, "grad_norm": 0.7165140416358798, "learning_rate": 3.6413877443848196e-05, "loss": 0.1349, "step": 12434 }, { "epoch": 1.4745642120241906, "grad_norm": 0.9864080702187864, "learning_rate": 3.6411741801603604e-05, "loss": 0.2014, "step": 12435 }, { "epoch": 1.4746827937863156, "grad_norm": 1.1101378494153227, "learning_rate": 3.6409606054156746e-05, "loss": 0.2731, "step": 12436 }, { "epoch": 1.4748013755484406, "grad_norm": 1.2467531846024609, "learning_rate": 3.640747020152731e-05, "loss": 0.2451, "step": 12437 }, { "epoch": 1.4749199573105656, "grad_norm": 1.4186220960739688, "learning_rate": 3.640533424373499e-05, "loss": 0.2527, "step": 12438 }, { "epoch": 1.4750385390726906, "grad_norm": 0.8973670679205755, "learning_rate": 3.640319818079947e-05, "loss": 0.153, "step": 12439 }, { "epoch": 1.4751571208348155, "grad_norm": 1.1395969539525421, "learning_rate": 3.640106201274044e-05, "loss": 0.2302, "step": 12440 }, { "epoch": 1.4752757025969405, "grad_norm": 1.112543196174642, "learning_rate": 3.639892573957761e-05, "loss": 0.2363, "step": 12441 }, { "epoch": 1.4753942843590655, "grad_norm": 0.8945795457940593, "learning_rate": 3.639678936133066e-05, "loss": 0.1716, "step": 12442 }, { "epoch": 1.4755128661211905, "grad_norm": 0.9250658709926433, "learning_rate": 3.6394652878019284e-05, "loss": 0.1678, "step": 12443 }, { "epoch": 1.4756314478833157, "grad_norm": 1.279129598508207, "learning_rate": 3.639251628966318e-05, "loss": 0.3379, "step": 12444 }, { "epoch": 1.4757500296454404, "grad_norm": 1.2440209188224252, "learning_rate": 3.639037959628206e-05, "loss": 0.2611, "step": 12445 }, { "epoch": 1.4758686114075656, "grad_norm": 0.9385006146202874, "learning_rate": 3.63882427978956e-05, "loss": 0.1992, "step": 12446 }, { "epoch": 1.4759871931696904, "grad_norm": 1.2267240848160825, "learning_rate": 3.6386105894523504e-05, "loss": 0.255, "step": 12447 }, { "epoch": 1.4761057749318156, "grad_norm": 1.3324714224050949, "learning_rate": 3.638396888618547e-05, "loss": 0.3004, "step": 12448 }, { "epoch": 1.4762243566939404, "grad_norm": 0.977868428806666, "learning_rate": 3.6381831772901216e-05, "loss": 0.2219, "step": 12449 }, { "epoch": 1.4763429384560656, "grad_norm": 1.1041643860449097, "learning_rate": 3.637969455469042e-05, "loss": 0.2066, "step": 12450 }, { "epoch": 1.4764615202181903, "grad_norm": 0.8463756277359401, "learning_rate": 3.63775572315728e-05, "loss": 0.169, "step": 12451 }, { "epoch": 1.4765801019803155, "grad_norm": 0.9605706552964786, "learning_rate": 3.637541980356805e-05, "loss": 0.1966, "step": 12452 }, { "epoch": 1.4766986837424403, "grad_norm": 1.397046646140779, "learning_rate": 3.637328227069588e-05, "loss": 0.2858, "step": 12453 }, { "epoch": 1.4768172655045655, "grad_norm": 0.9515588338146465, "learning_rate": 3.6371144632975994e-05, "loss": 0.1954, "step": 12454 }, { "epoch": 1.4769358472666902, "grad_norm": 1.3435691681283766, "learning_rate": 3.63690068904281e-05, "loss": 0.334, "step": 12455 }, { "epoch": 1.4770544290288155, "grad_norm": 0.8560281340626763, "learning_rate": 3.6366869043071904e-05, "loss": 0.1895, "step": 12456 }, { "epoch": 1.4771730107909404, "grad_norm": 0.9109579343806663, "learning_rate": 3.6364731090927116e-05, "loss": 0.1811, "step": 12457 }, { "epoch": 1.4772915925530654, "grad_norm": 1.017585492131578, "learning_rate": 3.6362593034013446e-05, "loss": 0.2214, "step": 12458 }, { "epoch": 1.4774101743151904, "grad_norm": 1.5212850716694017, "learning_rate": 3.63604548723506e-05, "loss": 0.3145, "step": 12459 }, { "epoch": 1.4775287560773154, "grad_norm": 0.9918285750594532, "learning_rate": 3.635831660595829e-05, "loss": 0.2013, "step": 12460 }, { "epoch": 1.4776473378394404, "grad_norm": 0.7132416155399051, "learning_rate": 3.6356178234856225e-05, "loss": 0.1882, "step": 12461 }, { "epoch": 1.4777659196015653, "grad_norm": 0.8422407419923301, "learning_rate": 3.635403975906413e-05, "loss": 0.1648, "step": 12462 }, { "epoch": 1.4778845013636903, "grad_norm": 0.9411505505316864, "learning_rate": 3.6351901178601704e-05, "loss": 0.2378, "step": 12463 }, { "epoch": 1.4780030831258153, "grad_norm": 0.995927740783831, "learning_rate": 3.634976249348867e-05, "loss": 0.2302, "step": 12464 }, { "epoch": 1.4781216648879403, "grad_norm": 1.021097245088831, "learning_rate": 3.634762370374475e-05, "loss": 0.2251, "step": 12465 }, { "epoch": 1.4782402466500653, "grad_norm": 0.7171493944627431, "learning_rate": 3.6345484809389654e-05, "loss": 0.1612, "step": 12466 }, { "epoch": 1.4783588284121902, "grad_norm": 1.2356665077426845, "learning_rate": 3.6343345810443094e-05, "loss": 0.2796, "step": 12467 }, { "epoch": 1.4784774101743152, "grad_norm": 1.0981080196042237, "learning_rate": 3.63412067069248e-05, "loss": 0.2413, "step": 12468 }, { "epoch": 1.4785959919364402, "grad_norm": 0.8413112290919859, "learning_rate": 3.633906749885449e-05, "loss": 0.177, "step": 12469 }, { "epoch": 1.4787145736985652, "grad_norm": 1.4706263723380035, "learning_rate": 3.6336928186251884e-05, "loss": 0.2288, "step": 12470 }, { "epoch": 1.4788331554606902, "grad_norm": 0.9957279370783017, "learning_rate": 3.6334788769136704e-05, "loss": 0.2114, "step": 12471 }, { "epoch": 1.4789517372228151, "grad_norm": 1.0455768430363104, "learning_rate": 3.633264924752866e-05, "loss": 0.1975, "step": 12472 }, { "epoch": 1.4790703189849401, "grad_norm": 0.9269204100881278, "learning_rate": 3.633050962144749e-05, "loss": 0.2093, "step": 12473 }, { "epoch": 1.479188900747065, "grad_norm": 1.0141735195692683, "learning_rate": 3.6328369890912924e-05, "loss": 0.205, "step": 12474 }, { "epoch": 1.47930748250919, "grad_norm": 1.028886460352578, "learning_rate": 3.6326230055944676e-05, "loss": 0.2141, "step": 12475 }, { "epoch": 1.479426064271315, "grad_norm": 0.7967084948134183, "learning_rate": 3.632409011656247e-05, "loss": 0.1552, "step": 12476 }, { "epoch": 1.47954464603344, "grad_norm": 0.9221619168543299, "learning_rate": 3.632195007278605e-05, "loss": 0.1822, "step": 12477 }, { "epoch": 1.479663227795565, "grad_norm": 0.9637858330708172, "learning_rate": 3.6319809924635126e-05, "loss": 0.2571, "step": 12478 }, { "epoch": 1.47978180955769, "grad_norm": 0.942893702002522, "learning_rate": 3.631766967212944e-05, "loss": 0.1836, "step": 12479 }, { "epoch": 1.479900391319815, "grad_norm": 1.0911865620532917, "learning_rate": 3.631552931528872e-05, "loss": 0.2303, "step": 12480 }, { "epoch": 1.48001897308194, "grad_norm": 1.1003223012559828, "learning_rate": 3.6313388854132694e-05, "loss": 0.2011, "step": 12481 }, { "epoch": 1.480137554844065, "grad_norm": 1.0655279993955205, "learning_rate": 3.631124828868109e-05, "loss": 0.2657, "step": 12482 }, { "epoch": 1.48025613660619, "grad_norm": 0.9992036082307079, "learning_rate": 3.630910761895365e-05, "loss": 0.2185, "step": 12483 }, { "epoch": 1.480374718368315, "grad_norm": 0.8354344128756752, "learning_rate": 3.630696684497011e-05, "loss": 0.2127, "step": 12484 }, { "epoch": 1.48049330013044, "grad_norm": 0.9669393571337663, "learning_rate": 3.630482596675019e-05, "loss": 0.1637, "step": 12485 }, { "epoch": 1.4806118818925649, "grad_norm": 1.2526734700192843, "learning_rate": 3.6302684984313654e-05, "loss": 0.2422, "step": 12486 }, { "epoch": 1.4807304636546899, "grad_norm": 0.9375729049406398, "learning_rate": 3.630054389768022e-05, "loss": 0.1831, "step": 12487 }, { "epoch": 1.4808490454168148, "grad_norm": 1.1966893452813174, "learning_rate": 3.6298402706869625e-05, "loss": 0.1978, "step": 12488 }, { "epoch": 1.4809676271789398, "grad_norm": 0.980544197028932, "learning_rate": 3.6296261411901606e-05, "loss": 0.1953, "step": 12489 }, { "epoch": 1.4810862089410648, "grad_norm": 1.0221922500099783, "learning_rate": 3.629412001279592e-05, "loss": 0.2557, "step": 12490 }, { "epoch": 1.4812047907031898, "grad_norm": 1.433298756513002, "learning_rate": 3.629197850957229e-05, "loss": 0.2642, "step": 12491 }, { "epoch": 1.4813233724653148, "grad_norm": 0.8804183901197368, "learning_rate": 3.628983690225046e-05, "loss": 0.2342, "step": 12492 }, { "epoch": 1.4814419542274397, "grad_norm": 1.1975600411541043, "learning_rate": 3.6287695190850185e-05, "loss": 0.1942, "step": 12493 }, { "epoch": 1.4815605359895647, "grad_norm": 1.2119105706971645, "learning_rate": 3.628555337539121e-05, "loss": 0.2821, "step": 12494 }, { "epoch": 1.48167911775169, "grad_norm": 1.1239705429258462, "learning_rate": 3.628341145589326e-05, "loss": 0.2352, "step": 12495 }, { "epoch": 1.4817976995138147, "grad_norm": 1.3032473701700407, "learning_rate": 3.6281269432376096e-05, "loss": 0.1965, "step": 12496 }, { "epoch": 1.4819162812759399, "grad_norm": 1.0261534787718256, "learning_rate": 3.627912730485947e-05, "loss": 0.1729, "step": 12497 }, { "epoch": 1.4820348630380646, "grad_norm": 1.3036937870174787, "learning_rate": 3.627698507336311e-05, "loss": 0.2384, "step": 12498 }, { "epoch": 1.4821534448001898, "grad_norm": 1.0885567084075596, "learning_rate": 3.627484273790678e-05, "loss": 0.2466, "step": 12499 }, { "epoch": 1.4822720265623146, "grad_norm": 0.823122156000076, "learning_rate": 3.627270029851023e-05, "loss": 0.1875, "step": 12500 }, { "epoch": 1.4823906083244398, "grad_norm": 1.1253713901331053, "learning_rate": 3.627055775519321e-05, "loss": 0.2838, "step": 12501 }, { "epoch": 1.4825091900865646, "grad_norm": 1.1595265254799287, "learning_rate": 3.626841510797546e-05, "loss": 0.2422, "step": 12502 }, { "epoch": 1.4826277718486898, "grad_norm": 1.0548102211037107, "learning_rate": 3.626627235687674e-05, "loss": 0.2168, "step": 12503 }, { "epoch": 1.4827463536108145, "grad_norm": 0.959095612120881, "learning_rate": 3.62641295019168e-05, "loss": 0.2089, "step": 12504 }, { "epoch": 1.4828649353729397, "grad_norm": 1.2381655666565883, "learning_rate": 3.626198654311542e-05, "loss": 0.231, "step": 12505 }, { "epoch": 1.4829835171350645, "grad_norm": 1.1733002141339182, "learning_rate": 3.625984348049232e-05, "loss": 0.2397, "step": 12506 }, { "epoch": 1.4831020988971897, "grad_norm": 0.915637655659585, "learning_rate": 3.625770031406727e-05, "loss": 0.1861, "step": 12507 }, { "epoch": 1.4832206806593147, "grad_norm": 0.8388298136180768, "learning_rate": 3.625555704386003e-05, "loss": 0.207, "step": 12508 }, { "epoch": 1.4833392624214397, "grad_norm": 0.8942597666724773, "learning_rate": 3.625341366989036e-05, "loss": 0.1869, "step": 12509 }, { "epoch": 1.4834578441835646, "grad_norm": 1.0968714669364508, "learning_rate": 3.625127019217801e-05, "loss": 0.2551, "step": 12510 }, { "epoch": 1.4835764259456896, "grad_norm": 1.5023246366262994, "learning_rate": 3.624912661074275e-05, "loss": 0.3157, "step": 12511 }, { "epoch": 1.4836950077078146, "grad_norm": 1.0865127044655227, "learning_rate": 3.624698292560434e-05, "loss": 0.1946, "step": 12512 }, { "epoch": 1.4838135894699396, "grad_norm": 1.025972202866654, "learning_rate": 3.6244839136782535e-05, "loss": 0.1731, "step": 12513 }, { "epoch": 1.4839321712320646, "grad_norm": 0.7677167243015839, "learning_rate": 3.62426952442971e-05, "loss": 0.1831, "step": 12514 }, { "epoch": 1.4840507529941895, "grad_norm": 0.9334833474781452, "learning_rate": 3.6240551248167805e-05, "loss": 0.17, "step": 12515 }, { "epoch": 1.4841693347563145, "grad_norm": 1.1722849860822604, "learning_rate": 3.623840714841441e-05, "loss": 0.2345, "step": 12516 }, { "epoch": 1.4842879165184395, "grad_norm": 1.0008074501035849, "learning_rate": 3.623626294505668e-05, "loss": 0.2064, "step": 12517 }, { "epoch": 1.4844064982805645, "grad_norm": 1.3115329619532414, "learning_rate": 3.6234118638114394e-05, "loss": 0.2785, "step": 12518 }, { "epoch": 1.4845250800426895, "grad_norm": 1.0288710804222867, "learning_rate": 3.6231974227607304e-05, "loss": 0.2433, "step": 12519 }, { "epoch": 1.4846436618048144, "grad_norm": 0.8558148079478822, "learning_rate": 3.622982971355519e-05, "loss": 0.1648, "step": 12520 }, { "epoch": 1.4847622435669394, "grad_norm": 0.9391035187728556, "learning_rate": 3.622768509597781e-05, "loss": 0.1606, "step": 12521 }, { "epoch": 1.4848808253290644, "grad_norm": 0.8539685425556743, "learning_rate": 3.622554037489494e-05, "loss": 0.1644, "step": 12522 }, { "epoch": 1.4849994070911894, "grad_norm": 0.957305554091712, "learning_rate": 3.6223395550326355e-05, "loss": 0.1888, "step": 12523 }, { "epoch": 1.4851179888533144, "grad_norm": 1.1046943287936888, "learning_rate": 3.622125062229184e-05, "loss": 0.2145, "step": 12524 }, { "epoch": 1.4852365706154393, "grad_norm": 0.900683564676804, "learning_rate": 3.621910559081114e-05, "loss": 0.2458, "step": 12525 }, { "epoch": 1.4853551523775643, "grad_norm": 1.0743443058517064, "learning_rate": 3.6216960455904045e-05, "loss": 0.2434, "step": 12526 }, { "epoch": 1.4854737341396893, "grad_norm": 1.479247341840166, "learning_rate": 3.621481521759033e-05, "loss": 0.2649, "step": 12527 }, { "epoch": 1.4855923159018143, "grad_norm": 1.1742132822795739, "learning_rate": 3.6212669875889776e-05, "loss": 0.2142, "step": 12528 }, { "epoch": 1.4857108976639393, "grad_norm": 1.314623231535018, "learning_rate": 3.6210524430822154e-05, "loss": 0.2518, "step": 12529 }, { "epoch": 1.4858294794260642, "grad_norm": 1.2055070910916248, "learning_rate": 3.620837888240724e-05, "loss": 0.2099, "step": 12530 }, { "epoch": 1.4859480611881892, "grad_norm": 1.1833824442862528, "learning_rate": 3.620623323066483e-05, "loss": 0.2153, "step": 12531 }, { "epoch": 1.4860666429503142, "grad_norm": 1.44896967879138, "learning_rate": 3.620408747561468e-05, "loss": 0.2391, "step": 12532 }, { "epoch": 1.4861852247124392, "grad_norm": 1.0616965167694405, "learning_rate": 3.620194161727658e-05, "loss": 0.2219, "step": 12533 }, { "epoch": 1.4863038064745642, "grad_norm": 0.7734856586638722, "learning_rate": 3.619979565567032e-05, "loss": 0.1686, "step": 12534 }, { "epoch": 1.4864223882366892, "grad_norm": 1.022202046729624, "learning_rate": 3.619764959081568e-05, "loss": 0.2188, "step": 12535 }, { "epoch": 1.4865409699988141, "grad_norm": 0.9983228780253296, "learning_rate": 3.6195503422732444e-05, "loss": 0.1972, "step": 12536 }, { "epoch": 1.4866595517609391, "grad_norm": 0.8178831607657975, "learning_rate": 3.619335715144039e-05, "loss": 0.1114, "step": 12537 }, { "epoch": 1.486778133523064, "grad_norm": 1.1568636960256862, "learning_rate": 3.619121077695931e-05, "loss": 0.2328, "step": 12538 }, { "epoch": 1.486896715285189, "grad_norm": 0.8298704715985096, "learning_rate": 3.618906429930899e-05, "loss": 0.201, "step": 12539 }, { "epoch": 1.487015297047314, "grad_norm": 1.4860546768890166, "learning_rate": 3.618691771850922e-05, "loss": 0.261, "step": 12540 }, { "epoch": 1.487133878809439, "grad_norm": 1.813371708498617, "learning_rate": 3.618477103457978e-05, "loss": 0.272, "step": 12541 }, { "epoch": 1.487252460571564, "grad_norm": 0.8891833819687782, "learning_rate": 3.618262424754048e-05, "loss": 0.17, "step": 12542 }, { "epoch": 1.487371042333689, "grad_norm": 0.813837061796493, "learning_rate": 3.618047735741109e-05, "loss": 0.1798, "step": 12543 }, { "epoch": 1.4874896240958142, "grad_norm": 1.2676088084814578, "learning_rate": 3.617833036421141e-05, "loss": 0.2554, "step": 12544 }, { "epoch": 1.487608205857939, "grad_norm": 0.976814345912257, "learning_rate": 3.6176183267961225e-05, "loss": 0.1982, "step": 12545 }, { "epoch": 1.4877267876200642, "grad_norm": 0.8997407735825887, "learning_rate": 3.617403606868035e-05, "loss": 0.2005, "step": 12546 }, { "epoch": 1.487845369382189, "grad_norm": 0.8424836208026085, "learning_rate": 3.617188876638855e-05, "loss": 0.1575, "step": 12547 }, { "epoch": 1.4879639511443141, "grad_norm": 0.8718438372426466, "learning_rate": 3.616974136110565e-05, "loss": 0.2046, "step": 12548 }, { "epoch": 1.4880825329064389, "grad_norm": 0.8797043689288352, "learning_rate": 3.616759385285142e-05, "loss": 0.1996, "step": 12549 }, { "epoch": 1.488201114668564, "grad_norm": 0.7190997805148828, "learning_rate": 3.6165446241645676e-05, "loss": 0.1538, "step": 12550 }, { "epoch": 1.4883196964306888, "grad_norm": 0.7927637806908981, "learning_rate": 3.616329852750821e-05, "loss": 0.1477, "step": 12551 }, { "epoch": 1.488438278192814, "grad_norm": 1.0218591937495884, "learning_rate": 3.616115071045881e-05, "loss": 0.2455, "step": 12552 }, { "epoch": 1.4885568599549388, "grad_norm": 1.2593884760339633, "learning_rate": 3.615900279051729e-05, "loss": 0.2261, "step": 12553 }, { "epoch": 1.488675441717064, "grad_norm": 1.1949988764165909, "learning_rate": 3.615685476770346e-05, "loss": 0.2802, "step": 12554 }, { "epoch": 1.4887940234791888, "grad_norm": 1.4096176486570557, "learning_rate": 3.615470664203711e-05, "loss": 0.2924, "step": 12555 }, { "epoch": 1.488912605241314, "grad_norm": 1.105267933848277, "learning_rate": 3.615255841353803e-05, "loss": 0.191, "step": 12556 }, { "epoch": 1.489031187003439, "grad_norm": 1.4288877741501853, "learning_rate": 3.615041008222605e-05, "loss": 0.3227, "step": 12557 }, { "epoch": 1.489149768765564, "grad_norm": 0.7541964797264543, "learning_rate": 3.6148261648120955e-05, "loss": 0.1365, "step": 12558 }, { "epoch": 1.489268350527689, "grad_norm": 1.0009212228984603, "learning_rate": 3.614611311124256e-05, "loss": 0.19, "step": 12559 }, { "epoch": 1.489386932289814, "grad_norm": 0.8351160284616331, "learning_rate": 3.614396447161067e-05, "loss": 0.1643, "step": 12560 }, { "epoch": 1.4895055140519389, "grad_norm": 0.7502958828860733, "learning_rate": 3.61418157292451e-05, "loss": 0.1543, "step": 12561 }, { "epoch": 1.4896240958140639, "grad_norm": 0.9808135657690321, "learning_rate": 3.613966688416565e-05, "loss": 0.2274, "step": 12562 }, { "epoch": 1.4897426775761888, "grad_norm": 1.2620717968628377, "learning_rate": 3.613751793639212e-05, "loss": 0.2016, "step": 12563 }, { "epoch": 1.4898612593383138, "grad_norm": 0.8889006716876188, "learning_rate": 3.6135368885944345e-05, "loss": 0.218, "step": 12564 }, { "epoch": 1.4899798411004388, "grad_norm": 1.0098338107825695, "learning_rate": 3.6133219732842125e-05, "loss": 0.22, "step": 12565 }, { "epoch": 1.4900984228625638, "grad_norm": 0.9522662261250159, "learning_rate": 3.613107047710526e-05, "loss": 0.2256, "step": 12566 }, { "epoch": 1.4902170046246888, "grad_norm": 1.122116959354855, "learning_rate": 3.6128921118753575e-05, "loss": 0.198, "step": 12567 }, { "epoch": 1.4903355863868137, "grad_norm": 0.9721874764370407, "learning_rate": 3.61267716578069e-05, "loss": 0.2089, "step": 12568 }, { "epoch": 1.4904541681489387, "grad_norm": 0.9593089074932214, "learning_rate": 3.612462209428502e-05, "loss": 0.1957, "step": 12569 }, { "epoch": 1.4905727499110637, "grad_norm": 0.9658133681044773, "learning_rate": 3.612247242820778e-05, "loss": 0.1776, "step": 12570 }, { "epoch": 1.4906913316731887, "grad_norm": 0.8738658422598973, "learning_rate": 3.612032265959497e-05, "loss": 0.1922, "step": 12571 }, { "epoch": 1.4908099134353137, "grad_norm": 1.0680258374720017, "learning_rate": 3.611817278846643e-05, "loss": 0.2106, "step": 12572 }, { "epoch": 1.4909284951974386, "grad_norm": 1.3315986080535107, "learning_rate": 3.6116022814841966e-05, "loss": 0.3238, "step": 12573 }, { "epoch": 1.4910470769595636, "grad_norm": 0.719450488562133, "learning_rate": 3.611387273874141e-05, "loss": 0.1076, "step": 12574 }, { "epoch": 1.4911656587216886, "grad_norm": 1.1534912725600186, "learning_rate": 3.611172256018457e-05, "loss": 0.2845, "step": 12575 }, { "epoch": 1.4912842404838136, "grad_norm": 1.0151010020065765, "learning_rate": 3.6109572279191276e-05, "loss": 0.2374, "step": 12576 }, { "epoch": 1.4914028222459386, "grad_norm": 0.8969565513403346, "learning_rate": 3.6107421895781343e-05, "loss": 0.1494, "step": 12577 }, { "epoch": 1.4915214040080635, "grad_norm": 1.0743650770776538, "learning_rate": 3.6105271409974614e-05, "loss": 0.1814, "step": 12578 }, { "epoch": 1.4916399857701885, "grad_norm": 0.8821877800268692, "learning_rate": 3.610312082179089e-05, "loss": 0.181, "step": 12579 }, { "epoch": 1.4917585675323135, "grad_norm": 1.0306604561361838, "learning_rate": 3.610097013125001e-05, "loss": 0.2158, "step": 12580 }, { "epoch": 1.4918771492944385, "grad_norm": 1.0527085906254638, "learning_rate": 3.60988193383718e-05, "loss": 0.2456, "step": 12581 }, { "epoch": 1.4919957310565635, "grad_norm": 0.8784492303955894, "learning_rate": 3.609666844317608e-05, "loss": 0.1543, "step": 12582 }, { "epoch": 1.4921143128186884, "grad_norm": 0.8871025299571554, "learning_rate": 3.6094517445682694e-05, "loss": 0.1514, "step": 12583 }, { "epoch": 1.4922328945808134, "grad_norm": 0.999770377702513, "learning_rate": 3.609236634591145e-05, "loss": 0.2195, "step": 12584 }, { "epoch": 1.4923514763429384, "grad_norm": 1.3742151018841378, "learning_rate": 3.60902151438822e-05, "loss": 0.2844, "step": 12585 }, { "epoch": 1.4924700581050634, "grad_norm": 1.0120497572403335, "learning_rate": 3.6088063839614765e-05, "loss": 0.2328, "step": 12586 }, { "epoch": 1.4925886398671884, "grad_norm": 0.8015996656086325, "learning_rate": 3.608591243312898e-05, "loss": 0.1814, "step": 12587 }, { "epoch": 1.4927072216293134, "grad_norm": 0.955404143670177, "learning_rate": 3.6083760924444676e-05, "loss": 0.16, "step": 12588 }, { "epoch": 1.4928258033914383, "grad_norm": 1.0674300081176327, "learning_rate": 3.608160931358169e-05, "loss": 0.2335, "step": 12589 }, { "epoch": 1.4929443851535633, "grad_norm": 0.7979566644373979, "learning_rate": 3.6079457600559846e-05, "loss": 0.1414, "step": 12590 }, { "epoch": 1.4930629669156883, "grad_norm": 0.6621274275247155, "learning_rate": 3.6077305785399e-05, "loss": 0.1278, "step": 12591 }, { "epoch": 1.4931815486778133, "grad_norm": 1.0792476586353321, "learning_rate": 3.607515386811897e-05, "loss": 0.1909, "step": 12592 }, { "epoch": 1.4933001304399383, "grad_norm": 0.8761876557045626, "learning_rate": 3.6073001848739605e-05, "loss": 0.2194, "step": 12593 }, { "epoch": 1.4934187122020632, "grad_norm": 1.0318954337543318, "learning_rate": 3.6070849727280745e-05, "loss": 0.1785, "step": 12594 }, { "epoch": 1.4935372939641884, "grad_norm": 1.6056878064211921, "learning_rate": 3.606869750376223e-05, "loss": 0.2929, "step": 12595 }, { "epoch": 1.4936558757263132, "grad_norm": 0.9789292456005341, "learning_rate": 3.606654517820389e-05, "loss": 0.1861, "step": 12596 }, { "epoch": 1.4937744574884384, "grad_norm": 0.9409560999962505, "learning_rate": 3.606439275062557e-05, "loss": 0.1856, "step": 12597 }, { "epoch": 1.4938930392505632, "grad_norm": 0.8612320423133016, "learning_rate": 3.606224022104713e-05, "loss": 0.1566, "step": 12598 }, { "epoch": 1.4940116210126884, "grad_norm": 1.092400620800352, "learning_rate": 3.606008758948839e-05, "loss": 0.2441, "step": 12599 }, { "epoch": 1.4941302027748131, "grad_norm": 1.0630343313069401, "learning_rate": 3.605793485596921e-05, "loss": 0.2005, "step": 12600 }, { "epoch": 1.4942487845369383, "grad_norm": 0.9923567375773299, "learning_rate": 3.6055782020509424e-05, "loss": 0.1876, "step": 12601 }, { "epoch": 1.494367366299063, "grad_norm": 0.7733036391998498, "learning_rate": 3.6053629083128893e-05, "loss": 0.1132, "step": 12602 }, { "epoch": 1.4944859480611883, "grad_norm": 1.1053666390174948, "learning_rate": 3.605147604384745e-05, "loss": 0.2224, "step": 12603 }, { "epoch": 1.494604529823313, "grad_norm": 0.9426538131127932, "learning_rate": 3.6049322902684964e-05, "loss": 0.1985, "step": 12604 }, { "epoch": 1.4947231115854382, "grad_norm": 1.4605190144606082, "learning_rate": 3.6047169659661254e-05, "loss": 0.3755, "step": 12605 }, { "epoch": 1.4948416933475632, "grad_norm": 1.2768797332159922, "learning_rate": 3.60450163147962e-05, "loss": 0.2523, "step": 12606 }, { "epoch": 1.4949602751096882, "grad_norm": 1.2022141252166887, "learning_rate": 3.604286286810963e-05, "loss": 0.2357, "step": 12607 }, { "epoch": 1.4950788568718132, "grad_norm": 1.0429718571499795, "learning_rate": 3.604070931962141e-05, "loss": 0.2251, "step": 12608 }, { "epoch": 1.4951974386339382, "grad_norm": 1.4083343238106147, "learning_rate": 3.6038555669351396e-05, "loss": 0.2112, "step": 12609 }, { "epoch": 1.4953160203960632, "grad_norm": 0.9725806044719973, "learning_rate": 3.603640191731942e-05, "loss": 0.1968, "step": 12610 }, { "epoch": 1.4954346021581881, "grad_norm": 1.0557482526853013, "learning_rate": 3.603424806354536e-05, "loss": 0.1873, "step": 12611 }, { "epoch": 1.4955531839203131, "grad_norm": 1.1002983761386187, "learning_rate": 3.603209410804906e-05, "loss": 0.2589, "step": 12612 }, { "epoch": 1.495671765682438, "grad_norm": 0.7362958170037772, "learning_rate": 3.6029940050850384e-05, "loss": 0.1652, "step": 12613 }, { "epoch": 1.495790347444563, "grad_norm": 0.8504087934669863, "learning_rate": 3.602778589196919e-05, "loss": 0.1619, "step": 12614 }, { "epoch": 1.495908929206688, "grad_norm": 1.4990842461779252, "learning_rate": 3.602563163142533e-05, "loss": 0.2575, "step": 12615 }, { "epoch": 1.496027510968813, "grad_norm": 1.0550744083288, "learning_rate": 3.602347726923867e-05, "loss": 0.1992, "step": 12616 }, { "epoch": 1.496146092730938, "grad_norm": 1.0593378710844725, "learning_rate": 3.602132280542906e-05, "loss": 0.2507, "step": 12617 }, { "epoch": 1.496264674493063, "grad_norm": 0.895963069638438, "learning_rate": 3.6019168240016375e-05, "loss": 0.1629, "step": 12618 }, { "epoch": 1.496383256255188, "grad_norm": 0.9690363991296089, "learning_rate": 3.601701357302047e-05, "loss": 0.2676, "step": 12619 }, { "epoch": 1.496501838017313, "grad_norm": 0.7677250131045302, "learning_rate": 3.601485880446122e-05, "loss": 0.1577, "step": 12620 }, { "epoch": 1.496620419779438, "grad_norm": 1.3064602271140566, "learning_rate": 3.601270393435846e-05, "loss": 0.2986, "step": 12621 }, { "epoch": 1.496739001541563, "grad_norm": 1.001882508900169, "learning_rate": 3.6010548962732086e-05, "loss": 0.2464, "step": 12622 }, { "epoch": 1.496857583303688, "grad_norm": 0.8103418335676881, "learning_rate": 3.600839388960195e-05, "loss": 0.1733, "step": 12623 }, { "epoch": 1.4969761650658129, "grad_norm": 0.7912497892948075, "learning_rate": 3.6006238714987915e-05, "loss": 0.1864, "step": 12624 }, { "epoch": 1.4970947468279379, "grad_norm": 1.7423952948251757, "learning_rate": 3.6004083438909864e-05, "loss": 0.3414, "step": 12625 }, { "epoch": 1.4972133285900628, "grad_norm": 0.6971631711840531, "learning_rate": 3.600192806138766e-05, "loss": 0.154, "step": 12626 }, { "epoch": 1.4973319103521878, "grad_norm": 0.9460583035350895, "learning_rate": 3.599977258244116e-05, "loss": 0.2225, "step": 12627 }, { "epoch": 1.4974504921143128, "grad_norm": 1.0266047690379483, "learning_rate": 3.599761700209026e-05, "loss": 0.2217, "step": 12628 }, { "epoch": 1.4975690738764378, "grad_norm": 0.9579189557211468, "learning_rate": 3.5995461320354804e-05, "loss": 0.2062, "step": 12629 }, { "epoch": 1.4976876556385628, "grad_norm": 0.7504954577766069, "learning_rate": 3.5993305537254696e-05, "loss": 0.1429, "step": 12630 }, { "epoch": 1.4978062374006877, "grad_norm": 1.0403688730345135, "learning_rate": 3.599114965280977e-05, "loss": 0.2947, "step": 12631 }, { "epoch": 1.4979248191628127, "grad_norm": 1.2132742867301396, "learning_rate": 3.598899366703994e-05, "loss": 0.2891, "step": 12632 }, { "epoch": 1.4980434009249377, "grad_norm": 0.9355481903629069, "learning_rate": 3.598683757996505e-05, "loss": 0.1918, "step": 12633 }, { "epoch": 1.4981619826870627, "grad_norm": 0.903390695598067, "learning_rate": 3.5984681391605005e-05, "loss": 0.168, "step": 12634 }, { "epoch": 1.4982805644491877, "grad_norm": 0.759154754908187, "learning_rate": 3.5982525101979655e-05, "loss": 0.1447, "step": 12635 }, { "epoch": 1.4983991462113126, "grad_norm": 1.0178203191224262, "learning_rate": 3.59803687111089e-05, "loss": 0.2163, "step": 12636 }, { "epoch": 1.4985177279734376, "grad_norm": 0.9523219152631872, "learning_rate": 3.59782122190126e-05, "loss": 0.1788, "step": 12637 }, { "epoch": 1.4986363097355626, "grad_norm": 1.5725955763603392, "learning_rate": 3.597605562571066e-05, "loss": 0.3321, "step": 12638 }, { "epoch": 1.4987548914976876, "grad_norm": 0.9666149871688506, "learning_rate": 3.597389893122294e-05, "loss": 0.2157, "step": 12639 }, { "epoch": 1.4988734732598126, "grad_norm": 1.0269125463620583, "learning_rate": 3.597174213556932e-05, "loss": 0.1952, "step": 12640 }, { "epoch": 1.4989920550219376, "grad_norm": 0.8218450559601408, "learning_rate": 3.5969585238769705e-05, "loss": 0.1669, "step": 12641 }, { "epoch": 1.4991106367840625, "grad_norm": 1.1256686136681557, "learning_rate": 3.596742824084396e-05, "loss": 0.2388, "step": 12642 }, { "epoch": 1.4992292185461875, "grad_norm": 1.1717839995730541, "learning_rate": 3.596527114181197e-05, "loss": 0.2529, "step": 12643 }, { "epoch": 1.4993478003083127, "grad_norm": 0.9759498373038274, "learning_rate": 3.596311394169363e-05, "loss": 0.1605, "step": 12644 }, { "epoch": 1.4994663820704375, "grad_norm": 1.006036111038569, "learning_rate": 3.5960956640508835e-05, "loss": 0.1941, "step": 12645 }, { "epoch": 1.4995849638325627, "grad_norm": 1.2970927149255758, "learning_rate": 3.595879923827745e-05, "loss": 0.2054, "step": 12646 }, { "epoch": 1.4997035455946874, "grad_norm": 1.2648163766565057, "learning_rate": 3.595664173501938e-05, "loss": 0.2483, "step": 12647 }, { "epoch": 1.4998221273568126, "grad_norm": 1.1450626413133513, "learning_rate": 3.595448413075451e-05, "loss": 0.2324, "step": 12648 }, { "epoch": 1.4999407091189374, "grad_norm": 1.3118284458463267, "learning_rate": 3.595232642550273e-05, "loss": 0.3055, "step": 12649 }, { "epoch": 1.5000592908810626, "grad_norm": 0.7700136496247144, "learning_rate": 3.595016861928392e-05, "loss": 0.1843, "step": 12650 }, { "epoch": 1.5001778726431874, "grad_norm": 0.8578225116222609, "learning_rate": 3.594801071211798e-05, "loss": 0.2396, "step": 12651 }, { "epoch": 1.5002964544053126, "grad_norm": 0.8298136567741505, "learning_rate": 3.594585270402482e-05, "loss": 0.169, "step": 12652 }, { "epoch": 1.5004150361674373, "grad_norm": 1.374648778316164, "learning_rate": 3.594369459502432e-05, "loss": 0.2253, "step": 12653 }, { "epoch": 1.5005336179295625, "grad_norm": 1.1007666257424544, "learning_rate": 3.5941536385136374e-05, "loss": 0.2605, "step": 12654 }, { "epoch": 1.5006521996916873, "grad_norm": 1.0056273571291243, "learning_rate": 3.5939378074380873e-05, "loss": 0.2513, "step": 12655 }, { "epoch": 1.5007707814538125, "grad_norm": 0.9417307630364411, "learning_rate": 3.593721966277773e-05, "loss": 0.1983, "step": 12656 }, { "epoch": 1.5008893632159372, "grad_norm": 0.8769478066326779, "learning_rate": 3.593506115034683e-05, "loss": 0.189, "step": 12657 }, { "epoch": 1.5010079449780624, "grad_norm": 1.101941050978377, "learning_rate": 3.593290253710808e-05, "loss": 0.2084, "step": 12658 }, { "epoch": 1.5011265267401872, "grad_norm": 0.812628127232092, "learning_rate": 3.593074382308138e-05, "loss": 0.2231, "step": 12659 }, { "epoch": 1.5012451085023124, "grad_norm": 1.0921489888679725, "learning_rate": 3.592858500828661e-05, "loss": 0.2112, "step": 12660 }, { "epoch": 1.5013636902644372, "grad_norm": 1.1174056829200203, "learning_rate": 3.59264260927437e-05, "loss": 0.2331, "step": 12661 }, { "epoch": 1.5014822720265624, "grad_norm": 1.1015846066809256, "learning_rate": 3.5924267076472534e-05, "loss": 0.2562, "step": 12662 }, { "epoch": 1.5016008537886874, "grad_norm": 1.0987097262847525, "learning_rate": 3.592210795949302e-05, "loss": 0.1901, "step": 12663 }, { "epoch": 1.5017194355508123, "grad_norm": 1.1797912937646826, "learning_rate": 3.591994874182507e-05, "loss": 0.2419, "step": 12664 }, { "epoch": 1.5018380173129373, "grad_norm": 1.2028485831376787, "learning_rate": 3.5917789423488576e-05, "loss": 0.2116, "step": 12665 }, { "epoch": 1.5019565990750623, "grad_norm": 1.1185386069226784, "learning_rate": 3.5915630004503465e-05, "loss": 0.3004, "step": 12666 }, { "epoch": 1.5020751808371873, "grad_norm": 0.8148685135378724, "learning_rate": 3.591347048488962e-05, "loss": 0.2608, "step": 12667 }, { "epoch": 1.5021937625993123, "grad_norm": 0.9867738434580418, "learning_rate": 3.5911310864666964e-05, "loss": 0.176, "step": 12668 }, { "epoch": 1.5023123443614372, "grad_norm": 1.3080999690399038, "learning_rate": 3.5909151143855405e-05, "loss": 0.3157, "step": 12669 }, { "epoch": 1.5024309261235622, "grad_norm": 1.057046321600237, "learning_rate": 3.590699132247485e-05, "loss": 0.2015, "step": 12670 }, { "epoch": 1.5025495078856872, "grad_norm": 0.9705565590843959, "learning_rate": 3.5904831400545205e-05, "loss": 0.193, "step": 12671 }, { "epoch": 1.5026680896478122, "grad_norm": 1.5329197079519206, "learning_rate": 3.590267137808639e-05, "loss": 0.3042, "step": 12672 }, { "epoch": 1.5027866714099372, "grad_norm": 0.8070296028475691, "learning_rate": 3.590051125511831e-05, "loss": 0.1445, "step": 12673 }, { "epoch": 1.5029052531720621, "grad_norm": 1.6355556467491148, "learning_rate": 3.589835103166088e-05, "loss": 0.314, "step": 12674 }, { "epoch": 1.5030238349341871, "grad_norm": 0.9362825478460693, "learning_rate": 3.589619070773403e-05, "loss": 0.199, "step": 12675 }, { "epoch": 1.503142416696312, "grad_norm": 1.3420605522320408, "learning_rate": 3.589403028335766e-05, "loss": 0.2499, "step": 12676 }, { "epoch": 1.503260998458437, "grad_norm": 0.8937738065772499, "learning_rate": 3.5891869758551685e-05, "loss": 0.1513, "step": 12677 }, { "epoch": 1.503379580220562, "grad_norm": 0.9729036628692663, "learning_rate": 3.588970913333604e-05, "loss": 0.1645, "step": 12678 }, { "epoch": 1.503498161982687, "grad_norm": 0.895990560649564, "learning_rate": 3.588754840773062e-05, "loss": 0.1911, "step": 12679 }, { "epoch": 1.503616743744812, "grad_norm": 0.8486450881295491, "learning_rate": 3.588538758175535e-05, "loss": 0.2025, "step": 12680 }, { "epoch": 1.503735325506937, "grad_norm": 1.4038210600664502, "learning_rate": 3.588322665543016e-05, "loss": 0.2347, "step": 12681 }, { "epoch": 1.503853907269062, "grad_norm": 1.0335139311397177, "learning_rate": 3.588106562877497e-05, "loss": 0.2504, "step": 12682 }, { "epoch": 1.503972489031187, "grad_norm": 0.9613026132621915, "learning_rate": 3.587890450180969e-05, "loss": 0.2092, "step": 12683 }, { "epoch": 1.504091070793312, "grad_norm": 1.082650148671724, "learning_rate": 3.587674327455426e-05, "loss": 0.2239, "step": 12684 }, { "epoch": 1.504209652555437, "grad_norm": 1.482892566998799, "learning_rate": 3.587458194702859e-05, "loss": 0.3049, "step": 12685 }, { "epoch": 1.504328234317562, "grad_norm": 1.3724690523493046, "learning_rate": 3.587242051925262e-05, "loss": 0.2959, "step": 12686 }, { "epoch": 1.504446816079687, "grad_norm": 1.2107173204261448, "learning_rate": 3.587025899124625e-05, "loss": 0.2557, "step": 12687 }, { "epoch": 1.5045653978418119, "grad_norm": 1.0057665229375403, "learning_rate": 3.586809736302943e-05, "loss": 0.2032, "step": 12688 }, { "epoch": 1.504683979603937, "grad_norm": 0.9730366139698524, "learning_rate": 3.586593563462207e-05, "loss": 0.1655, "step": 12689 }, { "epoch": 1.5048025613660618, "grad_norm": 0.896697560374444, "learning_rate": 3.586377380604413e-05, "loss": 0.2311, "step": 12690 }, { "epoch": 1.504921143128187, "grad_norm": 0.908972362967689, "learning_rate": 3.58616118773155e-05, "loss": 0.1797, "step": 12691 }, { "epoch": 1.5050397248903118, "grad_norm": 1.01150434747925, "learning_rate": 3.585944984845613e-05, "loss": 0.2576, "step": 12692 }, { "epoch": 1.505158306652437, "grad_norm": 1.0485064445565822, "learning_rate": 3.5857287719485944e-05, "loss": 0.2131, "step": 12693 }, { "epoch": 1.5052768884145618, "grad_norm": 0.9263667666373326, "learning_rate": 3.585512549042489e-05, "loss": 0.1919, "step": 12694 }, { "epoch": 1.505395470176687, "grad_norm": 0.9327124370108697, "learning_rate": 3.5852963161292884e-05, "loss": 0.2211, "step": 12695 }, { "epoch": 1.5055140519388117, "grad_norm": 1.1494352599470572, "learning_rate": 3.585080073210987e-05, "loss": 0.1641, "step": 12696 }, { "epoch": 1.505632633700937, "grad_norm": 0.9219666045613872, "learning_rate": 3.584863820289578e-05, "loss": 0.2221, "step": 12697 }, { "epoch": 1.5057512154630617, "grad_norm": 0.6243764845791333, "learning_rate": 3.584647557367055e-05, "loss": 0.127, "step": 12698 }, { "epoch": 1.5058697972251869, "grad_norm": 1.6516312681187, "learning_rate": 3.5844312844454116e-05, "loss": 0.3941, "step": 12699 }, { "epoch": 1.5059883789873116, "grad_norm": 0.9651082863440835, "learning_rate": 3.5842150015266404e-05, "loss": 0.2077, "step": 12700 }, { "epoch": 1.5061069607494368, "grad_norm": 0.9648012573550039, "learning_rate": 3.583998708612738e-05, "loss": 0.1866, "step": 12701 }, { "epoch": 1.5062255425115616, "grad_norm": 1.0559260349317146, "learning_rate": 3.5837824057056956e-05, "loss": 0.1836, "step": 12702 }, { "epoch": 1.5063441242736868, "grad_norm": 0.9793180490311588, "learning_rate": 3.583566092807509e-05, "loss": 0.1857, "step": 12703 }, { "epoch": 1.5064627060358116, "grad_norm": 0.9292965210561899, "learning_rate": 3.5833497699201724e-05, "loss": 0.2305, "step": 12704 }, { "epoch": 1.5065812877979368, "grad_norm": 1.1836461714445512, "learning_rate": 3.583133437045679e-05, "loss": 0.2775, "step": 12705 }, { "epoch": 1.5066998695600615, "grad_norm": 1.081110013626963, "learning_rate": 3.582917094186023e-05, "loss": 0.2016, "step": 12706 }, { "epoch": 1.5068184513221867, "grad_norm": 1.123167462431224, "learning_rate": 3.5827007413432e-05, "loss": 0.206, "step": 12707 }, { "epoch": 1.5069370330843115, "grad_norm": 0.8189275792920854, "learning_rate": 3.582484378519203e-05, "loss": 0.1571, "step": 12708 }, { "epoch": 1.5070556148464367, "grad_norm": 1.3267763623701139, "learning_rate": 3.582268005716029e-05, "loss": 0.2758, "step": 12709 }, { "epoch": 1.5071741966085614, "grad_norm": 0.8957207785228737, "learning_rate": 3.58205162293567e-05, "loss": 0.2109, "step": 12710 }, { "epoch": 1.5072927783706866, "grad_norm": 0.8071825510501123, "learning_rate": 3.581835230180122e-05, "loss": 0.174, "step": 12711 }, { "epoch": 1.5074113601328116, "grad_norm": 0.9555791255856387, "learning_rate": 3.5816188274513795e-05, "loss": 0.1729, "step": 12712 }, { "epoch": 1.5075299418949366, "grad_norm": 1.3383101815550953, "learning_rate": 3.581402414751438e-05, "loss": 0.3131, "step": 12713 }, { "epoch": 1.5076485236570616, "grad_norm": 1.0330580258777353, "learning_rate": 3.5811859920822923e-05, "loss": 0.1927, "step": 12714 }, { "epoch": 1.5077671054191866, "grad_norm": 1.12046174710847, "learning_rate": 3.580969559445938e-05, "loss": 0.2016, "step": 12715 }, { "epoch": 1.5078856871813116, "grad_norm": 1.1177620117351186, "learning_rate": 3.58075311684437e-05, "loss": 0.2542, "step": 12716 }, { "epoch": 1.5080042689434365, "grad_norm": 1.2863836303970093, "learning_rate": 3.5805366642795836e-05, "loss": 0.3188, "step": 12717 }, { "epoch": 1.5081228507055615, "grad_norm": 0.8484385628616193, "learning_rate": 3.580320201753574e-05, "loss": 0.2534, "step": 12718 }, { "epoch": 1.5082414324676865, "grad_norm": 1.1733804698709354, "learning_rate": 3.5801037292683364e-05, "loss": 0.224, "step": 12719 }, { "epoch": 1.5083600142298115, "grad_norm": 1.3316265408263062, "learning_rate": 3.579887246825868e-05, "loss": 0.2651, "step": 12720 }, { "epoch": 1.5084785959919365, "grad_norm": 0.8221191835053854, "learning_rate": 3.579670754428163e-05, "loss": 0.1677, "step": 12721 }, { "epoch": 1.5085971777540614, "grad_norm": 1.3228755778591417, "learning_rate": 3.5794542520772166e-05, "loss": 0.2092, "step": 12722 }, { "epoch": 1.5087157595161864, "grad_norm": 0.8407258942931625, "learning_rate": 3.5792377397750263e-05, "loss": 0.1633, "step": 12723 }, { "epoch": 1.5088343412783114, "grad_norm": 1.4339249855501721, "learning_rate": 3.5790212175235885e-05, "loss": 0.2937, "step": 12724 }, { "epoch": 1.5089529230404364, "grad_norm": 0.6985406406124984, "learning_rate": 3.5788046853248966e-05, "loss": 0.134, "step": 12725 }, { "epoch": 1.5090715048025614, "grad_norm": 0.9221124110335374, "learning_rate": 3.5785881431809496e-05, "loss": 0.1712, "step": 12726 }, { "epoch": 1.5091900865646863, "grad_norm": 0.9720867769212468, "learning_rate": 3.5783715910937425e-05, "loss": 0.2186, "step": 12727 }, { "epoch": 1.5093086683268113, "grad_norm": 1.14200566334914, "learning_rate": 3.5781550290652715e-05, "loss": 0.1921, "step": 12728 }, { "epoch": 1.5094272500889363, "grad_norm": 1.0326654965705222, "learning_rate": 3.5779384570975336e-05, "loss": 0.2012, "step": 12729 }, { "epoch": 1.5095458318510613, "grad_norm": 1.585427265995397, "learning_rate": 3.5777218751925245e-05, "loss": 0.3802, "step": 12730 }, { "epoch": 1.5096644136131863, "grad_norm": 1.6031332938147655, "learning_rate": 3.577505283352241e-05, "loss": 0.2549, "step": 12731 }, { "epoch": 1.5097829953753112, "grad_norm": 1.5074725697006583, "learning_rate": 3.57728868157868e-05, "loss": 0.3201, "step": 12732 }, { "epoch": 1.5099015771374362, "grad_norm": 1.1468015619528944, "learning_rate": 3.57707206987384e-05, "loss": 0.2379, "step": 12733 }, { "epoch": 1.5100201588995612, "grad_norm": 0.8796726688261193, "learning_rate": 3.576855448239715e-05, "loss": 0.1593, "step": 12734 }, { "epoch": 1.5101387406616862, "grad_norm": 0.800440393512957, "learning_rate": 3.576638816678304e-05, "loss": 0.1915, "step": 12735 }, { "epoch": 1.5102573224238112, "grad_norm": 1.0511677043974554, "learning_rate": 3.576422175191602e-05, "loss": 0.21, "step": 12736 }, { "epoch": 1.5103759041859361, "grad_norm": 1.0084603302683448, "learning_rate": 3.576205523781609e-05, "loss": 0.1821, "step": 12737 }, { "epoch": 1.5104944859480613, "grad_norm": 1.0619121275524137, "learning_rate": 3.57598886245032e-05, "loss": 0.2232, "step": 12738 }, { "epoch": 1.510613067710186, "grad_norm": 0.9403269366216286, "learning_rate": 3.5757721911997334e-05, "loss": 0.2026, "step": 12739 }, { "epoch": 1.5107316494723113, "grad_norm": 1.3718333260681264, "learning_rate": 3.5755555100318466e-05, "loss": 0.2399, "step": 12740 }, { "epoch": 1.510850231234436, "grad_norm": 0.8205815229321826, "learning_rate": 3.575338818948657e-05, "loss": 0.1642, "step": 12741 }, { "epoch": 1.5109688129965613, "grad_norm": 1.3744135586803867, "learning_rate": 3.5751221179521615e-05, "loss": 0.2349, "step": 12742 }, { "epoch": 1.511087394758686, "grad_norm": 1.1018175961645407, "learning_rate": 3.574905407044359e-05, "loss": 0.1886, "step": 12743 }, { "epoch": 1.5112059765208112, "grad_norm": 0.7697956211017584, "learning_rate": 3.574688686227247e-05, "loss": 0.2052, "step": 12744 }, { "epoch": 1.511324558282936, "grad_norm": 1.1286263952416127, "learning_rate": 3.574471955502822e-05, "loss": 0.2805, "step": 12745 }, { "epoch": 1.5114431400450612, "grad_norm": 1.6411763318216321, "learning_rate": 3.574255214873085e-05, "loss": 0.3264, "step": 12746 }, { "epoch": 1.511561721807186, "grad_norm": 1.3780513804687262, "learning_rate": 3.57403846434003e-05, "loss": 0.265, "step": 12747 }, { "epoch": 1.5116803035693112, "grad_norm": 1.080941303019903, "learning_rate": 3.57382170390566e-05, "loss": 0.2146, "step": 12748 }, { "epoch": 1.511798885331436, "grad_norm": 0.9524227195986534, "learning_rate": 3.573604933571969e-05, "loss": 0.2263, "step": 12749 }, { "epoch": 1.5119174670935611, "grad_norm": 0.8713312090969385, "learning_rate": 3.573388153340958e-05, "loss": 0.1846, "step": 12750 }, { "epoch": 1.5120360488556859, "grad_norm": 1.3548831406054143, "learning_rate": 3.5731713632146236e-05, "loss": 0.2935, "step": 12751 }, { "epoch": 1.512154630617811, "grad_norm": 0.9141544501953268, "learning_rate": 3.572954563194966e-05, "loss": 0.188, "step": 12752 }, { "epoch": 1.5122732123799358, "grad_norm": 1.4039703771087766, "learning_rate": 3.5727377532839814e-05, "loss": 0.2881, "step": 12753 }, { "epoch": 1.512391794142061, "grad_norm": 0.9734263237149824, "learning_rate": 3.572520933483672e-05, "loss": 0.185, "step": 12754 }, { "epoch": 1.5125103759041858, "grad_norm": 1.0575640639264685, "learning_rate": 3.572304103796034e-05, "loss": 0.1642, "step": 12755 }, { "epoch": 1.512628957666311, "grad_norm": 0.9887121539493257, "learning_rate": 3.5720872642230676e-05, "loss": 0.1968, "step": 12756 }, { "epoch": 1.5127475394284358, "grad_norm": 1.1430292849467827, "learning_rate": 3.571870414766772e-05, "loss": 0.2288, "step": 12757 }, { "epoch": 1.512866121190561, "grad_norm": 0.7461829510504894, "learning_rate": 3.571653555429144e-05, "loss": 0.1473, "step": 12758 }, { "epoch": 1.5129847029526857, "grad_norm": 1.0848394272005284, "learning_rate": 3.571436686212185e-05, "loss": 0.1939, "step": 12759 }, { "epoch": 1.513103284714811, "grad_norm": 1.0639703071039854, "learning_rate": 3.571219807117894e-05, "loss": 0.2179, "step": 12760 }, { "epoch": 1.5132218664769357, "grad_norm": 1.0195298847724614, "learning_rate": 3.571002918148269e-05, "loss": 0.2185, "step": 12761 }, { "epoch": 1.5133404482390609, "grad_norm": 0.9877842918873289, "learning_rate": 3.570786019305311e-05, "loss": 0.2119, "step": 12762 }, { "epoch": 1.5134590300011859, "grad_norm": 1.0039923058993818, "learning_rate": 3.5705691105910196e-05, "loss": 0.1891, "step": 12763 }, { "epoch": 1.5135776117633108, "grad_norm": 1.2253095781652132, "learning_rate": 3.570352192007393e-05, "loss": 0.2881, "step": 12764 }, { "epoch": 1.5136961935254358, "grad_norm": 1.0466714916040571, "learning_rate": 3.570135263556432e-05, "loss": 0.2299, "step": 12765 }, { "epoch": 1.5138147752875608, "grad_norm": 1.0881499181607956, "learning_rate": 3.569918325240136e-05, "loss": 0.1716, "step": 12766 }, { "epoch": 1.5139333570496858, "grad_norm": 0.9196742096788857, "learning_rate": 3.569701377060506e-05, "loss": 0.184, "step": 12767 }, { "epoch": 1.5140519388118108, "grad_norm": 0.9819596299384474, "learning_rate": 3.56948441901954e-05, "loss": 0.1722, "step": 12768 }, { "epoch": 1.5141705205739358, "grad_norm": 0.9712096266777772, "learning_rate": 3.56926745111924e-05, "loss": 0.2058, "step": 12769 }, { "epoch": 1.5142891023360607, "grad_norm": 1.2521347303277481, "learning_rate": 3.5690504733616046e-05, "loss": 0.234, "step": 12770 }, { "epoch": 1.5144076840981857, "grad_norm": 0.8785940606144099, "learning_rate": 3.568833485748635e-05, "loss": 0.178, "step": 12771 }, { "epoch": 1.5145262658603107, "grad_norm": 0.9244786622288234, "learning_rate": 3.5686164882823314e-05, "loss": 0.2428, "step": 12772 }, { "epoch": 1.5146448476224357, "grad_norm": 0.9541151069020402, "learning_rate": 3.5683994809646935e-05, "loss": 0.1546, "step": 12773 }, { "epoch": 1.5147634293845607, "grad_norm": 1.3890041878498254, "learning_rate": 3.5681824637977234e-05, "loss": 0.2848, "step": 12774 }, { "epoch": 1.5148820111466856, "grad_norm": 1.2850590229296657, "learning_rate": 3.5679654367834206e-05, "loss": 0.2439, "step": 12775 }, { "epoch": 1.5150005929088106, "grad_norm": 0.8357224549051292, "learning_rate": 3.567748399923787e-05, "loss": 0.187, "step": 12776 }, { "epoch": 1.5151191746709356, "grad_norm": 0.87767497569854, "learning_rate": 3.567531353220821e-05, "loss": 0.1989, "step": 12777 }, { "epoch": 1.5152377564330606, "grad_norm": 1.3875731205839268, "learning_rate": 3.567314296676526e-05, "loss": 0.3499, "step": 12778 }, { "epoch": 1.5153563381951856, "grad_norm": 0.9583110660547284, "learning_rate": 3.5670972302929016e-05, "loss": 0.1859, "step": 12779 }, { "epoch": 1.5154749199573105, "grad_norm": 0.9119177102885186, "learning_rate": 3.566880154071949e-05, "loss": 0.1843, "step": 12780 }, { "epoch": 1.5155935017194355, "grad_norm": 0.9146909844251435, "learning_rate": 3.5666630680156707e-05, "loss": 0.1467, "step": 12781 }, { "epoch": 1.5157120834815605, "grad_norm": 1.376319594047053, "learning_rate": 3.5664459721260655e-05, "loss": 0.2491, "step": 12782 }, { "epoch": 1.5158306652436855, "grad_norm": 1.1523574897344098, "learning_rate": 3.566228866405138e-05, "loss": 0.2368, "step": 12783 }, { "epoch": 1.5159492470058105, "grad_norm": 0.9313533398486757, "learning_rate": 3.5660117508548865e-05, "loss": 0.1732, "step": 12784 }, { "epoch": 1.5160678287679354, "grad_norm": 1.1577927985050922, "learning_rate": 3.565794625477315e-05, "loss": 0.2541, "step": 12785 }, { "epoch": 1.5161864105300604, "grad_norm": 0.87658166295317, "learning_rate": 3.565577490274423e-05, "loss": 0.171, "step": 12786 }, { "epoch": 1.5163049922921856, "grad_norm": 1.1116663433160037, "learning_rate": 3.565360345248213e-05, "loss": 0.2247, "step": 12787 }, { "epoch": 1.5164235740543104, "grad_norm": 0.8389218221492477, "learning_rate": 3.565143190400688e-05, "loss": 0.1396, "step": 12788 }, { "epoch": 1.5165421558164356, "grad_norm": 0.8041738989978519, "learning_rate": 3.5649260257338484e-05, "loss": 0.1862, "step": 12789 }, { "epoch": 1.5166607375785603, "grad_norm": 0.7829207015899472, "learning_rate": 3.5647088512496966e-05, "loss": 0.155, "step": 12790 }, { "epoch": 1.5167793193406856, "grad_norm": 1.1683134558381554, "learning_rate": 3.564491666950235e-05, "loss": 0.2808, "step": 12791 }, { "epoch": 1.5168979011028103, "grad_norm": 1.005125876390253, "learning_rate": 3.5642744728374653e-05, "loss": 0.2282, "step": 12792 }, { "epoch": 1.5170164828649355, "grad_norm": 1.2333536440769755, "learning_rate": 3.564057268913391e-05, "loss": 0.2006, "step": 12793 }, { "epoch": 1.5171350646270603, "grad_norm": 1.2571839384608696, "learning_rate": 3.563840055180013e-05, "loss": 0.3047, "step": 12794 }, { "epoch": 1.5172536463891855, "grad_norm": 0.8756647070688247, "learning_rate": 3.563622831639334e-05, "loss": 0.1727, "step": 12795 }, { "epoch": 1.5173722281513102, "grad_norm": 1.1368674819383315, "learning_rate": 3.563405598293357e-05, "loss": 0.2427, "step": 12796 }, { "epoch": 1.5174908099134354, "grad_norm": 1.0941870249127739, "learning_rate": 3.5631883551440845e-05, "loss": 0.2474, "step": 12797 }, { "epoch": 1.5176093916755602, "grad_norm": 0.767669798087506, "learning_rate": 3.562971102193519e-05, "loss": 0.1733, "step": 12798 }, { "epoch": 1.5177279734376854, "grad_norm": 1.1476021758264907, "learning_rate": 3.562753839443664e-05, "loss": 0.2104, "step": 12799 }, { "epoch": 1.5178465551998102, "grad_norm": 1.6056432700605283, "learning_rate": 3.562536566896522e-05, "loss": 0.2675, "step": 12800 }, { "epoch": 1.5179651369619354, "grad_norm": 1.332576526945381, "learning_rate": 3.562319284554094e-05, "loss": 0.3004, "step": 12801 }, { "epoch": 1.5180837187240601, "grad_norm": 0.9714670889405904, "learning_rate": 3.562101992418386e-05, "loss": 0.1841, "step": 12802 }, { "epoch": 1.5182023004861853, "grad_norm": 1.0212972628416346, "learning_rate": 3.5618846904914e-05, "loss": 0.2518, "step": 12803 }, { "epoch": 1.51832088224831, "grad_norm": 0.9350185102823155, "learning_rate": 3.5616673787751395e-05, "loss": 0.2187, "step": 12804 }, { "epoch": 1.5184394640104353, "grad_norm": 0.8978425614766266, "learning_rate": 3.561450057271608e-05, "loss": 0.1957, "step": 12805 }, { "epoch": 1.51855804577256, "grad_norm": 1.1459662295568913, "learning_rate": 3.561232725982808e-05, "loss": 0.2757, "step": 12806 }, { "epoch": 1.5186766275346852, "grad_norm": 0.8769609680392908, "learning_rate": 3.5610153849107444e-05, "loss": 0.2056, "step": 12807 }, { "epoch": 1.51879520929681, "grad_norm": 1.1471329685256457, "learning_rate": 3.5607980340574195e-05, "loss": 0.2966, "step": 12808 }, { "epoch": 1.5189137910589352, "grad_norm": 0.9833116586805987, "learning_rate": 3.560580673424837e-05, "loss": 0.1951, "step": 12809 }, { "epoch": 1.51903237282106, "grad_norm": 0.9840205475972659, "learning_rate": 3.560363303015002e-05, "loss": 0.2172, "step": 12810 }, { "epoch": 1.5191509545831852, "grad_norm": 0.8550160569047015, "learning_rate": 3.560145922829917e-05, "loss": 0.1858, "step": 12811 }, { "epoch": 1.5192695363453101, "grad_norm": 1.3881691864993104, "learning_rate": 3.559928532871587e-05, "loss": 0.2803, "step": 12812 }, { "epoch": 1.5193881181074351, "grad_norm": 1.3503220394436788, "learning_rate": 3.559711133142016e-05, "loss": 0.3773, "step": 12813 }, { "epoch": 1.51950669986956, "grad_norm": 1.0530331822900083, "learning_rate": 3.559493723643208e-05, "loss": 0.2241, "step": 12814 }, { "epoch": 1.519625281631685, "grad_norm": 1.1546545050975312, "learning_rate": 3.559276304377167e-05, "loss": 0.2227, "step": 12815 }, { "epoch": 1.51974386339381, "grad_norm": 0.9668690494127918, "learning_rate": 3.559058875345897e-05, "loss": 0.2222, "step": 12816 }, { "epoch": 1.519862445155935, "grad_norm": 0.8049246541591667, "learning_rate": 3.5588414365514034e-05, "loss": 0.219, "step": 12817 }, { "epoch": 1.51998102691806, "grad_norm": 0.9535109359849181, "learning_rate": 3.5586239879956896e-05, "loss": 0.1934, "step": 12818 }, { "epoch": 1.520099608680185, "grad_norm": 1.0605257292318742, "learning_rate": 3.558406529680761e-05, "loss": 0.2595, "step": 12819 }, { "epoch": 1.52021819044231, "grad_norm": 1.0581513569365546, "learning_rate": 3.558189061608622e-05, "loss": 0.2845, "step": 12820 }, { "epoch": 1.520336772204435, "grad_norm": 1.2354677684272697, "learning_rate": 3.557971583781278e-05, "loss": 0.2212, "step": 12821 }, { "epoch": 1.52045535396656, "grad_norm": 1.1671579199233224, "learning_rate": 3.557754096200733e-05, "loss": 0.2539, "step": 12822 }, { "epoch": 1.520573935728685, "grad_norm": 1.0417040783792737, "learning_rate": 3.557536598868993e-05, "loss": 0.1942, "step": 12823 }, { "epoch": 1.52069251749081, "grad_norm": 1.4462090076118372, "learning_rate": 3.5573190917880605e-05, "loss": 0.2414, "step": 12824 }, { "epoch": 1.520811099252935, "grad_norm": 1.2471241349503797, "learning_rate": 3.557101574959944e-05, "loss": 0.2038, "step": 12825 }, { "epoch": 1.5209296810150599, "grad_norm": 1.0065580335726687, "learning_rate": 3.5568840483866473e-05, "loss": 0.1897, "step": 12826 }, { "epoch": 1.5210482627771849, "grad_norm": 0.7772645430964024, "learning_rate": 3.556666512070175e-05, "loss": 0.1679, "step": 12827 }, { "epoch": 1.5211668445393098, "grad_norm": 1.1299965183766798, "learning_rate": 3.556448966012535e-05, "loss": 0.2439, "step": 12828 }, { "epoch": 1.5212854263014348, "grad_norm": 1.0003181616712027, "learning_rate": 3.556231410215729e-05, "loss": 0.1784, "step": 12829 }, { "epoch": 1.5214040080635598, "grad_norm": 1.1718410073226182, "learning_rate": 3.5560138446817655e-05, "loss": 0.2474, "step": 12830 }, { "epoch": 1.5215225898256848, "grad_norm": 0.8634869931508408, "learning_rate": 3.555796269412649e-05, "loss": 0.1887, "step": 12831 }, { "epoch": 1.5216411715878098, "grad_norm": 0.8455399190518014, "learning_rate": 3.5555786844103864e-05, "loss": 0.1929, "step": 12832 }, { "epoch": 1.5217597533499347, "grad_norm": 1.0827286405372403, "learning_rate": 3.555361089676982e-05, "loss": 0.2919, "step": 12833 }, { "epoch": 1.5218783351120597, "grad_norm": 0.9629380632316193, "learning_rate": 3.5551434852144425e-05, "loss": 0.2184, "step": 12834 }, { "epoch": 1.5219969168741847, "grad_norm": 1.0034264888382383, "learning_rate": 3.554925871024774e-05, "loss": 0.2343, "step": 12835 }, { "epoch": 1.5221154986363097, "grad_norm": 0.9438581902226297, "learning_rate": 3.554708247109984e-05, "loss": 0.1872, "step": 12836 }, { "epoch": 1.5222340803984347, "grad_norm": 1.5123536004102422, "learning_rate": 3.554490613472075e-05, "loss": 0.3089, "step": 12837 }, { "epoch": 1.5223526621605599, "grad_norm": 1.239584567512024, "learning_rate": 3.5542729701130574e-05, "loss": 0.2444, "step": 12838 }, { "epoch": 1.5224712439226846, "grad_norm": 0.8654599120372531, "learning_rate": 3.554055317034935e-05, "loss": 0.1491, "step": 12839 }, { "epoch": 1.5225898256848098, "grad_norm": 1.020026004644902, "learning_rate": 3.553837654239715e-05, "loss": 0.2134, "step": 12840 }, { "epoch": 1.5227084074469346, "grad_norm": 1.2606602599731052, "learning_rate": 3.553619981729404e-05, "loss": 0.256, "step": 12841 }, { "epoch": 1.5228269892090598, "grad_norm": 1.0907434341978008, "learning_rate": 3.5534022995060096e-05, "loss": 0.1792, "step": 12842 }, { "epoch": 1.5229455709711845, "grad_norm": 1.3443085921678128, "learning_rate": 3.553184607571538e-05, "loss": 0.3125, "step": 12843 }, { "epoch": 1.5230641527333098, "grad_norm": 0.9554544165954907, "learning_rate": 3.552966905927995e-05, "loss": 0.211, "step": 12844 }, { "epoch": 1.5231827344954345, "grad_norm": 1.3357655178312413, "learning_rate": 3.5527491945773886e-05, "loss": 0.3065, "step": 12845 }, { "epoch": 1.5233013162575597, "grad_norm": 0.9979791480828737, "learning_rate": 3.5525314735217255e-05, "loss": 0.1694, "step": 12846 }, { "epoch": 1.5234198980196845, "grad_norm": 0.8708678532978099, "learning_rate": 3.552313742763013e-05, "loss": 0.2002, "step": 12847 }, { "epoch": 1.5235384797818097, "grad_norm": 1.0134185876935402, "learning_rate": 3.5520960023032586e-05, "loss": 0.2851, "step": 12848 }, { "epoch": 1.5236570615439344, "grad_norm": 1.0077957680855296, "learning_rate": 3.551878252144469e-05, "loss": 0.2542, "step": 12849 }, { "epoch": 1.5237756433060596, "grad_norm": 1.2626057072433472, "learning_rate": 3.551660492288651e-05, "loss": 0.2697, "step": 12850 }, { "epoch": 1.5238942250681844, "grad_norm": 0.9001594375719711, "learning_rate": 3.551442722737814e-05, "loss": 0.209, "step": 12851 }, { "epoch": 1.5240128068303096, "grad_norm": 1.0878311985434648, "learning_rate": 3.551224943493964e-05, "loss": 0.1912, "step": 12852 }, { "epoch": 1.5241313885924344, "grad_norm": 1.1070184205268812, "learning_rate": 3.551007154559109e-05, "loss": 0.2237, "step": 12853 }, { "epoch": 1.5242499703545596, "grad_norm": 0.7871784803337842, "learning_rate": 3.550789355935257e-05, "loss": 0.1779, "step": 12854 }, { "epoch": 1.5243685521166843, "grad_norm": 0.8657013775048242, "learning_rate": 3.5505715476244155e-05, "loss": 0.2008, "step": 12855 }, { "epoch": 1.5244871338788095, "grad_norm": 1.4275226546374824, "learning_rate": 3.5503537296285936e-05, "loss": 0.255, "step": 12856 }, { "epoch": 1.5246057156409343, "grad_norm": 0.8925013656513742, "learning_rate": 3.550135901949797e-05, "loss": 0.2046, "step": 12857 }, { "epoch": 1.5247242974030595, "grad_norm": 1.1836670901313346, "learning_rate": 3.5499180645900365e-05, "loss": 0.2601, "step": 12858 }, { "epoch": 1.5248428791651842, "grad_norm": 0.8530506506935547, "learning_rate": 3.5497002175513177e-05, "loss": 0.1568, "step": 12859 }, { "epoch": 1.5249614609273094, "grad_norm": 1.3513767712331632, "learning_rate": 3.5494823608356505e-05, "loss": 0.3589, "step": 12860 }, { "epoch": 1.5250800426894342, "grad_norm": 0.9661768794815984, "learning_rate": 3.549264494445042e-05, "loss": 0.2249, "step": 12861 }, { "epoch": 1.5251986244515594, "grad_norm": 0.9202535548331342, "learning_rate": 3.5490466183815035e-05, "loss": 0.2286, "step": 12862 }, { "epoch": 1.5253172062136844, "grad_norm": 1.0176139725971, "learning_rate": 3.5488287326470406e-05, "loss": 0.2181, "step": 12863 }, { "epoch": 1.5254357879758094, "grad_norm": 1.623399513861169, "learning_rate": 3.5486108372436634e-05, "loss": 0.3192, "step": 12864 }, { "epoch": 1.5255543697379343, "grad_norm": 1.1976986769407152, "learning_rate": 3.5483929321733796e-05, "loss": 0.2433, "step": 12865 }, { "epoch": 1.5256729515000593, "grad_norm": 1.0815221995316286, "learning_rate": 3.5481750174381986e-05, "loss": 0.2583, "step": 12866 }, { "epoch": 1.5257915332621843, "grad_norm": 0.8038665838138259, "learning_rate": 3.5479570930401294e-05, "loss": 0.1452, "step": 12867 }, { "epoch": 1.5259101150243093, "grad_norm": 0.6912178897160326, "learning_rate": 3.547739158981181e-05, "loss": 0.1438, "step": 12868 }, { "epoch": 1.5260286967864343, "grad_norm": 1.2224699931214849, "learning_rate": 3.547521215263363e-05, "loss": 0.2463, "step": 12869 }, { "epoch": 1.5261472785485592, "grad_norm": 0.9496634324967413, "learning_rate": 3.5473032618886823e-05, "loss": 0.2031, "step": 12870 }, { "epoch": 1.5262658603106842, "grad_norm": 0.770577797093182, "learning_rate": 3.5470852988591514e-05, "loss": 0.1955, "step": 12871 }, { "epoch": 1.5263844420728092, "grad_norm": 1.1345003742319786, "learning_rate": 3.546867326176777e-05, "loss": 0.1993, "step": 12872 }, { "epoch": 1.5265030238349342, "grad_norm": 0.8066786640009451, "learning_rate": 3.54664934384357e-05, "loss": 0.1604, "step": 12873 }, { "epoch": 1.5266216055970592, "grad_norm": 1.7802855145258445, "learning_rate": 3.54643135186154e-05, "loss": 0.2759, "step": 12874 }, { "epoch": 1.5267401873591842, "grad_norm": 1.1443853435774451, "learning_rate": 3.546213350232696e-05, "loss": 0.302, "step": 12875 }, { "epoch": 1.5268587691213091, "grad_norm": 1.0329282463425375, "learning_rate": 3.5459953389590474e-05, "loss": 0.2472, "step": 12876 }, { "epoch": 1.5269773508834341, "grad_norm": 0.7996133325430156, "learning_rate": 3.545777318042605e-05, "loss": 0.1842, "step": 12877 }, { "epoch": 1.527095932645559, "grad_norm": 0.8820779180606119, "learning_rate": 3.5455592874853776e-05, "loss": 0.1791, "step": 12878 }, { "epoch": 1.527214514407684, "grad_norm": 0.7937470157244286, "learning_rate": 3.5453412472893766e-05, "loss": 0.1339, "step": 12879 }, { "epoch": 1.527333096169809, "grad_norm": 1.0149029778613194, "learning_rate": 3.54512319745661e-05, "loss": 0.2346, "step": 12880 }, { "epoch": 1.527451677931934, "grad_norm": 1.254795511678671, "learning_rate": 3.5449051379890894e-05, "loss": 0.2857, "step": 12881 }, { "epoch": 1.527570259694059, "grad_norm": 0.9706799962497036, "learning_rate": 3.544687068888825e-05, "loss": 0.1826, "step": 12882 }, { "epoch": 1.527688841456184, "grad_norm": 1.1371724695713745, "learning_rate": 3.5444689901578264e-05, "loss": 0.2382, "step": 12883 }, { "epoch": 1.527807423218309, "grad_norm": 1.226433890498801, "learning_rate": 3.5442509017981055e-05, "loss": 0.2632, "step": 12884 }, { "epoch": 1.527926004980434, "grad_norm": 0.762443992949207, "learning_rate": 3.5440328038116716e-05, "loss": 0.166, "step": 12885 }, { "epoch": 1.528044586742559, "grad_norm": 0.9412518669724855, "learning_rate": 3.543814696200536e-05, "loss": 0.1808, "step": 12886 }, { "epoch": 1.5281631685046841, "grad_norm": 1.200731140271332, "learning_rate": 3.5435965789667074e-05, "loss": 0.2495, "step": 12887 }, { "epoch": 1.528281750266809, "grad_norm": 0.6075508909893481, "learning_rate": 3.5433784521121993e-05, "loss": 0.1306, "step": 12888 }, { "epoch": 1.528400332028934, "grad_norm": 0.8812628728460592, "learning_rate": 3.543160315639021e-05, "loss": 0.1801, "step": 12889 }, { "epoch": 1.5285189137910589, "grad_norm": 0.8385972728755706, "learning_rate": 3.5429421695491835e-05, "loss": 0.2116, "step": 12890 }, { "epoch": 1.528637495553184, "grad_norm": 1.1739157183971491, "learning_rate": 3.542724013844698e-05, "loss": 0.2238, "step": 12891 }, { "epoch": 1.5287560773153088, "grad_norm": 1.1223654891712085, "learning_rate": 3.5425058485275766e-05, "loss": 0.2437, "step": 12892 }, { "epoch": 1.528874659077434, "grad_norm": 1.4932989723036958, "learning_rate": 3.542287673599829e-05, "loss": 0.2605, "step": 12893 }, { "epoch": 1.5289932408395588, "grad_norm": 1.1281476404386483, "learning_rate": 3.5420694890634674e-05, "loss": 0.2735, "step": 12894 }, { "epoch": 1.529111822601684, "grad_norm": 1.043464986449808, "learning_rate": 3.5418512949205035e-05, "loss": 0.2306, "step": 12895 }, { "epoch": 1.5292304043638087, "grad_norm": 0.8648958692593001, "learning_rate": 3.541633091172947e-05, "loss": 0.1444, "step": 12896 }, { "epoch": 1.529348986125934, "grad_norm": 1.0220171020899242, "learning_rate": 3.5414148778228124e-05, "loss": 0.1998, "step": 12897 }, { "epoch": 1.5294675678880587, "grad_norm": 1.6114558155377965, "learning_rate": 3.541196654872109e-05, "loss": 0.37, "step": 12898 }, { "epoch": 1.529586149650184, "grad_norm": 1.164656876250275, "learning_rate": 3.540978422322849e-05, "loss": 0.3151, "step": 12899 }, { "epoch": 1.5297047314123087, "grad_norm": 0.9120332876626611, "learning_rate": 3.540760180177044e-05, "loss": 0.1984, "step": 12900 }, { "epoch": 1.5298233131744339, "grad_norm": 1.0810625608759883, "learning_rate": 3.540541928436707e-05, "loss": 0.2299, "step": 12901 }, { "epoch": 1.5299418949365586, "grad_norm": 1.1524900714041455, "learning_rate": 3.540323667103849e-05, "loss": 0.22, "step": 12902 }, { "epoch": 1.5300604766986838, "grad_norm": 1.2421686526707942, "learning_rate": 3.540105396180483e-05, "loss": 0.2002, "step": 12903 }, { "epoch": 1.5301790584608086, "grad_norm": 0.8278686031026299, "learning_rate": 3.5398871156686206e-05, "loss": 0.192, "step": 12904 }, { "epoch": 1.5302976402229338, "grad_norm": 0.8565528097814316, "learning_rate": 3.5396688255702747e-05, "loss": 0.188, "step": 12905 }, { "epoch": 1.5304162219850586, "grad_norm": 0.9556960490062892, "learning_rate": 3.539450525887456e-05, "loss": 0.2014, "step": 12906 }, { "epoch": 1.5305348037471838, "grad_norm": 1.0472701562060664, "learning_rate": 3.539232216622179e-05, "loss": 0.1643, "step": 12907 }, { "epoch": 1.5306533855093085, "grad_norm": 0.922581938190148, "learning_rate": 3.539013897776455e-05, "loss": 0.2216, "step": 12908 }, { "epoch": 1.5307719672714337, "grad_norm": 1.1351067817012357, "learning_rate": 3.5387955693522975e-05, "loss": 0.3453, "step": 12909 }, { "epoch": 1.5308905490335585, "grad_norm": 0.849977797024075, "learning_rate": 3.5385772313517176e-05, "loss": 0.1638, "step": 12910 }, { "epoch": 1.5310091307956837, "grad_norm": 0.7121233787766494, "learning_rate": 3.53835888377673e-05, "loss": 0.1742, "step": 12911 }, { "epoch": 1.5311277125578087, "grad_norm": 1.1308953267843558, "learning_rate": 3.5381405266293464e-05, "loss": 0.277, "step": 12912 }, { "epoch": 1.5312462943199336, "grad_norm": 0.8656292514072345, "learning_rate": 3.537922159911581e-05, "loss": 0.1592, "step": 12913 }, { "epoch": 1.5313648760820586, "grad_norm": 0.7599069833344445, "learning_rate": 3.537703783625446e-05, "loss": 0.1526, "step": 12914 }, { "epoch": 1.5314834578441836, "grad_norm": 1.191425852818175, "learning_rate": 3.537485397772954e-05, "loss": 0.2135, "step": 12915 }, { "epoch": 1.5316020396063086, "grad_norm": 0.6342451830811993, "learning_rate": 3.537267002356119e-05, "loss": 0.155, "step": 12916 }, { "epoch": 1.5317206213684336, "grad_norm": 1.4591828702066778, "learning_rate": 3.537048597376954e-05, "loss": 0.3795, "step": 12917 }, { "epoch": 1.5318392031305585, "grad_norm": 1.5562004046372202, "learning_rate": 3.536830182837474e-05, "loss": 0.326, "step": 12918 }, { "epoch": 1.5319577848926835, "grad_norm": 0.7073277419706369, "learning_rate": 3.536611758739689e-05, "loss": 0.1347, "step": 12919 }, { "epoch": 1.5320763666548085, "grad_norm": 0.9692130078385965, "learning_rate": 3.536393325085616e-05, "loss": 0.2401, "step": 12920 }, { "epoch": 1.5321949484169335, "grad_norm": 1.1822539793464235, "learning_rate": 3.536174881877267e-05, "loss": 0.2354, "step": 12921 }, { "epoch": 1.5323135301790585, "grad_norm": 0.8406792847645952, "learning_rate": 3.535956429116657e-05, "loss": 0.1794, "step": 12922 }, { "epoch": 1.5324321119411835, "grad_norm": 0.8728744119799386, "learning_rate": 3.535737966805798e-05, "loss": 0.2003, "step": 12923 }, { "epoch": 1.5325506937033084, "grad_norm": 0.7790032597596278, "learning_rate": 3.535519494946706e-05, "loss": 0.1514, "step": 12924 }, { "epoch": 1.5326692754654334, "grad_norm": 1.3373013794959874, "learning_rate": 3.535301013541394e-05, "loss": 0.3106, "step": 12925 }, { "epoch": 1.5327878572275584, "grad_norm": 1.0501721825218668, "learning_rate": 3.535082522591876e-05, "loss": 0.2316, "step": 12926 }, { "epoch": 1.5329064389896834, "grad_norm": 1.2569406903245595, "learning_rate": 3.534864022100167e-05, "loss": 0.2263, "step": 12927 }, { "epoch": 1.5330250207518084, "grad_norm": 0.9349563489277629, "learning_rate": 3.5346455120682796e-05, "loss": 0.2053, "step": 12928 }, { "epoch": 1.5331436025139333, "grad_norm": 1.0345961169469389, "learning_rate": 3.53442699249823e-05, "loss": 0.2217, "step": 12929 }, { "epoch": 1.5332621842760583, "grad_norm": 0.806763595036304, "learning_rate": 3.5342084633920316e-05, "loss": 0.1559, "step": 12930 }, { "epoch": 1.5333807660381833, "grad_norm": 0.8119502272694814, "learning_rate": 3.5339899247517e-05, "loss": 0.186, "step": 12931 }, { "epoch": 1.5334993478003083, "grad_norm": 0.8471037817058749, "learning_rate": 3.5337713765792494e-05, "loss": 0.223, "step": 12932 }, { "epoch": 1.5336179295624333, "grad_norm": 0.9318811017977353, "learning_rate": 3.533552818876694e-05, "loss": 0.2028, "step": 12933 }, { "epoch": 1.5337365113245582, "grad_norm": 0.8946494413316326, "learning_rate": 3.533334251646049e-05, "loss": 0.1626, "step": 12934 }, { "epoch": 1.5338550930866832, "grad_norm": 1.0925514690441913, "learning_rate": 3.5331156748893295e-05, "loss": 0.269, "step": 12935 }, { "epoch": 1.5339736748488082, "grad_norm": 0.9845897007602804, "learning_rate": 3.53289708860855e-05, "loss": 0.2335, "step": 12936 }, { "epoch": 1.5340922566109332, "grad_norm": 1.1320380020439396, "learning_rate": 3.5326784928057264e-05, "loss": 0.2569, "step": 12937 }, { "epoch": 1.5342108383730584, "grad_norm": 1.216845909061438, "learning_rate": 3.532459887482873e-05, "loss": 0.2486, "step": 12938 }, { "epoch": 1.5343294201351831, "grad_norm": 0.8516156809713886, "learning_rate": 3.532241272642006e-05, "loss": 0.2259, "step": 12939 }, { "epoch": 1.5344480018973083, "grad_norm": 1.0620111051324834, "learning_rate": 3.53202264828514e-05, "loss": 0.258, "step": 12940 }, { "epoch": 1.534566583659433, "grad_norm": 1.2582016356972028, "learning_rate": 3.53180401441429e-05, "loss": 0.2061, "step": 12941 }, { "epoch": 1.5346851654215583, "grad_norm": 0.9712485161430684, "learning_rate": 3.531585371031474e-05, "loss": 0.1954, "step": 12942 }, { "epoch": 1.534803747183683, "grad_norm": 0.7788363307945733, "learning_rate": 3.531366718138705e-05, "loss": 0.1542, "step": 12943 }, { "epoch": 1.5349223289458083, "grad_norm": 0.6948123573014953, "learning_rate": 3.531148055738e-05, "loss": 0.1678, "step": 12944 }, { "epoch": 1.535040910707933, "grad_norm": 0.9358222672327046, "learning_rate": 3.530929383831374e-05, "loss": 0.2226, "step": 12945 }, { "epoch": 1.5351594924700582, "grad_norm": 0.8553158346798698, "learning_rate": 3.530710702420843e-05, "loss": 0.1415, "step": 12946 }, { "epoch": 1.535278074232183, "grad_norm": 1.7010113243899436, "learning_rate": 3.530492011508424e-05, "loss": 0.4698, "step": 12947 }, { "epoch": 1.5353966559943082, "grad_norm": 1.2452001008046305, "learning_rate": 3.5302733110961316e-05, "loss": 0.2917, "step": 12948 }, { "epoch": 1.535515237756433, "grad_norm": 1.1352727530819346, "learning_rate": 3.530054601185983e-05, "loss": 0.2326, "step": 12949 }, { "epoch": 1.5356338195185582, "grad_norm": 0.9933986406624238, "learning_rate": 3.5298358817799935e-05, "loss": 0.2107, "step": 12950 }, { "epoch": 1.535752401280683, "grad_norm": 0.9395524826304277, "learning_rate": 3.5296171528801805e-05, "loss": 0.2592, "step": 12951 }, { "epoch": 1.5358709830428081, "grad_norm": 0.7801575493655623, "learning_rate": 3.52939841448856e-05, "loss": 0.1663, "step": 12952 }, { "epoch": 1.5359895648049329, "grad_norm": 0.7122273573564518, "learning_rate": 3.529179666607149e-05, "loss": 0.145, "step": 12953 }, { "epoch": 1.536108146567058, "grad_norm": 0.8597657971757691, "learning_rate": 3.5289609092379636e-05, "loss": 0.1882, "step": 12954 }, { "epoch": 1.5362267283291828, "grad_norm": 1.2115345824538846, "learning_rate": 3.5287421423830195e-05, "loss": 0.2268, "step": 12955 }, { "epoch": 1.536345310091308, "grad_norm": 1.306299420165853, "learning_rate": 3.528523366044335e-05, "loss": 0.3155, "step": 12956 }, { "epoch": 1.5364638918534328, "grad_norm": 0.7890994178266642, "learning_rate": 3.528304580223926e-05, "loss": 0.1449, "step": 12957 }, { "epoch": 1.536582473615558, "grad_norm": 1.359055306527004, "learning_rate": 3.52808578492381e-05, "loss": 0.2365, "step": 12958 }, { "epoch": 1.5367010553776828, "grad_norm": 0.7914279994797084, "learning_rate": 3.527866980146003e-05, "loss": 0.1699, "step": 12959 }, { "epoch": 1.536819637139808, "grad_norm": 0.9918689589861933, "learning_rate": 3.5276481658925234e-05, "loss": 0.1938, "step": 12960 }, { "epoch": 1.5369382189019327, "grad_norm": 1.2261810332422252, "learning_rate": 3.5274293421653885e-05, "loss": 0.2212, "step": 12961 }, { "epoch": 1.537056800664058, "grad_norm": 1.2139540664620825, "learning_rate": 3.527210508966614e-05, "loss": 0.2731, "step": 12962 }, { "epoch": 1.537175382426183, "grad_norm": 0.8333907474252661, "learning_rate": 3.526991666298219e-05, "loss": 0.1638, "step": 12963 }, { "epoch": 1.5372939641883079, "grad_norm": 1.1360137792394915, "learning_rate": 3.5267728141622206e-05, "loss": 0.2094, "step": 12964 }, { "epoch": 1.5374125459504329, "grad_norm": 0.6873009977819795, "learning_rate": 3.526553952560635e-05, "loss": 0.1676, "step": 12965 }, { "epoch": 1.5375311277125578, "grad_norm": 1.0429011040197398, "learning_rate": 3.526335081495481e-05, "loss": 0.2472, "step": 12966 }, { "epoch": 1.5376497094746828, "grad_norm": 0.9282481774124294, "learning_rate": 3.526116200968776e-05, "loss": 0.1899, "step": 12967 }, { "epoch": 1.5377682912368078, "grad_norm": 0.780724859456509, "learning_rate": 3.5258973109825386e-05, "loss": 0.2061, "step": 12968 }, { "epoch": 1.5378868729989328, "grad_norm": 0.9612424249527737, "learning_rate": 3.525678411538785e-05, "loss": 0.1522, "step": 12969 }, { "epoch": 1.5380054547610578, "grad_norm": 0.8730359727669729, "learning_rate": 3.5254595026395355e-05, "loss": 0.2064, "step": 12970 }, { "epoch": 1.5381240365231827, "grad_norm": 0.7827427841143191, "learning_rate": 3.525240584286806e-05, "loss": 0.2007, "step": 12971 }, { "epoch": 1.5382426182853077, "grad_norm": 0.7031202121711667, "learning_rate": 3.525021656482616e-05, "loss": 0.1431, "step": 12972 }, { "epoch": 1.5383612000474327, "grad_norm": 1.0316232516244068, "learning_rate": 3.524802719228983e-05, "loss": 0.214, "step": 12973 }, { "epoch": 1.5384797818095577, "grad_norm": 1.1479799640284831, "learning_rate": 3.5245837725279254e-05, "loss": 0.2845, "step": 12974 }, { "epoch": 1.5385983635716827, "grad_norm": 1.3939127294167244, "learning_rate": 3.524364816381463e-05, "loss": 0.2533, "step": 12975 }, { "epoch": 1.5387169453338077, "grad_norm": 1.0577933843877287, "learning_rate": 3.524145850791612e-05, "loss": 0.1721, "step": 12976 }, { "epoch": 1.5388355270959326, "grad_norm": 1.229558536555313, "learning_rate": 3.523926875760392e-05, "loss": 0.2416, "step": 12977 }, { "epoch": 1.5389541088580576, "grad_norm": 0.9664361911332459, "learning_rate": 3.523707891289823e-05, "loss": 0.2321, "step": 12978 }, { "epoch": 1.5390726906201826, "grad_norm": 1.0806548754547245, "learning_rate": 3.5234888973819215e-05, "loss": 0.1982, "step": 12979 }, { "epoch": 1.5391912723823076, "grad_norm": 1.308227331774289, "learning_rate": 3.523269894038708e-05, "loss": 0.2167, "step": 12980 }, { "epoch": 1.5393098541444326, "grad_norm": 1.1974719436027046, "learning_rate": 3.5230508812622e-05, "loss": 0.2795, "step": 12981 }, { "epoch": 1.5394284359065575, "grad_norm": 0.9459011140074187, "learning_rate": 3.522831859054418e-05, "loss": 0.1701, "step": 12982 }, { "epoch": 1.5395470176686825, "grad_norm": 0.9274187603840137, "learning_rate": 3.5226128274173806e-05, "loss": 0.1889, "step": 12983 }, { "epoch": 1.5396655994308075, "grad_norm": 1.2103907304938715, "learning_rate": 3.522393786353108e-05, "loss": 0.2477, "step": 12984 }, { "epoch": 1.5397841811929325, "grad_norm": 1.117379798555609, "learning_rate": 3.522174735863617e-05, "loss": 0.2414, "step": 12985 }, { "epoch": 1.5399027629550575, "grad_norm": 0.8621323733989461, "learning_rate": 3.521955675950929e-05, "loss": 0.1759, "step": 12986 }, { "epoch": 1.5400213447171827, "grad_norm": 1.4750240682544027, "learning_rate": 3.521736606617063e-05, "loss": 0.312, "step": 12987 }, { "epoch": 1.5401399264793074, "grad_norm": 1.0540793117810618, "learning_rate": 3.521517527864038e-05, "loss": 0.2664, "step": 12988 }, { "epoch": 1.5402585082414326, "grad_norm": 0.8989872899700817, "learning_rate": 3.5212984396938734e-05, "loss": 0.1941, "step": 12989 }, { "epoch": 1.5403770900035574, "grad_norm": 0.8114822814932272, "learning_rate": 3.521079342108591e-05, "loss": 0.1636, "step": 12990 }, { "epoch": 1.5404956717656826, "grad_norm": 0.8777280171578562, "learning_rate": 3.520860235110208e-05, "loss": 0.1819, "step": 12991 }, { "epoch": 1.5406142535278073, "grad_norm": 1.3591510201439414, "learning_rate": 3.5206411187007466e-05, "loss": 0.3091, "step": 12992 }, { "epoch": 1.5407328352899325, "grad_norm": 1.0674957016481832, "learning_rate": 3.5204219928822255e-05, "loss": 0.2943, "step": 12993 }, { "epoch": 1.5408514170520573, "grad_norm": 0.9574320138494344, "learning_rate": 3.520202857656664e-05, "loss": 0.2005, "step": 12994 }, { "epoch": 1.5409699988141825, "grad_norm": 0.5656993264249255, "learning_rate": 3.519983713026084e-05, "loss": 0.1503, "step": 12995 }, { "epoch": 1.5410885805763073, "grad_norm": 1.125911556772812, "learning_rate": 3.519764558992505e-05, "loss": 0.2624, "step": 12996 }, { "epoch": 1.5412071623384325, "grad_norm": 1.163397370051345, "learning_rate": 3.5195453955579464e-05, "loss": 0.2074, "step": 12997 }, { "epoch": 1.5413257441005572, "grad_norm": 1.3938242727766779, "learning_rate": 3.5193262227244306e-05, "loss": 0.2804, "step": 12998 }, { "epoch": 1.5414443258626824, "grad_norm": 1.005019918323092, "learning_rate": 3.519107040493976e-05, "loss": 0.2243, "step": 12999 }, { "epoch": 1.5415629076248072, "grad_norm": 0.8967494602600352, "learning_rate": 3.518887848868605e-05, "loss": 0.1659, "step": 13000 }, { "epoch": 1.5416814893869324, "grad_norm": 1.0798536271305383, "learning_rate": 3.5186686478503374e-05, "loss": 0.2908, "step": 13001 }, { "epoch": 1.5418000711490571, "grad_norm": 0.7699423639711502, "learning_rate": 3.518449437441194e-05, "loss": 0.1861, "step": 13002 }, { "epoch": 1.5419186529111824, "grad_norm": 1.16641854329243, "learning_rate": 3.518230217643195e-05, "loss": 0.2536, "step": 13003 }, { "epoch": 1.5420372346733071, "grad_norm": 0.8133774793515741, "learning_rate": 3.5180109884583636e-05, "loss": 0.19, "step": 13004 }, { "epoch": 1.5421558164354323, "grad_norm": 0.9697880257622497, "learning_rate": 3.517791749888718e-05, "loss": 0.1939, "step": 13005 }, { "epoch": 1.542274398197557, "grad_norm": 1.1560843793957765, "learning_rate": 3.517572501936281e-05, "loss": 0.2178, "step": 13006 }, { "epoch": 1.5423929799596823, "grad_norm": 0.8554450456361307, "learning_rate": 3.517353244603073e-05, "loss": 0.1691, "step": 13007 }, { "epoch": 1.542511561721807, "grad_norm": 0.6257118671982421, "learning_rate": 3.517133977891116e-05, "loss": 0.1558, "step": 13008 }, { "epoch": 1.5426301434839322, "grad_norm": 1.1079535477444116, "learning_rate": 3.516914701802431e-05, "loss": 0.2849, "step": 13009 }, { "epoch": 1.542748725246057, "grad_norm": 0.8969484811979774, "learning_rate": 3.5166954163390384e-05, "loss": 0.1859, "step": 13010 }, { "epoch": 1.5428673070081822, "grad_norm": 0.7351545488196336, "learning_rate": 3.516476121502962e-05, "loss": 0.1363, "step": 13011 }, { "epoch": 1.5429858887703072, "grad_norm": 1.2289123927171712, "learning_rate": 3.516256817296222e-05, "loss": 0.283, "step": 13012 }, { "epoch": 1.5431044705324322, "grad_norm": 0.9515289848502695, "learning_rate": 3.5160375037208404e-05, "loss": 0.1787, "step": 13013 }, { "epoch": 1.5432230522945571, "grad_norm": 0.9608412896001378, "learning_rate": 3.5158181807788386e-05, "loss": 0.2518, "step": 13014 }, { "epoch": 1.5433416340566821, "grad_norm": 1.1206935710543817, "learning_rate": 3.51559884847224e-05, "loss": 0.2641, "step": 13015 }, { "epoch": 1.543460215818807, "grad_norm": 0.9436479665184189, "learning_rate": 3.515379506803064e-05, "loss": 0.1884, "step": 13016 }, { "epoch": 1.543578797580932, "grad_norm": 1.210087028758465, "learning_rate": 3.515160155773335e-05, "loss": 0.2635, "step": 13017 }, { "epoch": 1.543697379343057, "grad_norm": 1.1035978405324391, "learning_rate": 3.514940795385074e-05, "loss": 0.1807, "step": 13018 }, { "epoch": 1.543815961105182, "grad_norm": 0.9566180849829984, "learning_rate": 3.5147214256403025e-05, "loss": 0.2049, "step": 13019 }, { "epoch": 1.543934542867307, "grad_norm": 0.7715351679186057, "learning_rate": 3.514502046541045e-05, "loss": 0.1719, "step": 13020 }, { "epoch": 1.544053124629432, "grad_norm": 1.3308531470103715, "learning_rate": 3.5142826580893226e-05, "loss": 0.287, "step": 13021 }, { "epoch": 1.544171706391557, "grad_norm": 0.8204744601623554, "learning_rate": 3.514063260287157e-05, "loss": 0.1989, "step": 13022 }, { "epoch": 1.544290288153682, "grad_norm": 0.9854019867619556, "learning_rate": 3.513843853136573e-05, "loss": 0.1821, "step": 13023 }, { "epoch": 1.544408869915807, "grad_norm": 0.7957680539764332, "learning_rate": 3.513624436639591e-05, "loss": 0.1751, "step": 13024 }, { "epoch": 1.544527451677932, "grad_norm": 0.9311732494522149, "learning_rate": 3.5134050107982345e-05, "loss": 0.1906, "step": 13025 }, { "epoch": 1.544646033440057, "grad_norm": 1.039127942363783, "learning_rate": 3.5131855756145274e-05, "loss": 0.1978, "step": 13026 }, { "epoch": 1.544764615202182, "grad_norm": 0.9410772594522689, "learning_rate": 3.512966131090492e-05, "loss": 0.2041, "step": 13027 }, { "epoch": 1.5448831969643069, "grad_norm": 0.9802741164736304, "learning_rate": 3.5127466772281496e-05, "loss": 0.1742, "step": 13028 }, { "epoch": 1.5450017787264319, "grad_norm": 1.3425025108458843, "learning_rate": 3.512527214029526e-05, "loss": 0.2392, "step": 13029 }, { "epoch": 1.5451203604885568, "grad_norm": 0.8435718835050805, "learning_rate": 3.512307741496642e-05, "loss": 0.1448, "step": 13030 }, { "epoch": 1.5452389422506818, "grad_norm": 1.0110960655111323, "learning_rate": 3.512088259631522e-05, "loss": 0.1433, "step": 13031 }, { "epoch": 1.5453575240128068, "grad_norm": 1.1656222313852176, "learning_rate": 3.511868768436191e-05, "loss": 0.2208, "step": 13032 }, { "epoch": 1.5454761057749318, "grad_norm": 0.8146590854901843, "learning_rate": 3.51164926791267e-05, "loss": 0.1678, "step": 13033 }, { "epoch": 1.5455946875370568, "grad_norm": 0.8677196349298805, "learning_rate": 3.511429758062983e-05, "loss": 0.1798, "step": 13034 }, { "epoch": 1.5457132692991817, "grad_norm": 1.2680578365054633, "learning_rate": 3.511210238889153e-05, "loss": 0.2402, "step": 13035 }, { "epoch": 1.5458318510613067, "grad_norm": 1.0679785096738463, "learning_rate": 3.510990710393207e-05, "loss": 0.2132, "step": 13036 }, { "epoch": 1.5459504328234317, "grad_norm": 1.21574477350966, "learning_rate": 3.510771172577164e-05, "loss": 0.2434, "step": 13037 }, { "epoch": 1.546069014585557, "grad_norm": 1.050381199497278, "learning_rate": 3.510551625443051e-05, "loss": 0.2208, "step": 13038 }, { "epoch": 1.5461875963476817, "grad_norm": 1.0548831510153518, "learning_rate": 3.510332068992892e-05, "loss": 0.1855, "step": 13039 }, { "epoch": 1.5463061781098069, "grad_norm": 0.9952235029001298, "learning_rate": 3.51011250322871e-05, "loss": 0.1878, "step": 13040 }, { "epoch": 1.5464247598719316, "grad_norm": 1.7066225297407105, "learning_rate": 3.509892928152529e-05, "loss": 0.282, "step": 13041 }, { "epoch": 1.5465433416340568, "grad_norm": 0.916588070782745, "learning_rate": 3.5096733437663736e-05, "loss": 0.1931, "step": 13042 }, { "epoch": 1.5466619233961816, "grad_norm": 1.0023453979422223, "learning_rate": 3.509453750072268e-05, "loss": 0.2472, "step": 13043 }, { "epoch": 1.5467805051583068, "grad_norm": 0.9595273326570879, "learning_rate": 3.509234147072238e-05, "loss": 0.1869, "step": 13044 }, { "epoch": 1.5468990869204315, "grad_norm": 0.672928113799073, "learning_rate": 3.509014534768306e-05, "loss": 0.1716, "step": 13045 }, { "epoch": 1.5470176686825567, "grad_norm": 1.2484328322170228, "learning_rate": 3.508794913162496e-05, "loss": 0.2761, "step": 13046 }, { "epoch": 1.5471362504446815, "grad_norm": 0.9033157703449846, "learning_rate": 3.508575282256836e-05, "loss": 0.1915, "step": 13047 }, { "epoch": 1.5472548322068067, "grad_norm": 1.1480952438110354, "learning_rate": 3.508355642053348e-05, "loss": 0.234, "step": 13048 }, { "epoch": 1.5473734139689315, "grad_norm": 1.0453702842544539, "learning_rate": 3.508135992554057e-05, "loss": 0.2071, "step": 13049 }, { "epoch": 1.5474919957310567, "grad_norm": 0.9988824550678258, "learning_rate": 3.5079163337609884e-05, "loss": 0.1829, "step": 13050 }, { "epoch": 1.5476105774931814, "grad_norm": 0.9240896555106584, "learning_rate": 3.5076966656761677e-05, "loss": 0.2031, "step": 13051 }, { "epoch": 1.5477291592553066, "grad_norm": 1.0949409812057098, "learning_rate": 3.507476988301619e-05, "loss": 0.2568, "step": 13052 }, { "epoch": 1.5478477410174314, "grad_norm": 0.9198506836159268, "learning_rate": 3.5072573016393685e-05, "loss": 0.2146, "step": 13053 }, { "epoch": 1.5479663227795566, "grad_norm": 1.0266702088188966, "learning_rate": 3.5070376056914403e-05, "loss": 0.237, "step": 13054 }, { "epoch": 1.5480849045416814, "grad_norm": 0.7788213817412164, "learning_rate": 3.50681790045986e-05, "loss": 0.157, "step": 13055 }, { "epoch": 1.5482034863038066, "grad_norm": 1.0017466110114257, "learning_rate": 3.5065981859466545e-05, "loss": 0.2031, "step": 13056 }, { "epoch": 1.5483220680659313, "grad_norm": 1.4467566178648157, "learning_rate": 3.5063784621538476e-05, "loss": 0.3024, "step": 13057 }, { "epoch": 1.5484406498280565, "grad_norm": 0.7696994498241706, "learning_rate": 3.506158729083465e-05, "loss": 0.1403, "step": 13058 }, { "epoch": 1.5485592315901813, "grad_norm": 0.8286890474267681, "learning_rate": 3.505938986737533e-05, "loss": 0.1728, "step": 13059 }, { "epoch": 1.5486778133523065, "grad_norm": 1.0212572920373388, "learning_rate": 3.5057192351180766e-05, "loss": 0.2196, "step": 13060 }, { "epoch": 1.5487963951144312, "grad_norm": 0.7996385785179889, "learning_rate": 3.505499474227123e-05, "loss": 0.1524, "step": 13061 }, { "epoch": 1.5489149768765564, "grad_norm": 0.9279176677819817, "learning_rate": 3.5052797040666976e-05, "loss": 0.1993, "step": 13062 }, { "epoch": 1.5490335586386814, "grad_norm": 0.917622027986063, "learning_rate": 3.505059924638825e-05, "loss": 0.1662, "step": 13063 }, { "epoch": 1.5491521404008064, "grad_norm": 1.0395938932733506, "learning_rate": 3.504840135945533e-05, "loss": 0.196, "step": 13064 }, { "epoch": 1.5492707221629314, "grad_norm": 1.0471978220964535, "learning_rate": 3.504620337988847e-05, "loss": 0.2154, "step": 13065 }, { "epoch": 1.5493893039250564, "grad_norm": 1.1668586063290236, "learning_rate": 3.5044005307707926e-05, "loss": 0.1981, "step": 13066 }, { "epoch": 1.5495078856871813, "grad_norm": 1.6821725162158043, "learning_rate": 3.504180714293398e-05, "loss": 0.3727, "step": 13067 }, { "epoch": 1.5496264674493063, "grad_norm": 1.0776016600845943, "learning_rate": 3.503960888558688e-05, "loss": 0.2294, "step": 13068 }, { "epoch": 1.5497450492114313, "grad_norm": 1.2868963298416136, "learning_rate": 3.50374105356869e-05, "loss": 0.2545, "step": 13069 }, { "epoch": 1.5498636309735563, "grad_norm": 1.0441579310427482, "learning_rate": 3.503521209325431e-05, "loss": 0.2129, "step": 13070 }, { "epoch": 1.5499822127356813, "grad_norm": 0.8406246000742389, "learning_rate": 3.5033013558309364e-05, "loss": 0.1921, "step": 13071 }, { "epoch": 1.5501007944978062, "grad_norm": 1.4279590781610474, "learning_rate": 3.503081493087234e-05, "loss": 0.229, "step": 13072 }, { "epoch": 1.5502193762599312, "grad_norm": 0.9657246571399705, "learning_rate": 3.502861621096349e-05, "loss": 0.2107, "step": 13073 }, { "epoch": 1.5503379580220562, "grad_norm": 0.872634330352901, "learning_rate": 3.5026417398603114e-05, "loss": 0.1513, "step": 13074 }, { "epoch": 1.5504565397841812, "grad_norm": 0.9887145828471897, "learning_rate": 3.502421849381146e-05, "loss": 0.1516, "step": 13075 }, { "epoch": 1.5505751215463062, "grad_norm": 0.7951344078562673, "learning_rate": 3.50220194966088e-05, "loss": 0.1662, "step": 13076 }, { "epoch": 1.5506937033084311, "grad_norm": 0.9924477354321085, "learning_rate": 3.501982040701541e-05, "loss": 0.2227, "step": 13077 }, { "epoch": 1.5508122850705561, "grad_norm": 1.0684090699813609, "learning_rate": 3.5017621225051564e-05, "loss": 0.2752, "step": 13078 }, { "epoch": 1.550930866832681, "grad_norm": 0.7275373549364704, "learning_rate": 3.5015421950737537e-05, "loss": 0.1963, "step": 13079 }, { "epoch": 1.551049448594806, "grad_norm": 1.3470975164693046, "learning_rate": 3.50132225840936e-05, "loss": 0.271, "step": 13080 }, { "epoch": 1.551168030356931, "grad_norm": 0.9380366284748598, "learning_rate": 3.501102312514003e-05, "loss": 0.1907, "step": 13081 }, { "epoch": 1.551286612119056, "grad_norm": 0.8075865378197027, "learning_rate": 3.5008823573897096e-05, "loss": 0.1921, "step": 13082 }, { "epoch": 1.551405193881181, "grad_norm": 1.0404317924871154, "learning_rate": 3.500662393038509e-05, "loss": 0.2751, "step": 13083 }, { "epoch": 1.551523775643306, "grad_norm": 0.9659705538542429, "learning_rate": 3.5004424194624284e-05, "loss": 0.2031, "step": 13084 }, { "epoch": 1.551642357405431, "grad_norm": 1.3460301913964523, "learning_rate": 3.500222436663495e-05, "loss": 0.1922, "step": 13085 }, { "epoch": 1.551760939167556, "grad_norm": 0.929401080154763, "learning_rate": 3.500002444643738e-05, "loss": 0.1899, "step": 13086 }, { "epoch": 1.5518795209296812, "grad_norm": 0.6572845765107708, "learning_rate": 3.499782443405184e-05, "loss": 0.1476, "step": 13087 }, { "epoch": 1.551998102691806, "grad_norm": 1.0528895662198148, "learning_rate": 3.499562432949862e-05, "loss": 0.2394, "step": 13088 }, { "epoch": 1.5521166844539311, "grad_norm": 0.9642450757858113, "learning_rate": 3.4993424132798e-05, "loss": 0.2432, "step": 13089 }, { "epoch": 1.552235266216056, "grad_norm": 0.9577589298138799, "learning_rate": 3.499122384397027e-05, "loss": 0.2211, "step": 13090 }, { "epoch": 1.552353847978181, "grad_norm": 0.8359548967670991, "learning_rate": 3.49890234630357e-05, "loss": 0.1692, "step": 13091 }, { "epoch": 1.5524724297403059, "grad_norm": 1.0742670570751842, "learning_rate": 3.49868229900146e-05, "loss": 0.1966, "step": 13092 }, { "epoch": 1.552591011502431, "grad_norm": 1.098460007765056, "learning_rate": 3.4984622424927214e-05, "loss": 0.266, "step": 13093 }, { "epoch": 1.5527095932645558, "grad_norm": 1.001684991231171, "learning_rate": 3.498242176779387e-05, "loss": 0.1978, "step": 13094 }, { "epoch": 1.552828175026681, "grad_norm": 1.020942017416016, "learning_rate": 3.498022101863483e-05, "loss": 0.1689, "step": 13095 }, { "epoch": 1.5529467567888058, "grad_norm": 1.0037811351896286, "learning_rate": 3.497802017747041e-05, "loss": 0.2373, "step": 13096 }, { "epoch": 1.553065338550931, "grad_norm": 1.0030523640356488, "learning_rate": 3.497581924432086e-05, "loss": 0.2528, "step": 13097 }, { "epoch": 1.5531839203130557, "grad_norm": 0.8545710128825859, "learning_rate": 3.497361821920649e-05, "loss": 0.2045, "step": 13098 }, { "epoch": 1.553302502075181, "grad_norm": 1.173053381474065, "learning_rate": 3.4971417102147595e-05, "loss": 0.2468, "step": 13099 }, { "epoch": 1.5534210838373057, "grad_norm": 1.016236958324545, "learning_rate": 3.496921589316446e-05, "loss": 0.2783, "step": 13100 }, { "epoch": 1.553539665599431, "grad_norm": 0.868498869740926, "learning_rate": 3.496701459227738e-05, "loss": 0.1769, "step": 13101 }, { "epoch": 1.5536582473615557, "grad_norm": 0.9722466723626487, "learning_rate": 3.4964813199506654e-05, "loss": 0.2514, "step": 13102 }, { "epoch": 1.5537768291236809, "grad_norm": 0.8977767740813032, "learning_rate": 3.4962611714872565e-05, "loss": 0.1713, "step": 13103 }, { "epoch": 1.5538954108858056, "grad_norm": 0.6958854050095565, "learning_rate": 3.496041013839542e-05, "loss": 0.1233, "step": 13104 }, { "epoch": 1.5540139926479308, "grad_norm": 1.0593222338801247, "learning_rate": 3.49582084700955e-05, "loss": 0.2065, "step": 13105 }, { "epoch": 1.5541325744100556, "grad_norm": 0.9595613580882345, "learning_rate": 3.4956006709993105e-05, "loss": 0.198, "step": 13106 }, { "epoch": 1.5542511561721808, "grad_norm": 1.017860415584178, "learning_rate": 3.495380485810855e-05, "loss": 0.193, "step": 13107 }, { "epoch": 1.5543697379343056, "grad_norm": 1.3310186285271595, "learning_rate": 3.4951602914462114e-05, "loss": 0.288, "step": 13108 }, { "epoch": 1.5544883196964308, "grad_norm": 0.8549752818571824, "learning_rate": 3.49494008790741e-05, "loss": 0.1809, "step": 13109 }, { "epoch": 1.5546069014585555, "grad_norm": 1.514987428659583, "learning_rate": 3.4947198751964806e-05, "loss": 0.3449, "step": 13110 }, { "epoch": 1.5547254832206807, "grad_norm": 1.4370681489152513, "learning_rate": 3.494499653315454e-05, "loss": 0.231, "step": 13111 }, { "epoch": 1.5548440649828057, "grad_norm": 0.8798598073730708, "learning_rate": 3.4942794222663614e-05, "loss": 0.1524, "step": 13112 }, { "epoch": 1.5549626467449307, "grad_norm": 0.8643654177395045, "learning_rate": 3.49405918205123e-05, "loss": 0.1823, "step": 13113 }, { "epoch": 1.5550812285070557, "grad_norm": 1.169254076067171, "learning_rate": 3.493838932672093e-05, "loss": 0.2577, "step": 13114 }, { "epoch": 1.5551998102691806, "grad_norm": 0.9466897195807291, "learning_rate": 3.49361867413098e-05, "loss": 0.2144, "step": 13115 }, { "epoch": 1.5553183920313056, "grad_norm": 1.1512895021469502, "learning_rate": 3.493398406429921e-05, "loss": 0.2144, "step": 13116 }, { "epoch": 1.5554369737934306, "grad_norm": 0.870182091702603, "learning_rate": 3.4931781295709466e-05, "loss": 0.1885, "step": 13117 }, { "epoch": 1.5555555555555556, "grad_norm": 0.9162967941398, "learning_rate": 3.492957843556088e-05, "loss": 0.1808, "step": 13118 }, { "epoch": 1.5556741373176806, "grad_norm": 0.9545320451698327, "learning_rate": 3.4927375483873756e-05, "loss": 0.2328, "step": 13119 }, { "epoch": 1.5557927190798055, "grad_norm": 0.9387178101470325, "learning_rate": 3.492517244066841e-05, "loss": 0.2126, "step": 13120 }, { "epoch": 1.5559113008419305, "grad_norm": 0.8285605348662975, "learning_rate": 3.492296930596514e-05, "loss": 0.1727, "step": 13121 }, { "epoch": 1.5560298826040555, "grad_norm": 1.1411185207194117, "learning_rate": 3.4920766079784256e-05, "loss": 0.2736, "step": 13122 }, { "epoch": 1.5561484643661805, "grad_norm": 1.4504907902272868, "learning_rate": 3.491856276214608e-05, "loss": 0.3513, "step": 13123 }, { "epoch": 1.5562670461283055, "grad_norm": 0.9169851624813995, "learning_rate": 3.491635935307092e-05, "loss": 0.1745, "step": 13124 }, { "epoch": 1.5563856278904304, "grad_norm": 0.7225685037801819, "learning_rate": 3.4914155852579084e-05, "loss": 0.1477, "step": 13125 }, { "epoch": 1.5565042096525554, "grad_norm": 0.8401533594041928, "learning_rate": 3.4911952260690885e-05, "loss": 0.2204, "step": 13126 }, { "epoch": 1.5566227914146804, "grad_norm": 0.7677923788791334, "learning_rate": 3.490974857742665e-05, "loss": 0.1568, "step": 13127 }, { "epoch": 1.5567413731768054, "grad_norm": 0.9133681432459265, "learning_rate": 3.490754480280668e-05, "loss": 0.2203, "step": 13128 }, { "epoch": 1.5568599549389304, "grad_norm": 0.9382883976517621, "learning_rate": 3.490534093685129e-05, "loss": 0.2293, "step": 13129 }, { "epoch": 1.5569785367010553, "grad_norm": 0.9171212760494835, "learning_rate": 3.490313697958081e-05, "loss": 0.1858, "step": 13130 }, { "epoch": 1.5570971184631803, "grad_norm": 0.9558803791481454, "learning_rate": 3.4900932931015557e-05, "loss": 0.195, "step": 13131 }, { "epoch": 1.5572157002253053, "grad_norm": 0.8143420738406641, "learning_rate": 3.4898728791175836e-05, "loss": 0.1277, "step": 13132 }, { "epoch": 1.5573342819874303, "grad_norm": 1.0184367700783872, "learning_rate": 3.4896524560081976e-05, "loss": 0.2343, "step": 13133 }, { "epoch": 1.5574528637495553, "grad_norm": 0.743029945149777, "learning_rate": 3.48943202377543e-05, "loss": 0.1549, "step": 13134 }, { "epoch": 1.5575714455116803, "grad_norm": 1.0759046626010405, "learning_rate": 3.4892115824213126e-05, "loss": 0.2489, "step": 13135 }, { "epoch": 1.5576900272738052, "grad_norm": 0.7685458075041345, "learning_rate": 3.488991131947876e-05, "loss": 0.1561, "step": 13136 }, { "epoch": 1.5578086090359302, "grad_norm": 0.7843781122116337, "learning_rate": 3.488770672357155e-05, "loss": 0.1909, "step": 13137 }, { "epoch": 1.5579271907980554, "grad_norm": 0.9633345607065357, "learning_rate": 3.4885502036511805e-05, "loss": 0.246, "step": 13138 }, { "epoch": 1.5580457725601802, "grad_norm": 0.9983931072730781, "learning_rate": 3.488329725831986e-05, "loss": 0.2472, "step": 13139 }, { "epoch": 1.5581643543223054, "grad_norm": 0.9764359336522147, "learning_rate": 3.488109238901602e-05, "loss": 0.202, "step": 13140 }, { "epoch": 1.5582829360844301, "grad_norm": 1.365040164823216, "learning_rate": 3.487888742862064e-05, "loss": 0.2928, "step": 13141 }, { "epoch": 1.5584015178465553, "grad_norm": 0.9783119528862977, "learning_rate": 3.487668237715403e-05, "loss": 0.2038, "step": 13142 }, { "epoch": 1.55852009960868, "grad_norm": 0.7434150304840806, "learning_rate": 3.487447723463652e-05, "loss": 0.1703, "step": 13143 }, { "epoch": 1.5586386813708053, "grad_norm": 1.136884150012758, "learning_rate": 3.4872272001088436e-05, "loss": 0.2454, "step": 13144 }, { "epoch": 1.55875726313293, "grad_norm": 1.030295439461227, "learning_rate": 3.4870066676530104e-05, "loss": 0.2101, "step": 13145 }, { "epoch": 1.5588758448950553, "grad_norm": 0.9885845766563467, "learning_rate": 3.486786126098187e-05, "loss": 0.1579, "step": 13146 }, { "epoch": 1.55899442665718, "grad_norm": 0.8308606307170483, "learning_rate": 3.4865655754464055e-05, "loss": 0.1489, "step": 13147 }, { "epoch": 1.5591130084193052, "grad_norm": 0.9507319579651601, "learning_rate": 3.4863450156996993e-05, "loss": 0.1766, "step": 13148 }, { "epoch": 1.55923159018143, "grad_norm": 1.4461470309998787, "learning_rate": 3.4861244468601006e-05, "loss": 0.2676, "step": 13149 }, { "epoch": 1.5593501719435552, "grad_norm": 0.937917364969022, "learning_rate": 3.485903868929645e-05, "loss": 0.1783, "step": 13150 }, { "epoch": 1.55946875370568, "grad_norm": 0.9345252435868415, "learning_rate": 3.485683281910364e-05, "loss": 0.1293, "step": 13151 }, { "epoch": 1.5595873354678051, "grad_norm": 0.7953220958005324, "learning_rate": 3.485462685804292e-05, "loss": 0.1725, "step": 13152 }, { "epoch": 1.55970591722993, "grad_norm": 1.2236064144136873, "learning_rate": 3.4852420806134625e-05, "loss": 0.2779, "step": 13153 }, { "epoch": 1.559824498992055, "grad_norm": 0.716966711499907, "learning_rate": 3.4850214663399097e-05, "loss": 0.1667, "step": 13154 }, { "epoch": 1.5599430807541799, "grad_norm": 1.2624678998033978, "learning_rate": 3.484800842985666e-05, "loss": 0.2547, "step": 13155 }, { "epoch": 1.560061662516305, "grad_norm": 1.4976435318468857, "learning_rate": 3.484580210552767e-05, "loss": 0.2893, "step": 13156 }, { "epoch": 1.5601802442784298, "grad_norm": 1.3434666687946861, "learning_rate": 3.484359569043245e-05, "loss": 0.313, "step": 13157 }, { "epoch": 1.560298826040555, "grad_norm": 0.75282790743166, "learning_rate": 3.4841389184591354e-05, "loss": 0.1262, "step": 13158 }, { "epoch": 1.5604174078026798, "grad_norm": 1.0326077936016833, "learning_rate": 3.483918258802471e-05, "loss": 0.2633, "step": 13159 }, { "epoch": 1.560535989564805, "grad_norm": 0.8636408224343743, "learning_rate": 3.483697590075288e-05, "loss": 0.1766, "step": 13160 }, { "epoch": 1.56065457132693, "grad_norm": 0.8689257925996184, "learning_rate": 3.48347691227962e-05, "loss": 0.1448, "step": 13161 }, { "epoch": 1.560773153089055, "grad_norm": 0.9796690272594073, "learning_rate": 3.4832562254174996e-05, "loss": 0.163, "step": 13162 }, { "epoch": 1.56089173485118, "grad_norm": 1.2703920853303352, "learning_rate": 3.4830355294909626e-05, "loss": 0.2188, "step": 13163 }, { "epoch": 1.561010316613305, "grad_norm": 1.019503899981079, "learning_rate": 3.482814824502044e-05, "loss": 0.2171, "step": 13164 }, { "epoch": 1.56112889837543, "grad_norm": 1.0326705985466231, "learning_rate": 3.4825941104527776e-05, "loss": 0.1822, "step": 13165 }, { "epoch": 1.5612474801375549, "grad_norm": 0.7451861516693179, "learning_rate": 3.4823733873451986e-05, "loss": 0.1498, "step": 13166 }, { "epoch": 1.5613660618996799, "grad_norm": 1.029665812120613, "learning_rate": 3.482152655181341e-05, "loss": 0.1927, "step": 13167 }, { "epoch": 1.5614846436618048, "grad_norm": 0.638219144555679, "learning_rate": 3.481931913963241e-05, "loss": 0.1288, "step": 13168 }, { "epoch": 1.5616032254239298, "grad_norm": 0.8939337582924006, "learning_rate": 3.481711163692933e-05, "loss": 0.1928, "step": 13169 }, { "epoch": 1.5617218071860548, "grad_norm": 1.2054224263876296, "learning_rate": 3.4814904043724514e-05, "loss": 0.2526, "step": 13170 }, { "epoch": 1.5618403889481798, "grad_norm": 0.7357278429336755, "learning_rate": 3.481269636003832e-05, "loss": 0.1353, "step": 13171 }, { "epoch": 1.5619589707103048, "grad_norm": 1.238856399947145, "learning_rate": 3.4810488585891106e-05, "loss": 0.2339, "step": 13172 }, { "epoch": 1.5620775524724297, "grad_norm": 1.1983088503602157, "learning_rate": 3.480828072130321e-05, "loss": 0.2503, "step": 13173 }, { "epoch": 1.5621961342345547, "grad_norm": 1.204405699804078, "learning_rate": 3.4806072766295e-05, "loss": 0.2597, "step": 13174 }, { "epoch": 1.5623147159966797, "grad_norm": 1.2380819521129187, "learning_rate": 3.480386472088681e-05, "loss": 0.3873, "step": 13175 }, { "epoch": 1.5624332977588047, "grad_norm": 1.0893692460197961, "learning_rate": 3.480165658509903e-05, "loss": 0.2768, "step": 13176 }, { "epoch": 1.5625518795209297, "grad_norm": 0.8569574255706042, "learning_rate": 3.479944835895198e-05, "loss": 0.1614, "step": 13177 }, { "epoch": 1.5626704612830546, "grad_norm": 0.7668269586692791, "learning_rate": 3.479724004246604e-05, "loss": 0.169, "step": 13178 }, { "epoch": 1.5627890430451796, "grad_norm": 0.6682922608541269, "learning_rate": 3.479503163566156e-05, "loss": 0.1472, "step": 13179 }, { "epoch": 1.5629076248073046, "grad_norm": 0.9389542021314308, "learning_rate": 3.4792823138558896e-05, "loss": 0.1868, "step": 13180 }, { "epoch": 1.5630262065694296, "grad_norm": 0.8916742226957423, "learning_rate": 3.479061455117841e-05, "loss": 0.2176, "step": 13181 }, { "epoch": 1.5631447883315546, "grad_norm": 1.0662468727269692, "learning_rate": 3.478840587354047e-05, "loss": 0.188, "step": 13182 }, { "epoch": 1.5632633700936795, "grad_norm": 0.8892597472391855, "learning_rate": 3.478619710566543e-05, "loss": 0.2375, "step": 13183 }, { "epoch": 1.5633819518558045, "grad_norm": 0.7259959465142727, "learning_rate": 3.4783988247573654e-05, "loss": 0.1825, "step": 13184 }, { "epoch": 1.5635005336179295, "grad_norm": 1.3366955204197444, "learning_rate": 3.478177929928551e-05, "loss": 0.3019, "step": 13185 }, { "epoch": 1.5636191153800545, "grad_norm": 1.0812665846776774, "learning_rate": 3.4779570260821346e-05, "loss": 0.2609, "step": 13186 }, { "epoch": 1.5637376971421797, "grad_norm": 1.0869951484186313, "learning_rate": 3.477736113220154e-05, "loss": 0.1765, "step": 13187 }, { "epoch": 1.5638562789043045, "grad_norm": 1.718947982543437, "learning_rate": 3.477515191344645e-05, "loss": 0.4162, "step": 13188 }, { "epoch": 1.5639748606664297, "grad_norm": 0.8352294110688225, "learning_rate": 3.477294260457645e-05, "loss": 0.2067, "step": 13189 }, { "epoch": 1.5640934424285544, "grad_norm": 0.8328858629247802, "learning_rate": 3.47707332056119e-05, "loss": 0.17, "step": 13190 }, { "epoch": 1.5642120241906796, "grad_norm": 0.920419847545811, "learning_rate": 3.476852371657318e-05, "loss": 0.2326, "step": 13191 }, { "epoch": 1.5643306059528044, "grad_norm": 0.9075128141393741, "learning_rate": 3.4766314137480635e-05, "loss": 0.1937, "step": 13192 }, { "epoch": 1.5644491877149296, "grad_norm": 0.8884059638869388, "learning_rate": 3.476410446835467e-05, "loss": 0.2124, "step": 13193 }, { "epoch": 1.5645677694770543, "grad_norm": 0.7438938334856057, "learning_rate": 3.4761894709215625e-05, "loss": 0.1661, "step": 13194 }, { "epoch": 1.5646863512391795, "grad_norm": 0.6207332193627888, "learning_rate": 3.475968486008388e-05, "loss": 0.1408, "step": 13195 }, { "epoch": 1.5648049330013043, "grad_norm": 0.7970662726306347, "learning_rate": 3.4757474920979814e-05, "loss": 0.1831, "step": 13196 }, { "epoch": 1.5649235147634295, "grad_norm": 1.0948669748978865, "learning_rate": 3.4755264891923787e-05, "loss": 0.1935, "step": 13197 }, { "epoch": 1.5650420965255543, "grad_norm": 0.9483677777044881, "learning_rate": 3.4753054772936184e-05, "loss": 0.1652, "step": 13198 }, { "epoch": 1.5651606782876795, "grad_norm": 0.9375258536586146, "learning_rate": 3.4750844564037376e-05, "loss": 0.2012, "step": 13199 }, { "epoch": 1.5652792600498042, "grad_norm": 1.0808514467618617, "learning_rate": 3.4748634265247745e-05, "loss": 0.2269, "step": 13200 }, { "epoch": 1.5653978418119294, "grad_norm": 1.5372384586298424, "learning_rate": 3.4746423876587654e-05, "loss": 0.3357, "step": 13201 }, { "epoch": 1.5655164235740542, "grad_norm": 0.9955527433387563, "learning_rate": 3.4744213398077486e-05, "loss": 0.1565, "step": 13202 }, { "epoch": 1.5656350053361794, "grad_norm": 1.2086227462878647, "learning_rate": 3.474200282973762e-05, "loss": 0.3203, "step": 13203 }, { "epoch": 1.5657535870983041, "grad_norm": 1.0048366896537224, "learning_rate": 3.4739792171588445e-05, "loss": 0.2073, "step": 13204 }, { "epoch": 1.5658721688604293, "grad_norm": 0.9223992955295633, "learning_rate": 3.473758142365031e-05, "loss": 0.1863, "step": 13205 }, { "epoch": 1.565990750622554, "grad_norm": 1.2426688099356693, "learning_rate": 3.473537058594363e-05, "loss": 0.2627, "step": 13206 }, { "epoch": 1.5661093323846793, "grad_norm": 1.382349015499398, "learning_rate": 3.473315965848877e-05, "loss": 0.2615, "step": 13207 }, { "epoch": 1.566227914146804, "grad_norm": 1.0100553360160247, "learning_rate": 3.473094864130611e-05, "loss": 0.2707, "step": 13208 }, { "epoch": 1.5663464959089293, "grad_norm": 0.9363426345892094, "learning_rate": 3.472873753441603e-05, "loss": 0.2228, "step": 13209 }, { "epoch": 1.566465077671054, "grad_norm": 0.8672278435320432, "learning_rate": 3.472652633783893e-05, "loss": 0.1778, "step": 13210 }, { "epoch": 1.5665836594331792, "grad_norm": 1.0135695399107922, "learning_rate": 3.4724315051595186e-05, "loss": 0.2467, "step": 13211 }, { "epoch": 1.5667022411953042, "grad_norm": 0.8248195741559083, "learning_rate": 3.472210367570518e-05, "loss": 0.1452, "step": 13212 }, { "epoch": 1.5668208229574292, "grad_norm": 1.0866939203848895, "learning_rate": 3.47198922101893e-05, "loss": 0.2249, "step": 13213 }, { "epoch": 1.5669394047195542, "grad_norm": 0.7831122653160899, "learning_rate": 3.4717680655067926e-05, "loss": 0.1746, "step": 13214 }, { "epoch": 1.5670579864816792, "grad_norm": 1.0833003639953835, "learning_rate": 3.471546901036146e-05, "loss": 0.2007, "step": 13215 }, { "epoch": 1.5671765682438041, "grad_norm": 1.4005742853964025, "learning_rate": 3.471325727609028e-05, "loss": 0.2518, "step": 13216 }, { "epoch": 1.5672951500059291, "grad_norm": 1.2181356264479462, "learning_rate": 3.4711045452274784e-05, "loss": 0.2428, "step": 13217 }, { "epoch": 1.567413731768054, "grad_norm": 0.6814681584806378, "learning_rate": 3.470883353893535e-05, "loss": 0.149, "step": 13218 }, { "epoch": 1.567532313530179, "grad_norm": 0.9608356106472743, "learning_rate": 3.4706621536092385e-05, "loss": 0.2174, "step": 13219 }, { "epoch": 1.567650895292304, "grad_norm": 0.9581203430609055, "learning_rate": 3.4704409443766264e-05, "loss": 0.2342, "step": 13220 }, { "epoch": 1.567769477054429, "grad_norm": 1.2738806275813774, "learning_rate": 3.47021972619774e-05, "loss": 0.2597, "step": 13221 }, { "epoch": 1.567888058816554, "grad_norm": 0.9149980568339752, "learning_rate": 3.469998499074616e-05, "loss": 0.1913, "step": 13222 }, { "epoch": 1.568006640578679, "grad_norm": 1.079628212083828, "learning_rate": 3.4697772630092965e-05, "loss": 0.2206, "step": 13223 }, { "epoch": 1.568125222340804, "grad_norm": 1.0741132240037679, "learning_rate": 3.469556018003819e-05, "loss": 0.2249, "step": 13224 }, { "epoch": 1.568243804102929, "grad_norm": 1.4656309764865003, "learning_rate": 3.469334764060225e-05, "loss": 0.2927, "step": 13225 }, { "epoch": 1.568362385865054, "grad_norm": 0.8148552502795045, "learning_rate": 3.4691135011805525e-05, "loss": 0.1777, "step": 13226 }, { "epoch": 1.568480967627179, "grad_norm": 1.351307862582582, "learning_rate": 3.4688922293668416e-05, "loss": 0.2968, "step": 13227 }, { "epoch": 1.568599549389304, "grad_norm": 0.912162892251187, "learning_rate": 3.468670948621133e-05, "loss": 0.2094, "step": 13228 }, { "epoch": 1.5687181311514289, "grad_norm": 0.841427170544016, "learning_rate": 3.468449658945466e-05, "loss": 0.1485, "step": 13229 }, { "epoch": 1.5688367129135539, "grad_norm": 1.206975331036354, "learning_rate": 3.468228360341881e-05, "loss": 0.27, "step": 13230 }, { "epoch": 1.5689552946756788, "grad_norm": 0.8494602496163988, "learning_rate": 3.468007052812417e-05, "loss": 0.1788, "step": 13231 }, { "epoch": 1.5690738764378038, "grad_norm": 0.9434594031383579, "learning_rate": 3.467785736359116e-05, "loss": 0.2507, "step": 13232 }, { "epoch": 1.5691924581999288, "grad_norm": 0.923356628447899, "learning_rate": 3.467564410984017e-05, "loss": 0.1933, "step": 13233 }, { "epoch": 1.5693110399620538, "grad_norm": 0.6845619841609953, "learning_rate": 3.4673430766891604e-05, "loss": 0.1472, "step": 13234 }, { "epoch": 1.5694296217241788, "grad_norm": 0.7537206593042752, "learning_rate": 3.4671217334765867e-05, "loss": 0.147, "step": 13235 }, { "epoch": 1.569548203486304, "grad_norm": 0.8939821243257468, "learning_rate": 3.466900381348337e-05, "loss": 0.2187, "step": 13236 }, { "epoch": 1.5696667852484287, "grad_norm": 0.7676365612441697, "learning_rate": 3.466679020306451e-05, "loss": 0.1811, "step": 13237 }, { "epoch": 1.569785367010554, "grad_norm": 0.919819351966761, "learning_rate": 3.46645765035297e-05, "loss": 0.2263, "step": 13238 }, { "epoch": 1.5699039487726787, "grad_norm": 0.878796048344292, "learning_rate": 3.466236271489935e-05, "loss": 0.1863, "step": 13239 }, { "epoch": 1.570022530534804, "grad_norm": 0.8706017186377146, "learning_rate": 3.466014883719386e-05, "loss": 0.206, "step": 13240 }, { "epoch": 1.5701411122969287, "grad_norm": 1.0578735279385667, "learning_rate": 3.465793487043365e-05, "loss": 0.2153, "step": 13241 }, { "epoch": 1.5702596940590539, "grad_norm": 0.6348067258936739, "learning_rate": 3.4655720814639116e-05, "loss": 0.1356, "step": 13242 }, { "epoch": 1.5703782758211786, "grad_norm": 1.1682154473928497, "learning_rate": 3.465350666983068e-05, "loss": 0.1845, "step": 13243 }, { "epoch": 1.5704968575833038, "grad_norm": 0.924577258662925, "learning_rate": 3.465129243602875e-05, "loss": 0.22, "step": 13244 }, { "epoch": 1.5706154393454286, "grad_norm": 1.022088270546706, "learning_rate": 3.4649078113253746e-05, "loss": 0.1672, "step": 13245 }, { "epoch": 1.5707340211075538, "grad_norm": 0.8683372750122006, "learning_rate": 3.4646863701526064e-05, "loss": 0.21, "step": 13246 }, { "epoch": 1.5708526028696785, "grad_norm": 1.0484633275469653, "learning_rate": 3.464464920086613e-05, "loss": 0.2181, "step": 13247 }, { "epoch": 1.5709711846318037, "grad_norm": 0.8961848419924358, "learning_rate": 3.464243461129436e-05, "loss": 0.1625, "step": 13248 }, { "epoch": 1.5710897663939285, "grad_norm": 1.3231312574574834, "learning_rate": 3.464021993283118e-05, "loss": 0.3024, "step": 13249 }, { "epoch": 1.5712083481560537, "grad_norm": 1.1374632186384155, "learning_rate": 3.463800516549697e-05, "loss": 0.1914, "step": 13250 }, { "epoch": 1.5713269299181785, "grad_norm": 0.8911519714801194, "learning_rate": 3.463579030931219e-05, "loss": 0.1726, "step": 13251 }, { "epoch": 1.5714455116803037, "grad_norm": 1.0335887920985332, "learning_rate": 3.463357536429723e-05, "loss": 0.2237, "step": 13252 }, { "epoch": 1.5715640934424284, "grad_norm": 0.6823741749774485, "learning_rate": 3.4631360330472526e-05, "loss": 0.1424, "step": 13253 }, { "epoch": 1.5716826752045536, "grad_norm": 0.8542246357711445, "learning_rate": 3.4629145207858484e-05, "loss": 0.186, "step": 13254 }, { "epoch": 1.5718012569666784, "grad_norm": 1.0686970877889392, "learning_rate": 3.4626929996475524e-05, "loss": 0.1838, "step": 13255 }, { "epoch": 1.5719198387288036, "grad_norm": 1.3792189562540869, "learning_rate": 3.4624714696344084e-05, "loss": 0.2749, "step": 13256 }, { "epoch": 1.5720384204909283, "grad_norm": 0.8988524140555754, "learning_rate": 3.462249930748458e-05, "loss": 0.1558, "step": 13257 }, { "epoch": 1.5721570022530535, "grad_norm": 0.9512910008584282, "learning_rate": 3.4620283829917425e-05, "loss": 0.1971, "step": 13258 }, { "epoch": 1.5722755840151783, "grad_norm": 0.899238095453484, "learning_rate": 3.461806826366305e-05, "loss": 0.2091, "step": 13259 }, { "epoch": 1.5723941657773035, "grad_norm": 1.1220614907948745, "learning_rate": 3.461585260874189e-05, "loss": 0.2079, "step": 13260 }, { "epoch": 1.5725127475394285, "grad_norm": 0.9960531939814639, "learning_rate": 3.461363686517435e-05, "loss": 0.1867, "step": 13261 }, { "epoch": 1.5726313293015535, "grad_norm": 1.0532916340049345, "learning_rate": 3.461142103298087e-05, "loss": 0.2352, "step": 13262 }, { "epoch": 1.5727499110636785, "grad_norm": 1.0949180794393114, "learning_rate": 3.460920511218188e-05, "loss": 0.2, "step": 13263 }, { "epoch": 1.5728684928258034, "grad_norm": 0.9889669595306804, "learning_rate": 3.46069891027978e-05, "loss": 0.1754, "step": 13264 }, { "epoch": 1.5729870745879284, "grad_norm": 1.0037155632001806, "learning_rate": 3.4604773004849064e-05, "loss": 0.1705, "step": 13265 }, { "epoch": 1.5731056563500534, "grad_norm": 0.7864541694755648, "learning_rate": 3.460255681835609e-05, "loss": 0.1821, "step": 13266 }, { "epoch": 1.5732242381121784, "grad_norm": 1.0354486698217673, "learning_rate": 3.4600340543339324e-05, "loss": 0.2369, "step": 13267 }, { "epoch": 1.5733428198743034, "grad_norm": 1.6961051388217772, "learning_rate": 3.459812417981919e-05, "loss": 0.2997, "step": 13268 }, { "epoch": 1.5734614016364283, "grad_norm": 0.9972658033609357, "learning_rate": 3.459590772781612e-05, "loss": 0.1898, "step": 13269 }, { "epoch": 1.5735799833985533, "grad_norm": 1.503612441506859, "learning_rate": 3.459369118735054e-05, "loss": 0.3582, "step": 13270 }, { "epoch": 1.5736985651606783, "grad_norm": 0.9628108585596351, "learning_rate": 3.4591474558442904e-05, "loss": 0.2212, "step": 13271 }, { "epoch": 1.5738171469228033, "grad_norm": 0.8123474317896026, "learning_rate": 3.458925784111363e-05, "loss": 0.1465, "step": 13272 }, { "epoch": 1.5739357286849283, "grad_norm": 1.0025928295418158, "learning_rate": 3.458704103538316e-05, "loss": 0.2421, "step": 13273 }, { "epoch": 1.5740543104470532, "grad_norm": 1.0218360777146323, "learning_rate": 3.458482414127193e-05, "loss": 0.2267, "step": 13274 }, { "epoch": 1.5741728922091782, "grad_norm": 0.8185532441417953, "learning_rate": 3.458260715880037e-05, "loss": 0.1633, "step": 13275 }, { "epoch": 1.5742914739713032, "grad_norm": 0.7502598974981589, "learning_rate": 3.458039008798892e-05, "loss": 0.1664, "step": 13276 }, { "epoch": 1.5744100557334282, "grad_norm": 1.2935494061052137, "learning_rate": 3.4578172928858035e-05, "loss": 0.2982, "step": 13277 }, { "epoch": 1.5745286374955532, "grad_norm": 1.0735427178719006, "learning_rate": 3.4575955681428125e-05, "loss": 0.2577, "step": 13278 }, { "epoch": 1.5746472192576781, "grad_norm": 1.0613615314308766, "learning_rate": 3.4573738345719656e-05, "loss": 0.1973, "step": 13279 }, { "epoch": 1.5747658010198031, "grad_norm": 0.9863939547759781, "learning_rate": 3.457152092175305e-05, "loss": 0.2253, "step": 13280 }, { "epoch": 1.574884382781928, "grad_norm": 0.8362483842773701, "learning_rate": 3.4569303409548774e-05, "loss": 0.1812, "step": 13281 }, { "epoch": 1.575002964544053, "grad_norm": 1.0384628995128522, "learning_rate": 3.456708580912725e-05, "loss": 0.1984, "step": 13282 }, { "epoch": 1.575121546306178, "grad_norm": 1.4233200935262995, "learning_rate": 3.456486812050892e-05, "loss": 0.3227, "step": 13283 }, { "epoch": 1.575240128068303, "grad_norm": 1.084707712477007, "learning_rate": 3.4562650343714244e-05, "loss": 0.2471, "step": 13284 }, { "epoch": 1.575358709830428, "grad_norm": 0.8489281058434864, "learning_rate": 3.456043247876365e-05, "loss": 0.197, "step": 13285 }, { "epoch": 1.575477291592553, "grad_norm": 1.0129519907256181, "learning_rate": 3.4558214525677594e-05, "loss": 0.1987, "step": 13286 }, { "epoch": 1.5755958733546782, "grad_norm": 0.9847219047584574, "learning_rate": 3.455599648447653e-05, "loss": 0.2343, "step": 13287 }, { "epoch": 1.575714455116803, "grad_norm": 0.8994012153287565, "learning_rate": 3.4553778355180885e-05, "loss": 0.1861, "step": 13288 }, { "epoch": 1.5758330368789282, "grad_norm": 1.14669042992534, "learning_rate": 3.4551560137811116e-05, "loss": 0.2186, "step": 13289 }, { "epoch": 1.575951618641053, "grad_norm": 0.930285195900521, "learning_rate": 3.454934183238769e-05, "loss": 0.2044, "step": 13290 }, { "epoch": 1.5760702004031781, "grad_norm": 0.9686223065700884, "learning_rate": 3.454712343893103e-05, "loss": 0.1804, "step": 13291 }, { "epoch": 1.576188782165303, "grad_norm": 0.7800524405634899, "learning_rate": 3.454490495746161e-05, "loss": 0.1572, "step": 13292 }, { "epoch": 1.576307363927428, "grad_norm": 1.1040861351371538, "learning_rate": 3.4542686387999866e-05, "loss": 0.2591, "step": 13293 }, { "epoch": 1.5764259456895529, "grad_norm": 1.3582232380129369, "learning_rate": 3.454046773056626e-05, "loss": 0.3502, "step": 13294 }, { "epoch": 1.576544527451678, "grad_norm": 0.8073558426448947, "learning_rate": 3.4538248985181235e-05, "loss": 0.1783, "step": 13295 }, { "epoch": 1.5766631092138028, "grad_norm": 0.9133698742469906, "learning_rate": 3.4536030151865253e-05, "loss": 0.2173, "step": 13296 }, { "epoch": 1.576781690975928, "grad_norm": 1.0192054082841688, "learning_rate": 3.453381123063876e-05, "loss": 0.1745, "step": 13297 }, { "epoch": 1.5769002727380528, "grad_norm": 1.1051053217174127, "learning_rate": 3.453159222152223e-05, "loss": 0.2444, "step": 13298 }, { "epoch": 1.577018854500178, "grad_norm": 1.0020184613019196, "learning_rate": 3.45293731245361e-05, "loss": 0.2025, "step": 13299 }, { "epoch": 1.5771374362623027, "grad_norm": 1.74153199161126, "learning_rate": 3.4527153939700834e-05, "loss": 0.4371, "step": 13300 }, { "epoch": 1.577256018024428, "grad_norm": 1.1568727434214314, "learning_rate": 3.45249346670369e-05, "loss": 0.291, "step": 13301 }, { "epoch": 1.5773745997865527, "grad_norm": 0.9104339222614066, "learning_rate": 3.452271530656474e-05, "loss": 0.195, "step": 13302 }, { "epoch": 1.577493181548678, "grad_norm": 1.4303890678529187, "learning_rate": 3.452049585830483e-05, "loss": 0.3091, "step": 13303 }, { "epoch": 1.5776117633108027, "grad_norm": 1.005178135890975, "learning_rate": 3.451827632227762e-05, "loss": 0.184, "step": 13304 }, { "epoch": 1.5777303450729279, "grad_norm": 0.821670243135691, "learning_rate": 3.451605669850358e-05, "loss": 0.1988, "step": 13305 }, { "epoch": 1.5778489268350526, "grad_norm": 1.0614104731568546, "learning_rate": 3.4513836987003156e-05, "loss": 0.2939, "step": 13306 }, { "epoch": 1.5779675085971778, "grad_norm": 0.9544639354613433, "learning_rate": 3.451161718779682e-05, "loss": 0.1667, "step": 13307 }, { "epoch": 1.5780860903593026, "grad_norm": 1.2972007451231395, "learning_rate": 3.450939730090504e-05, "loss": 0.238, "step": 13308 }, { "epoch": 1.5782046721214278, "grad_norm": 1.0087216092835123, "learning_rate": 3.4507177326348294e-05, "loss": 0.2282, "step": 13309 }, { "epoch": 1.5783232538835525, "grad_norm": 1.3316043361636618, "learning_rate": 3.450495726414701e-05, "loss": 0.2474, "step": 13310 }, { "epoch": 1.5784418356456777, "grad_norm": 1.211564722206576, "learning_rate": 3.450273711432169e-05, "loss": 0.291, "step": 13311 }, { "epoch": 1.5785604174078027, "grad_norm": 0.9468410188742312, "learning_rate": 3.4500516876892786e-05, "loss": 0.1936, "step": 13312 }, { "epoch": 1.5786789991699277, "grad_norm": 1.167446615391396, "learning_rate": 3.449829655188076e-05, "loss": 0.2802, "step": 13313 }, { "epoch": 1.5787975809320527, "grad_norm": 1.294948308961233, "learning_rate": 3.449607613930609e-05, "loss": 0.2475, "step": 13314 }, { "epoch": 1.5789161626941777, "grad_norm": 0.8217348186237058, "learning_rate": 3.4493855639189245e-05, "loss": 0.1804, "step": 13315 }, { "epoch": 1.5790347444563027, "grad_norm": 1.010223919669672, "learning_rate": 3.44916350515507e-05, "loss": 0.2602, "step": 13316 }, { "epoch": 1.5791533262184276, "grad_norm": 0.8657899069850123, "learning_rate": 3.44894143764109e-05, "loss": 0.1505, "step": 13317 }, { "epoch": 1.5792719079805526, "grad_norm": 1.108770242382398, "learning_rate": 3.448719361379035e-05, "loss": 0.2745, "step": 13318 }, { "epoch": 1.5793904897426776, "grad_norm": 1.0048057300583726, "learning_rate": 3.44849727637095e-05, "loss": 0.2436, "step": 13319 }, { "epoch": 1.5795090715048026, "grad_norm": 1.0697620830377153, "learning_rate": 3.448275182618884e-05, "loss": 0.2907, "step": 13320 }, { "epoch": 1.5796276532669276, "grad_norm": 1.2059743996089733, "learning_rate": 3.4480530801248836e-05, "loss": 0.2354, "step": 13321 }, { "epoch": 1.5797462350290525, "grad_norm": 0.9079730719560163, "learning_rate": 3.4478309688909964e-05, "loss": 0.1625, "step": 13322 }, { "epoch": 1.5798648167911775, "grad_norm": 1.052646316018626, "learning_rate": 3.44760884891927e-05, "loss": 0.2199, "step": 13323 }, { "epoch": 1.5799833985533025, "grad_norm": 0.9825469379620707, "learning_rate": 3.4473867202117516e-05, "loss": 0.2095, "step": 13324 }, { "epoch": 1.5801019803154275, "grad_norm": 0.9539685836756817, "learning_rate": 3.4471645827704896e-05, "loss": 0.2405, "step": 13325 }, { "epoch": 1.5802205620775525, "grad_norm": 0.9080364714132103, "learning_rate": 3.4469424365975314e-05, "loss": 0.2028, "step": 13326 }, { "epoch": 1.5803391438396774, "grad_norm": 0.9345755542007149, "learning_rate": 3.446720281694925e-05, "loss": 0.1877, "step": 13327 }, { "epoch": 1.5804577256018024, "grad_norm": 1.0467507409669483, "learning_rate": 3.4464981180647195e-05, "loss": 0.2353, "step": 13328 }, { "epoch": 1.5805763073639274, "grad_norm": 0.9369665676654045, "learning_rate": 3.446275945708961e-05, "loss": 0.2291, "step": 13329 }, { "epoch": 1.5806948891260524, "grad_norm": 0.9209363048604801, "learning_rate": 3.4460537646296995e-05, "loss": 0.2054, "step": 13330 }, { "epoch": 1.5808134708881774, "grad_norm": 1.0698326913956147, "learning_rate": 3.445831574828983e-05, "loss": 0.2159, "step": 13331 }, { "epoch": 1.5809320526503023, "grad_norm": 0.8346544027074242, "learning_rate": 3.445609376308858e-05, "loss": 0.153, "step": 13332 }, { "epoch": 1.5810506344124273, "grad_norm": 1.0998952218916211, "learning_rate": 3.445387169071375e-05, "loss": 0.2213, "step": 13333 }, { "epoch": 1.5811692161745523, "grad_norm": 1.0121023526846984, "learning_rate": 3.44516495311858e-05, "loss": 0.2665, "step": 13334 }, { "epoch": 1.5812877979366773, "grad_norm": 0.8552495813760629, "learning_rate": 3.444942728452525e-05, "loss": 0.1224, "step": 13335 }, { "epoch": 1.5814063796988025, "grad_norm": 1.0577391107780458, "learning_rate": 3.444720495075256e-05, "loss": 0.2064, "step": 13336 }, { "epoch": 1.5815249614609272, "grad_norm": 0.9715937448671865, "learning_rate": 3.4444982529888223e-05, "loss": 0.1841, "step": 13337 }, { "epoch": 1.5816435432230525, "grad_norm": 0.7721940569431855, "learning_rate": 3.444276002195273e-05, "loss": 0.1621, "step": 13338 }, { "epoch": 1.5817621249851772, "grad_norm": 0.9244563477224146, "learning_rate": 3.444053742696657e-05, "loss": 0.1722, "step": 13339 }, { "epoch": 1.5818807067473024, "grad_norm": 0.983028807556441, "learning_rate": 3.443831474495024e-05, "loss": 0.2262, "step": 13340 }, { "epoch": 1.5819992885094272, "grad_norm": 0.8704360610124602, "learning_rate": 3.443609197592421e-05, "loss": 0.1484, "step": 13341 }, { "epoch": 1.5821178702715524, "grad_norm": 1.1446351547807985, "learning_rate": 3.443386911990899e-05, "loss": 0.2226, "step": 13342 }, { "epoch": 1.5822364520336771, "grad_norm": 1.462917108506428, "learning_rate": 3.443164617692507e-05, "loss": 0.2982, "step": 13343 }, { "epoch": 1.5823550337958023, "grad_norm": 1.1205743929911003, "learning_rate": 3.442942314699293e-05, "loss": 0.2192, "step": 13344 }, { "epoch": 1.582473615557927, "grad_norm": 0.8867768709169482, "learning_rate": 3.442720003013307e-05, "loss": 0.1794, "step": 13345 }, { "epoch": 1.5825921973200523, "grad_norm": 1.0562728951567968, "learning_rate": 3.4424976826366e-05, "loss": 0.2269, "step": 13346 }, { "epoch": 1.582710779082177, "grad_norm": 1.0822477203869245, "learning_rate": 3.4422753535712184e-05, "loss": 0.1959, "step": 13347 }, { "epoch": 1.5828293608443023, "grad_norm": 0.9082446485480822, "learning_rate": 3.442053015819214e-05, "loss": 0.2015, "step": 13348 }, { "epoch": 1.582947942606427, "grad_norm": 0.9880052406302757, "learning_rate": 3.4418306693826356e-05, "loss": 0.1994, "step": 13349 }, { "epoch": 1.5830665243685522, "grad_norm": 1.3963316724123471, "learning_rate": 3.4416083142635345e-05, "loss": 0.2497, "step": 13350 }, { "epoch": 1.583185106130677, "grad_norm": 0.8635802677400557, "learning_rate": 3.441385950463959e-05, "loss": 0.1953, "step": 13351 }, { "epoch": 1.5833036878928022, "grad_norm": 0.9289946387219047, "learning_rate": 3.44116357798596e-05, "loss": 0.1987, "step": 13352 }, { "epoch": 1.583422269654927, "grad_norm": 1.1551104484389705, "learning_rate": 3.440941196831586e-05, "loss": 0.2914, "step": 13353 }, { "epoch": 1.5835408514170521, "grad_norm": 2.1667688792437203, "learning_rate": 3.440718807002888e-05, "loss": 0.3846, "step": 13354 }, { "epoch": 1.583659433179177, "grad_norm": 0.8698224767590405, "learning_rate": 3.4404964085019165e-05, "loss": 0.2052, "step": 13355 }, { "epoch": 1.583778014941302, "grad_norm": 0.7030163249635428, "learning_rate": 3.440274001330722e-05, "loss": 0.1179, "step": 13356 }, { "epoch": 1.5838965967034269, "grad_norm": 0.647089124059883, "learning_rate": 3.440051585491353e-05, "loss": 0.1341, "step": 13357 }, { "epoch": 1.584015178465552, "grad_norm": 0.8730927867278692, "learning_rate": 3.439829160985862e-05, "loss": 0.2041, "step": 13358 }, { "epoch": 1.5841337602276768, "grad_norm": 0.8827560309735789, "learning_rate": 3.439606727816299e-05, "loss": 0.228, "step": 13359 }, { "epoch": 1.584252341989802, "grad_norm": 1.0277225446869869, "learning_rate": 3.439384285984713e-05, "loss": 0.1994, "step": 13360 }, { "epoch": 1.584370923751927, "grad_norm": 0.605952556297319, "learning_rate": 3.4391618354931566e-05, "loss": 0.1246, "step": 13361 }, { "epoch": 1.584489505514052, "grad_norm": 0.8879551774132002, "learning_rate": 3.438939376343679e-05, "loss": 0.2202, "step": 13362 }, { "epoch": 1.584608087276177, "grad_norm": 0.7396459139873478, "learning_rate": 3.438716908538334e-05, "loss": 0.1455, "step": 13363 }, { "epoch": 1.584726669038302, "grad_norm": 0.8874101237925796, "learning_rate": 3.4384944320791676e-05, "loss": 0.1907, "step": 13364 }, { "epoch": 1.584845250800427, "grad_norm": 0.6460124031562413, "learning_rate": 3.438271946968235e-05, "loss": 0.1805, "step": 13365 }, { "epoch": 1.584963832562552, "grad_norm": 0.7004648498347694, "learning_rate": 3.4380494532075846e-05, "loss": 0.118, "step": 13366 }, { "epoch": 1.585082414324677, "grad_norm": 0.8045458290478564, "learning_rate": 3.437826950799269e-05, "loss": 0.1881, "step": 13367 }, { "epoch": 1.5852009960868019, "grad_norm": 1.028629407033567, "learning_rate": 3.437604439745339e-05, "loss": 0.2397, "step": 13368 }, { "epoch": 1.5853195778489269, "grad_norm": 0.8680563726536847, "learning_rate": 3.437381920047846e-05, "loss": 0.1946, "step": 13369 }, { "epoch": 1.5854381596110518, "grad_norm": 0.718379587088822, "learning_rate": 3.437159391708841e-05, "loss": 0.1377, "step": 13370 }, { "epoch": 1.5855567413731768, "grad_norm": 0.8302742449303914, "learning_rate": 3.4369368547303755e-05, "loss": 0.1529, "step": 13371 }, { "epoch": 1.5856753231353018, "grad_norm": 1.0157802229798385, "learning_rate": 3.436714309114502e-05, "loss": 0.2017, "step": 13372 }, { "epoch": 1.5857939048974268, "grad_norm": 0.9788303415881313, "learning_rate": 3.43649175486327e-05, "loss": 0.1806, "step": 13373 }, { "epoch": 1.5859124866595518, "grad_norm": 0.7677106527705798, "learning_rate": 3.436269191978733e-05, "loss": 0.1531, "step": 13374 }, { "epoch": 1.5860310684216767, "grad_norm": 0.7724759454521409, "learning_rate": 3.436046620462942e-05, "loss": 0.1572, "step": 13375 }, { "epoch": 1.5861496501838017, "grad_norm": 0.9546821654830531, "learning_rate": 3.4358240403179486e-05, "loss": 0.2209, "step": 13376 }, { "epoch": 1.5862682319459267, "grad_norm": 0.899806909733568, "learning_rate": 3.435601451545806e-05, "loss": 0.169, "step": 13377 }, { "epoch": 1.5863868137080517, "grad_norm": 0.7435280686984017, "learning_rate": 3.4353788541485656e-05, "loss": 0.136, "step": 13378 }, { "epoch": 1.5865053954701767, "grad_norm": 0.7300145758135365, "learning_rate": 3.435156248128279e-05, "loss": 0.1504, "step": 13379 }, { "epoch": 1.5866239772323016, "grad_norm": 0.7411773143008036, "learning_rate": 3.4349336334869975e-05, "loss": 0.1457, "step": 13380 }, { "epoch": 1.5867425589944266, "grad_norm": 0.8018572702237099, "learning_rate": 3.434711010226775e-05, "loss": 0.1452, "step": 13381 }, { "epoch": 1.5868611407565516, "grad_norm": 1.0333761641893158, "learning_rate": 3.434488378349664e-05, "loss": 0.1884, "step": 13382 }, { "epoch": 1.5869797225186766, "grad_norm": 0.791992223882882, "learning_rate": 3.434265737857715e-05, "loss": 0.1449, "step": 13383 }, { "epoch": 1.5870983042808016, "grad_norm": 1.0461127505212595, "learning_rate": 3.434043088752982e-05, "loss": 0.1963, "step": 13384 }, { "epoch": 1.5872168860429265, "grad_norm": 0.8765481540758108, "learning_rate": 3.4338204310375166e-05, "loss": 0.19, "step": 13385 }, { "epoch": 1.5873354678050515, "grad_norm": 1.0835498030768387, "learning_rate": 3.433597764713372e-05, "loss": 0.2241, "step": 13386 }, { "epoch": 1.5874540495671767, "grad_norm": 0.9268936586179801, "learning_rate": 3.433375089782601e-05, "loss": 0.2091, "step": 13387 }, { "epoch": 1.5875726313293015, "grad_norm": 0.8934072183969943, "learning_rate": 3.433152406247257e-05, "loss": 0.1686, "step": 13388 }, { "epoch": 1.5876912130914267, "grad_norm": 0.8680618765073916, "learning_rate": 3.432929714109392e-05, "loss": 0.1538, "step": 13389 }, { "epoch": 1.5878097948535514, "grad_norm": 0.7298674385989, "learning_rate": 3.4327070133710585e-05, "loss": 0.162, "step": 13390 }, { "epoch": 1.5879283766156767, "grad_norm": 1.0114817463924457, "learning_rate": 3.43248430403431e-05, "loss": 0.2169, "step": 13391 }, { "epoch": 1.5880469583778014, "grad_norm": 0.8882729638626742, "learning_rate": 3.4322615861012e-05, "loss": 0.146, "step": 13392 }, { "epoch": 1.5881655401399266, "grad_norm": 1.0533190911166483, "learning_rate": 3.432038859573782e-05, "loss": 0.229, "step": 13393 }, { "epoch": 1.5882841219020514, "grad_norm": 1.0232536391715625, "learning_rate": 3.431816124454108e-05, "loss": 0.2306, "step": 13394 }, { "epoch": 1.5884027036641766, "grad_norm": 0.92904722039629, "learning_rate": 3.431593380744233e-05, "loss": 0.2545, "step": 13395 }, { "epoch": 1.5885212854263013, "grad_norm": 1.1916362459487368, "learning_rate": 3.4313706284462076e-05, "loss": 0.2512, "step": 13396 }, { "epoch": 1.5886398671884265, "grad_norm": 1.0017399163717118, "learning_rate": 3.431147867562088e-05, "loss": 0.1568, "step": 13397 }, { "epoch": 1.5887584489505513, "grad_norm": 0.8528515793108605, "learning_rate": 3.4309250980939265e-05, "loss": 0.1697, "step": 13398 }, { "epoch": 1.5888770307126765, "grad_norm": 0.7803612440108989, "learning_rate": 3.430702320043777e-05, "loss": 0.1722, "step": 13399 }, { "epoch": 1.5889956124748013, "grad_norm": 1.053364594399141, "learning_rate": 3.4304795334136944e-05, "loss": 0.2425, "step": 13400 }, { "epoch": 1.5891141942369265, "grad_norm": 1.1655494449414912, "learning_rate": 3.430256738205731e-05, "loss": 0.2493, "step": 13401 }, { "epoch": 1.5892327759990512, "grad_norm": 0.773027001214362, "learning_rate": 3.430033934421942e-05, "loss": 0.1792, "step": 13402 }, { "epoch": 1.5893513577611764, "grad_norm": 1.4027875770414469, "learning_rate": 3.4298111220643795e-05, "loss": 0.3214, "step": 13403 }, { "epoch": 1.5894699395233012, "grad_norm": 0.8732905483940658, "learning_rate": 3.429588301135099e-05, "loss": 0.2244, "step": 13404 }, { "epoch": 1.5895885212854264, "grad_norm": 1.0140281134591838, "learning_rate": 3.429365471636155e-05, "loss": 0.1802, "step": 13405 }, { "epoch": 1.5897071030475511, "grad_norm": 0.8376038961745782, "learning_rate": 3.4291426335696e-05, "loss": 0.1797, "step": 13406 }, { "epoch": 1.5898256848096763, "grad_norm": 0.7847067724968454, "learning_rate": 3.4289197869374893e-05, "loss": 0.1538, "step": 13407 }, { "epoch": 1.589944266571801, "grad_norm": 0.870727861574082, "learning_rate": 3.428696931741878e-05, "loss": 0.1596, "step": 13408 }, { "epoch": 1.5900628483339263, "grad_norm": 1.234153619716242, "learning_rate": 3.4284740679848196e-05, "loss": 0.2762, "step": 13409 }, { "epoch": 1.590181430096051, "grad_norm": 0.6688878169616422, "learning_rate": 3.428251195668369e-05, "loss": 0.1419, "step": 13410 }, { "epoch": 1.5903000118581763, "grad_norm": 0.9249975276735752, "learning_rate": 3.428028314794581e-05, "loss": 0.1565, "step": 13411 }, { "epoch": 1.5904185936203012, "grad_norm": 0.762287401996631, "learning_rate": 3.427805425365509e-05, "loss": 0.1782, "step": 13412 }, { "epoch": 1.5905371753824262, "grad_norm": 1.2499389616185752, "learning_rate": 3.4275825273832094e-05, "loss": 0.2483, "step": 13413 }, { "epoch": 1.5906557571445512, "grad_norm": 0.8708550399845539, "learning_rate": 3.427359620849736e-05, "loss": 0.203, "step": 13414 }, { "epoch": 1.5907743389066762, "grad_norm": 0.7624881814559452, "learning_rate": 3.4271367057671446e-05, "loss": 0.1556, "step": 13415 }, { "epoch": 1.5908929206688012, "grad_norm": 0.8442363471378119, "learning_rate": 3.426913782137489e-05, "loss": 0.1682, "step": 13416 }, { "epoch": 1.5910115024309261, "grad_norm": 0.8774588720910099, "learning_rate": 3.4266908499628256e-05, "loss": 0.2059, "step": 13417 }, { "epoch": 1.5911300841930511, "grad_norm": 0.8568865003902818, "learning_rate": 3.426467909245209e-05, "loss": 0.1883, "step": 13418 }, { "epoch": 1.5912486659551761, "grad_norm": 0.8144081505097701, "learning_rate": 3.426244959986694e-05, "loss": 0.2001, "step": 13419 }, { "epoch": 1.591367247717301, "grad_norm": 0.9635752089409804, "learning_rate": 3.426022002189336e-05, "loss": 0.1557, "step": 13420 }, { "epoch": 1.591485829479426, "grad_norm": 1.0312054535044797, "learning_rate": 3.4257990358551915e-05, "loss": 0.1959, "step": 13421 }, { "epoch": 1.591604411241551, "grad_norm": 1.358734449422405, "learning_rate": 3.425576060986315e-05, "loss": 0.2197, "step": 13422 }, { "epoch": 1.591722993003676, "grad_norm": 1.0549960932359117, "learning_rate": 3.425353077584762e-05, "loss": 0.2315, "step": 13423 }, { "epoch": 1.591841574765801, "grad_norm": 0.8907473004491052, "learning_rate": 3.425130085652588e-05, "loss": 0.198, "step": 13424 }, { "epoch": 1.591960156527926, "grad_norm": 0.7171634655965656, "learning_rate": 3.424907085191849e-05, "loss": 0.1486, "step": 13425 }, { "epoch": 1.592078738290051, "grad_norm": 1.0058110851528745, "learning_rate": 3.424684076204602e-05, "loss": 0.1857, "step": 13426 }, { "epoch": 1.592197320052176, "grad_norm": 0.7709835521422479, "learning_rate": 3.4244610586929005e-05, "loss": 0.1451, "step": 13427 }, { "epoch": 1.592315901814301, "grad_norm": 1.211577512612124, "learning_rate": 3.424238032658803e-05, "loss": 0.2601, "step": 13428 }, { "epoch": 1.592434483576426, "grad_norm": 1.23038571733898, "learning_rate": 3.424014998104363e-05, "loss": 0.2506, "step": 13429 }, { "epoch": 1.592553065338551, "grad_norm": 1.054486948122292, "learning_rate": 3.423791955031638e-05, "loss": 0.2329, "step": 13430 }, { "epoch": 1.5926716471006759, "grad_norm": 1.0133860509390649, "learning_rate": 3.4235689034426846e-05, "loss": 0.2138, "step": 13431 }, { "epoch": 1.5927902288628009, "grad_norm": 0.9279083996411258, "learning_rate": 3.4233458433395583e-05, "loss": 0.1795, "step": 13432 }, { "epoch": 1.5929088106249258, "grad_norm": 0.7417602507377317, "learning_rate": 3.4231227747243154e-05, "loss": 0.1456, "step": 13433 }, { "epoch": 1.5930273923870508, "grad_norm": 0.9333533362981811, "learning_rate": 3.422899697599013e-05, "loss": 0.1856, "step": 13434 }, { "epoch": 1.5931459741491758, "grad_norm": 0.8759470289790621, "learning_rate": 3.422676611965706e-05, "loss": 0.1855, "step": 13435 }, { "epoch": 1.593264555911301, "grad_norm": 2.0830181029560806, "learning_rate": 3.422453517826453e-05, "loss": 0.458, "step": 13436 }, { "epoch": 1.5933831376734258, "grad_norm": 1.1039251080509307, "learning_rate": 3.42223041518331e-05, "loss": 0.2105, "step": 13437 }, { "epoch": 1.593501719435551, "grad_norm": 1.0372891205115091, "learning_rate": 3.4220073040383326e-05, "loss": 0.2162, "step": 13438 }, { "epoch": 1.5936203011976757, "grad_norm": 0.6726639053269128, "learning_rate": 3.421784184393579e-05, "loss": 0.1443, "step": 13439 }, { "epoch": 1.593738882959801, "grad_norm": 0.9883580562241387, "learning_rate": 3.4215610562511055e-05, "loss": 0.1731, "step": 13440 }, { "epoch": 1.5938574647219257, "grad_norm": 0.9645077641332059, "learning_rate": 3.421337919612969e-05, "loss": 0.2153, "step": 13441 }, { "epoch": 1.593976046484051, "grad_norm": 0.8677132104822005, "learning_rate": 3.421114774481227e-05, "loss": 0.1964, "step": 13442 }, { "epoch": 1.5940946282461756, "grad_norm": 0.7137359257680594, "learning_rate": 3.4208916208579364e-05, "loss": 0.1626, "step": 13443 }, { "epoch": 1.5942132100083009, "grad_norm": 0.8225049000386762, "learning_rate": 3.4206684587451544e-05, "loss": 0.1744, "step": 13444 }, { "epoch": 1.5943317917704256, "grad_norm": 0.7959343077777657, "learning_rate": 3.4204452881449376e-05, "loss": 0.1783, "step": 13445 }, { "epoch": 1.5944503735325508, "grad_norm": 0.9577236411316574, "learning_rate": 3.420222109059345e-05, "loss": 0.1877, "step": 13446 }, { "epoch": 1.5945689552946756, "grad_norm": 0.8448101355027032, "learning_rate": 3.4199989214904315e-05, "loss": 0.1523, "step": 13447 }, { "epoch": 1.5946875370568008, "grad_norm": 1.1198492697123907, "learning_rate": 3.419775725440257e-05, "loss": 0.2755, "step": 13448 }, { "epoch": 1.5948061188189255, "grad_norm": 1.091715182507115, "learning_rate": 3.419552520910878e-05, "loss": 0.1886, "step": 13449 }, { "epoch": 1.5949247005810507, "grad_norm": 0.9712354691051607, "learning_rate": 3.4193293079043527e-05, "loss": 0.2328, "step": 13450 }, { "epoch": 1.5950432823431755, "grad_norm": 1.0222561548135638, "learning_rate": 3.419106086422739e-05, "loss": 0.2195, "step": 13451 }, { "epoch": 1.5951618641053007, "grad_norm": 1.1901934150145486, "learning_rate": 3.418882856468093e-05, "loss": 0.2125, "step": 13452 }, { "epoch": 1.5952804458674255, "grad_norm": 1.1138767100136906, "learning_rate": 3.418659618042475e-05, "loss": 0.2788, "step": 13453 }, { "epoch": 1.5953990276295507, "grad_norm": 1.0314012795353706, "learning_rate": 3.418436371147941e-05, "loss": 0.2213, "step": 13454 }, { "epoch": 1.5955176093916754, "grad_norm": 1.215090006536514, "learning_rate": 3.4182131157865505e-05, "loss": 0.2651, "step": 13455 }, { "epoch": 1.5956361911538006, "grad_norm": 0.9044588232160928, "learning_rate": 3.41798985196036e-05, "loss": 0.1585, "step": 13456 }, { "epoch": 1.5957547729159254, "grad_norm": 0.9911071138934332, "learning_rate": 3.41776657967143e-05, "loss": 0.1637, "step": 13457 }, { "epoch": 1.5958733546780506, "grad_norm": 1.383739523690293, "learning_rate": 3.4175432989218174e-05, "loss": 0.3735, "step": 13458 }, { "epoch": 1.5959919364401753, "grad_norm": 1.017627788576349, "learning_rate": 3.417320009713581e-05, "loss": 0.2059, "step": 13459 }, { "epoch": 1.5961105182023005, "grad_norm": 1.0468587537867824, "learning_rate": 3.4170967120487786e-05, "loss": 0.2226, "step": 13460 }, { "epoch": 1.5962290999644255, "grad_norm": 1.0904646146820587, "learning_rate": 3.416873405929469e-05, "loss": 0.2277, "step": 13461 }, { "epoch": 1.5963476817265505, "grad_norm": 0.9803992787766296, "learning_rate": 3.416650091357711e-05, "loss": 0.2202, "step": 13462 }, { "epoch": 1.5964662634886755, "grad_norm": 1.1132615052020571, "learning_rate": 3.416426768335563e-05, "loss": 0.2641, "step": 13463 }, { "epoch": 1.5965848452508005, "grad_norm": 0.9333248472517162, "learning_rate": 3.4162034368650845e-05, "loss": 0.2212, "step": 13464 }, { "epoch": 1.5967034270129254, "grad_norm": 0.8153775306673816, "learning_rate": 3.4159800969483335e-05, "loss": 0.1508, "step": 13465 }, { "epoch": 1.5968220087750504, "grad_norm": 0.7916052970318194, "learning_rate": 3.4157567485873694e-05, "loss": 0.1638, "step": 13466 }, { "epoch": 1.5969405905371754, "grad_norm": 1.4702995686961848, "learning_rate": 3.415533391784251e-05, "loss": 0.3761, "step": 13467 }, { "epoch": 1.5970591722993004, "grad_norm": 0.7715947997622719, "learning_rate": 3.415310026541037e-05, "loss": 0.1627, "step": 13468 }, { "epoch": 1.5971777540614254, "grad_norm": 0.9111882430835674, "learning_rate": 3.4150866528597876e-05, "loss": 0.1701, "step": 13469 }, { "epoch": 1.5972963358235504, "grad_norm": 0.9484199666799671, "learning_rate": 3.414863270742561e-05, "loss": 0.2119, "step": 13470 }, { "epoch": 1.5974149175856753, "grad_norm": 0.6520649635307016, "learning_rate": 3.4146398801914175e-05, "loss": 0.1697, "step": 13471 }, { "epoch": 1.5975334993478003, "grad_norm": 1.1388107825911822, "learning_rate": 3.414416481208416e-05, "loss": 0.255, "step": 13472 }, { "epoch": 1.5976520811099253, "grad_norm": 1.0733449740540182, "learning_rate": 3.414193073795615e-05, "loss": 0.3216, "step": 13473 }, { "epoch": 1.5977706628720503, "grad_norm": 1.2312673642099778, "learning_rate": 3.413969657955075e-05, "loss": 0.3322, "step": 13474 }, { "epoch": 1.5978892446341753, "grad_norm": 1.6892843469683152, "learning_rate": 3.413746233688856e-05, "loss": 0.3806, "step": 13475 }, { "epoch": 1.5980078263963002, "grad_norm": 0.844824059647215, "learning_rate": 3.413522800999017e-05, "loss": 0.1902, "step": 13476 }, { "epoch": 1.5981264081584252, "grad_norm": 0.7443518837096292, "learning_rate": 3.413299359887618e-05, "loss": 0.1335, "step": 13477 }, { "epoch": 1.5982449899205502, "grad_norm": 0.9920964237745092, "learning_rate": 3.413075910356719e-05, "loss": 0.2419, "step": 13478 }, { "epoch": 1.5983635716826752, "grad_norm": 0.7998344107100257, "learning_rate": 3.4128524524083804e-05, "loss": 0.1701, "step": 13479 }, { "epoch": 1.5984821534448002, "grad_norm": 1.0423128812326956, "learning_rate": 3.412628986044661e-05, "loss": 0.2272, "step": 13480 }, { "epoch": 1.5986007352069251, "grad_norm": 0.8284482589375506, "learning_rate": 3.412405511267622e-05, "loss": 0.1696, "step": 13481 }, { "epoch": 1.5987193169690501, "grad_norm": 1.1648303612595803, "learning_rate": 3.412182028079322e-05, "loss": 0.2374, "step": 13482 }, { "epoch": 1.598837898731175, "grad_norm": 0.9323866982979495, "learning_rate": 3.4119585364818225e-05, "loss": 0.188, "step": 13483 }, { "epoch": 1.5989564804933, "grad_norm": 0.8688767386398583, "learning_rate": 3.411735036477184e-05, "loss": 0.2072, "step": 13484 }, { "epoch": 1.599075062255425, "grad_norm": 0.7346797332143312, "learning_rate": 3.411511528067466e-05, "loss": 0.1634, "step": 13485 }, { "epoch": 1.59919364401755, "grad_norm": 1.0067270733455558, "learning_rate": 3.41128801125473e-05, "loss": 0.174, "step": 13486 }, { "epoch": 1.5993122257796752, "grad_norm": 0.728021897912261, "learning_rate": 3.411064486041036e-05, "loss": 0.1604, "step": 13487 }, { "epoch": 1.5994308075418, "grad_norm": 1.1987512727202745, "learning_rate": 3.410840952428445e-05, "loss": 0.3049, "step": 13488 }, { "epoch": 1.5995493893039252, "grad_norm": 0.8880635718155879, "learning_rate": 3.4106174104190166e-05, "loss": 0.2153, "step": 13489 }, { "epoch": 1.59966797106605, "grad_norm": 1.1037324772736972, "learning_rate": 3.410393860014813e-05, "loss": 0.2513, "step": 13490 }, { "epoch": 1.5997865528281752, "grad_norm": 1.3408036567694623, "learning_rate": 3.4101703012178945e-05, "loss": 0.2198, "step": 13491 }, { "epoch": 1.5999051345903, "grad_norm": 0.8264223886498777, "learning_rate": 3.4099467340303216e-05, "loss": 0.1861, "step": 13492 }, { "epoch": 1.6000237163524251, "grad_norm": 0.8634868711335765, "learning_rate": 3.409723158454156e-05, "loss": 0.1953, "step": 13493 }, { "epoch": 1.6001422981145499, "grad_norm": 0.6849392218817861, "learning_rate": 3.4094995744914585e-05, "loss": 0.1777, "step": 13494 }, { "epoch": 1.600260879876675, "grad_norm": 1.3082730991254246, "learning_rate": 3.409275982144289e-05, "loss": 0.2813, "step": 13495 }, { "epoch": 1.6003794616387998, "grad_norm": 0.7481041318409017, "learning_rate": 3.409052381414711e-05, "loss": 0.209, "step": 13496 }, { "epoch": 1.600498043400925, "grad_norm": 0.8823959029580243, "learning_rate": 3.4088287723047844e-05, "loss": 0.2048, "step": 13497 }, { "epoch": 1.6006166251630498, "grad_norm": 0.9909934434767602, "learning_rate": 3.408605154816571e-05, "loss": 0.1911, "step": 13498 }, { "epoch": 1.600735206925175, "grad_norm": 0.8885088019093903, "learning_rate": 3.4083815289521325e-05, "loss": 0.2094, "step": 13499 }, { "epoch": 1.6008537886872998, "grad_norm": 0.8882085146771737, "learning_rate": 3.40815789471353e-05, "loss": 0.1583, "step": 13500 }, { "epoch": 1.600972370449425, "grad_norm": 0.8863267014828995, "learning_rate": 3.4079342521028254e-05, "loss": 0.1834, "step": 13501 }, { "epoch": 1.6010909522115497, "grad_norm": 0.8651881886348568, "learning_rate": 3.407710601122081e-05, "loss": 0.1813, "step": 13502 }, { "epoch": 1.601209533973675, "grad_norm": 0.9892637603701658, "learning_rate": 3.407486941773358e-05, "loss": 0.2121, "step": 13503 }, { "epoch": 1.6013281157357997, "grad_norm": 0.8872899848967587, "learning_rate": 3.4072632740587175e-05, "loss": 0.184, "step": 13504 }, { "epoch": 1.601446697497925, "grad_norm": 0.9433550935921263, "learning_rate": 3.407039597980222e-05, "loss": 0.1666, "step": 13505 }, { "epoch": 1.6015652792600497, "grad_norm": 0.773664110226806, "learning_rate": 3.406815913539934e-05, "loss": 0.1653, "step": 13506 }, { "epoch": 1.6016838610221749, "grad_norm": 0.9340644596055254, "learning_rate": 3.406592220739916e-05, "loss": 0.1926, "step": 13507 }, { "epoch": 1.6018024427842996, "grad_norm": 0.7853581707803934, "learning_rate": 3.4063685195822284e-05, "loss": 0.1464, "step": 13508 }, { "epoch": 1.6019210245464248, "grad_norm": 0.900357697191691, "learning_rate": 3.406144810068935e-05, "loss": 0.1928, "step": 13509 }, { "epoch": 1.6020396063085496, "grad_norm": 1.3370616188791227, "learning_rate": 3.405921092202098e-05, "loss": 0.298, "step": 13510 }, { "epoch": 1.6021581880706748, "grad_norm": 1.8641030229243212, "learning_rate": 3.405697365983779e-05, "loss": 0.313, "step": 13511 }, { "epoch": 1.6022767698327998, "grad_norm": 1.303538397997438, "learning_rate": 3.405473631416041e-05, "loss": 0.2473, "step": 13512 }, { "epoch": 1.6023953515949247, "grad_norm": 1.355096838909902, "learning_rate": 3.405249888500947e-05, "loss": 0.3161, "step": 13513 }, { "epoch": 1.6025139333570497, "grad_norm": 1.2352806256103936, "learning_rate": 3.405026137240558e-05, "loss": 0.2662, "step": 13514 }, { "epoch": 1.6026325151191747, "grad_norm": 0.7402868544623429, "learning_rate": 3.4048023776369386e-05, "loss": 0.1536, "step": 13515 }, { "epoch": 1.6027510968812997, "grad_norm": 0.7215650905772116, "learning_rate": 3.404578609692151e-05, "loss": 0.1594, "step": 13516 }, { "epoch": 1.6028696786434247, "grad_norm": 0.7113366695819432, "learning_rate": 3.4043548334082566e-05, "loss": 0.1639, "step": 13517 }, { "epoch": 1.6029882604055496, "grad_norm": 0.9807514432619729, "learning_rate": 3.404131048787321e-05, "loss": 0.2081, "step": 13518 }, { "epoch": 1.6031068421676746, "grad_norm": 0.6698792388612282, "learning_rate": 3.403907255831405e-05, "loss": 0.1376, "step": 13519 }, { "epoch": 1.6032254239297996, "grad_norm": 1.3451319202272793, "learning_rate": 3.403683454542573e-05, "loss": 0.2727, "step": 13520 }, { "epoch": 1.6033440056919246, "grad_norm": 0.6851321135475673, "learning_rate": 3.403459644922888e-05, "loss": 0.1525, "step": 13521 }, { "epoch": 1.6034625874540496, "grad_norm": 0.9362250143819911, "learning_rate": 3.403235826974413e-05, "loss": 0.241, "step": 13522 }, { "epoch": 1.6035811692161746, "grad_norm": 0.8517756020187245, "learning_rate": 3.403012000699211e-05, "loss": 0.1622, "step": 13523 }, { "epoch": 1.6036997509782995, "grad_norm": 0.8551200432747054, "learning_rate": 3.402788166099346e-05, "loss": 0.1997, "step": 13524 }, { "epoch": 1.6038183327404245, "grad_norm": 0.6349959821925, "learning_rate": 3.40256432317688e-05, "loss": 0.1476, "step": 13525 }, { "epoch": 1.6039369145025495, "grad_norm": 1.1472373520985402, "learning_rate": 3.402340471933879e-05, "loss": 0.2344, "step": 13526 }, { "epoch": 1.6040554962646745, "grad_norm": 0.9845151270809698, "learning_rate": 3.402116612372405e-05, "loss": 0.1988, "step": 13527 }, { "epoch": 1.6041740780267995, "grad_norm": 1.0368320768717643, "learning_rate": 3.4018927444945216e-05, "loss": 0.2257, "step": 13528 }, { "epoch": 1.6042926597889244, "grad_norm": 0.906245701353201, "learning_rate": 3.4016688683022936e-05, "loss": 0.2469, "step": 13529 }, { "epoch": 1.6044112415510494, "grad_norm": 1.1349569860058075, "learning_rate": 3.401444983797784e-05, "loss": 0.2529, "step": 13530 }, { "epoch": 1.6045298233131744, "grad_norm": 0.8226979241294041, "learning_rate": 3.4012210909830574e-05, "loss": 0.16, "step": 13531 }, { "epoch": 1.6046484050752994, "grad_norm": 1.7723820025336667, "learning_rate": 3.400997189860177e-05, "loss": 0.3181, "step": 13532 }, { "epoch": 1.6047669868374244, "grad_norm": 0.9719278599251597, "learning_rate": 3.400773280431208e-05, "loss": 0.1983, "step": 13533 }, { "epoch": 1.6048855685995493, "grad_norm": 0.7513047400382211, "learning_rate": 3.4005493626982135e-05, "loss": 0.1496, "step": 13534 }, { "epoch": 1.6050041503616743, "grad_norm": 0.8075197451710521, "learning_rate": 3.400325436663259e-05, "loss": 0.1724, "step": 13535 }, { "epoch": 1.6051227321237995, "grad_norm": 0.9466565523991356, "learning_rate": 3.4001015023284065e-05, "loss": 0.2019, "step": 13536 }, { "epoch": 1.6052413138859243, "grad_norm": 1.7013584477514245, "learning_rate": 3.399877559695723e-05, "loss": 0.4311, "step": 13537 }, { "epoch": 1.6053598956480495, "grad_norm": 0.7545895964515632, "learning_rate": 3.399653608767272e-05, "loss": 0.1816, "step": 13538 }, { "epoch": 1.6054784774101742, "grad_norm": 1.048935274319745, "learning_rate": 3.3994296495451184e-05, "loss": 0.2007, "step": 13539 }, { "epoch": 1.6055970591722994, "grad_norm": 0.7711310317899935, "learning_rate": 3.3992056820313255e-05, "loss": 0.1751, "step": 13540 }, { "epoch": 1.6057156409344242, "grad_norm": 0.8894213924622344, "learning_rate": 3.3989817062279586e-05, "loss": 0.2152, "step": 13541 }, { "epoch": 1.6058342226965494, "grad_norm": 1.1045790534147182, "learning_rate": 3.398757722137084e-05, "loss": 0.2196, "step": 13542 }, { "epoch": 1.6059528044586742, "grad_norm": 1.2889404855573487, "learning_rate": 3.3985337297607644e-05, "loss": 0.2677, "step": 13543 }, { "epoch": 1.6060713862207994, "grad_norm": 1.2664215108022452, "learning_rate": 3.398309729101066e-05, "loss": 0.2185, "step": 13544 }, { "epoch": 1.6061899679829241, "grad_norm": 0.9043808989222406, "learning_rate": 3.398085720160053e-05, "loss": 0.1925, "step": 13545 }, { "epoch": 1.6063085497450493, "grad_norm": 0.9116028894114818, "learning_rate": 3.397861702939791e-05, "loss": 0.1901, "step": 13546 }, { "epoch": 1.606427131507174, "grad_norm": 1.0767153711932262, "learning_rate": 3.397637677442346e-05, "loss": 0.2257, "step": 13547 }, { "epoch": 1.6065457132692993, "grad_norm": 0.9561191711667552, "learning_rate": 3.397413643669782e-05, "loss": 0.193, "step": 13548 }, { "epoch": 1.606664295031424, "grad_norm": 1.0633503536870574, "learning_rate": 3.397189601624164e-05, "loss": 0.1792, "step": 13549 }, { "epoch": 1.6067828767935493, "grad_norm": 0.7909377657456592, "learning_rate": 3.396965551307559e-05, "loss": 0.1506, "step": 13550 }, { "epoch": 1.606901458555674, "grad_norm": 0.7212099584136074, "learning_rate": 3.396741492722031e-05, "loss": 0.1718, "step": 13551 }, { "epoch": 1.6070200403177992, "grad_norm": 0.9788415314540474, "learning_rate": 3.3965174258696466e-05, "loss": 0.1838, "step": 13552 }, { "epoch": 1.607138622079924, "grad_norm": 1.059660054395232, "learning_rate": 3.39629335075247e-05, "loss": 0.2037, "step": 13553 }, { "epoch": 1.6072572038420492, "grad_norm": 1.2847542563770173, "learning_rate": 3.396069267372568e-05, "loss": 0.2758, "step": 13554 }, { "epoch": 1.607375785604174, "grad_norm": 1.4677993605180935, "learning_rate": 3.3958451757320065e-05, "loss": 0.3552, "step": 13555 }, { "epoch": 1.6074943673662991, "grad_norm": 0.9289287616071418, "learning_rate": 3.395621075832851e-05, "loss": 0.1592, "step": 13556 }, { "epoch": 1.607612949128424, "grad_norm": 1.1886142770109522, "learning_rate": 3.395396967677168e-05, "loss": 0.2572, "step": 13557 }, { "epoch": 1.607731530890549, "grad_norm": 1.1438909713878742, "learning_rate": 3.395172851267022e-05, "loss": 0.2613, "step": 13558 }, { "epoch": 1.6078501126526739, "grad_norm": 0.7972614760463648, "learning_rate": 3.394948726604481e-05, "loss": 0.1747, "step": 13559 }, { "epoch": 1.607968694414799, "grad_norm": 1.1815429855915538, "learning_rate": 3.3947245936916095e-05, "loss": 0.2473, "step": 13560 }, { "epoch": 1.608087276176924, "grad_norm": 1.0623454070029827, "learning_rate": 3.3945004525304754e-05, "loss": 0.1659, "step": 13561 }, { "epoch": 1.608205857939049, "grad_norm": 0.872210123151847, "learning_rate": 3.394276303123143e-05, "loss": 0.1654, "step": 13562 }, { "epoch": 1.608324439701174, "grad_norm": 0.9517262388283974, "learning_rate": 3.39405214547168e-05, "loss": 0.2403, "step": 13563 }, { "epoch": 1.608443021463299, "grad_norm": 1.1292200053343784, "learning_rate": 3.393827979578153e-05, "loss": 0.2162, "step": 13564 }, { "epoch": 1.608561603225424, "grad_norm": 0.9521819178241844, "learning_rate": 3.3936038054446274e-05, "loss": 0.1972, "step": 13565 }, { "epoch": 1.608680184987549, "grad_norm": 0.9734035684609293, "learning_rate": 3.3933796230731706e-05, "loss": 0.2032, "step": 13566 }, { "epoch": 1.608798766749674, "grad_norm": 0.7637045836710352, "learning_rate": 3.39315543246585e-05, "loss": 0.1781, "step": 13567 }, { "epoch": 1.608917348511799, "grad_norm": 1.1618996515184463, "learning_rate": 3.392931233624731e-05, "loss": 0.2103, "step": 13568 }, { "epoch": 1.6090359302739239, "grad_norm": 0.7128452863015635, "learning_rate": 3.392707026551881e-05, "loss": 0.1395, "step": 13569 }, { "epoch": 1.6091545120360489, "grad_norm": 0.972746411836376, "learning_rate": 3.392482811249367e-05, "loss": 0.2109, "step": 13570 }, { "epoch": 1.6092730937981738, "grad_norm": 1.1433809053327062, "learning_rate": 3.3922585877192557e-05, "loss": 0.2589, "step": 13571 }, { "epoch": 1.6093916755602988, "grad_norm": 0.9992032558866402, "learning_rate": 3.3920343559636144e-05, "loss": 0.2706, "step": 13572 }, { "epoch": 1.6095102573224238, "grad_norm": 0.9181847723638037, "learning_rate": 3.3918101159845104e-05, "loss": 0.2265, "step": 13573 }, { "epoch": 1.6096288390845488, "grad_norm": 1.250518235347137, "learning_rate": 3.391585867784011e-05, "loss": 0.2629, "step": 13574 }, { "epoch": 1.6097474208466738, "grad_norm": 1.0989502994979954, "learning_rate": 3.3913616113641834e-05, "loss": 0.2549, "step": 13575 }, { "epoch": 1.6098660026087988, "grad_norm": 1.1160069003899873, "learning_rate": 3.391137346727094e-05, "loss": 0.2489, "step": 13576 }, { "epoch": 1.6099845843709237, "grad_norm": 0.7694789611053993, "learning_rate": 3.3909130738748105e-05, "loss": 0.14, "step": 13577 }, { "epoch": 1.6101031661330487, "grad_norm": 0.9287851855737399, "learning_rate": 3.390688792809403e-05, "loss": 0.1954, "step": 13578 }, { "epoch": 1.6102217478951737, "grad_norm": 0.758506650861356, "learning_rate": 3.3904645035329355e-05, "loss": 0.1318, "step": 13579 }, { "epoch": 1.6103403296572987, "grad_norm": 1.2853399164951371, "learning_rate": 3.390240206047478e-05, "loss": 0.2498, "step": 13580 }, { "epoch": 1.6104589114194237, "grad_norm": 1.0701881418754882, "learning_rate": 3.390015900355097e-05, "loss": 0.2254, "step": 13581 }, { "epoch": 1.6105774931815486, "grad_norm": 0.9438365196683013, "learning_rate": 3.389791586457861e-05, "loss": 0.2015, "step": 13582 }, { "epoch": 1.6106960749436736, "grad_norm": 0.5779318370597929, "learning_rate": 3.389567264357838e-05, "loss": 0.1472, "step": 13583 }, { "epoch": 1.6108146567057986, "grad_norm": 0.985927092584453, "learning_rate": 3.389342934057095e-05, "loss": 0.2157, "step": 13584 }, { "epoch": 1.6109332384679236, "grad_norm": 1.3364674397206713, "learning_rate": 3.389118595557701e-05, "loss": 0.312, "step": 13585 }, { "epoch": 1.6110518202300486, "grad_norm": 1.0041316770231627, "learning_rate": 3.388894248861724e-05, "loss": 0.2339, "step": 13586 }, { "epoch": 1.6111704019921738, "grad_norm": 0.8772251203687723, "learning_rate": 3.388669893971232e-05, "loss": 0.1844, "step": 13587 }, { "epoch": 1.6112889837542985, "grad_norm": 0.8485816228409283, "learning_rate": 3.388445530888293e-05, "loss": 0.2122, "step": 13588 }, { "epoch": 1.6114075655164237, "grad_norm": 1.4006400567697253, "learning_rate": 3.3882211596149766e-05, "loss": 0.3414, "step": 13589 }, { "epoch": 1.6115261472785485, "grad_norm": 0.9761318398621757, "learning_rate": 3.38799678015335e-05, "loss": 0.2103, "step": 13590 }, { "epoch": 1.6116447290406737, "grad_norm": 1.0944553669525197, "learning_rate": 3.387772392505482e-05, "loss": 0.227, "step": 13591 }, { "epoch": 1.6117633108027984, "grad_norm": 0.8897190146954418, "learning_rate": 3.3875479966734404e-05, "loss": 0.2019, "step": 13592 }, { "epoch": 1.6118818925649236, "grad_norm": 1.2168851302369554, "learning_rate": 3.387323592659296e-05, "loss": 0.2939, "step": 13593 }, { "epoch": 1.6120004743270484, "grad_norm": 0.957771093158159, "learning_rate": 3.387099180465115e-05, "loss": 0.2045, "step": 13594 }, { "epoch": 1.6121190560891736, "grad_norm": 1.032516204223673, "learning_rate": 3.386874760092967e-05, "loss": 0.1863, "step": 13595 }, { "epoch": 1.6122376378512984, "grad_norm": 0.9330910551594108, "learning_rate": 3.3866503315449225e-05, "loss": 0.1939, "step": 13596 }, { "epoch": 1.6123562196134236, "grad_norm": 0.8905323257331389, "learning_rate": 3.3864258948230486e-05, "loss": 0.17, "step": 13597 }, { "epoch": 1.6124748013755483, "grad_norm": 0.9363631685014732, "learning_rate": 3.3862014499294156e-05, "loss": 0.2031, "step": 13598 }, { "epoch": 1.6125933831376735, "grad_norm": 0.7559906888099894, "learning_rate": 3.3859769968660915e-05, "loss": 0.145, "step": 13599 }, { "epoch": 1.6127119648997983, "grad_norm": 0.8632302460261173, "learning_rate": 3.385752535635146e-05, "loss": 0.1915, "step": 13600 }, { "epoch": 1.6128305466619235, "grad_norm": 0.8458449082827031, "learning_rate": 3.385528066238648e-05, "loss": 0.1362, "step": 13601 }, { "epoch": 1.6129491284240483, "grad_norm": 1.0572467947020574, "learning_rate": 3.3853035886786675e-05, "loss": 0.1928, "step": 13602 }, { "epoch": 1.6130677101861735, "grad_norm": 1.1006473571029887, "learning_rate": 3.385079102957274e-05, "loss": 0.2142, "step": 13603 }, { "epoch": 1.6131862919482982, "grad_norm": 0.9324211747258991, "learning_rate": 3.384854609076536e-05, "loss": 0.1968, "step": 13604 }, { "epoch": 1.6133048737104234, "grad_norm": 2.5564974793817843, "learning_rate": 3.384630107038523e-05, "loss": 0.4576, "step": 13605 }, { "epoch": 1.6134234554725482, "grad_norm": 0.9576276124380596, "learning_rate": 3.384405596845306e-05, "loss": 0.1624, "step": 13606 }, { "epoch": 1.6135420372346734, "grad_norm": 0.7496703243729987, "learning_rate": 3.384181078498954e-05, "loss": 0.1593, "step": 13607 }, { "epoch": 1.6136606189967981, "grad_norm": 0.9469569600337734, "learning_rate": 3.383956552001536e-05, "loss": 0.2052, "step": 13608 }, { "epoch": 1.6137792007589233, "grad_norm": 1.0337092768983647, "learning_rate": 3.383732017355123e-05, "loss": 0.2379, "step": 13609 }, { "epoch": 1.613897782521048, "grad_norm": 1.3485018596516543, "learning_rate": 3.383507474561785e-05, "loss": 0.3628, "step": 13610 }, { "epoch": 1.6140163642831733, "grad_norm": 1.1649486844957613, "learning_rate": 3.383282923623591e-05, "loss": 0.2259, "step": 13611 }, { "epoch": 1.6141349460452983, "grad_norm": 1.0235728135006552, "learning_rate": 3.383058364542611e-05, "loss": 0.2752, "step": 13612 }, { "epoch": 1.6142535278074233, "grad_norm": 1.2034689234573563, "learning_rate": 3.382833797320917e-05, "loss": 0.2488, "step": 13613 }, { "epoch": 1.6143721095695482, "grad_norm": 0.9116858919611394, "learning_rate": 3.382609221960576e-05, "loss": 0.1992, "step": 13614 }, { "epoch": 1.6144906913316732, "grad_norm": 1.0119294344840337, "learning_rate": 3.382384638463662e-05, "loss": 0.2244, "step": 13615 }, { "epoch": 1.6146092730937982, "grad_norm": 0.7971381777427082, "learning_rate": 3.382160046832243e-05, "loss": 0.1531, "step": 13616 }, { "epoch": 1.6147278548559232, "grad_norm": 0.7946280995673286, "learning_rate": 3.3819354470683914e-05, "loss": 0.174, "step": 13617 }, { "epoch": 1.6148464366180482, "grad_norm": 0.8876352261563554, "learning_rate": 3.3817108391741754e-05, "loss": 0.1513, "step": 13618 }, { "epoch": 1.6149650183801731, "grad_norm": 0.9969255590467208, "learning_rate": 3.381486223151667e-05, "loss": 0.2378, "step": 13619 }, { "epoch": 1.6150836001422981, "grad_norm": 0.9102081034310795, "learning_rate": 3.3812615990029364e-05, "loss": 0.2255, "step": 13620 }, { "epoch": 1.615202181904423, "grad_norm": 0.9141708707197608, "learning_rate": 3.3810369667300544e-05, "loss": 0.1808, "step": 13621 }, { "epoch": 1.615320763666548, "grad_norm": 1.0801083309084234, "learning_rate": 3.380812326335092e-05, "loss": 0.1811, "step": 13622 }, { "epoch": 1.615439345428673, "grad_norm": 0.8539744516917497, "learning_rate": 3.380587677820121e-05, "loss": 0.211, "step": 13623 }, { "epoch": 1.615557927190798, "grad_norm": 0.8491010028084615, "learning_rate": 3.38036302118721e-05, "loss": 0.2068, "step": 13624 }, { "epoch": 1.615676508952923, "grad_norm": 1.232066113800596, "learning_rate": 3.3801383564384326e-05, "loss": 0.2229, "step": 13625 }, { "epoch": 1.615795090715048, "grad_norm": 1.163170310118204, "learning_rate": 3.379913683575858e-05, "loss": 0.2366, "step": 13626 }, { "epoch": 1.615913672477173, "grad_norm": 0.9876741576608851, "learning_rate": 3.379689002601558e-05, "loss": 0.2385, "step": 13627 }, { "epoch": 1.616032254239298, "grad_norm": 0.8380318077750708, "learning_rate": 3.379464313517606e-05, "loss": 0.1902, "step": 13628 }, { "epoch": 1.616150836001423, "grad_norm": 0.9556700246203035, "learning_rate": 3.3792396163260696e-05, "loss": 0.1538, "step": 13629 }, { "epoch": 1.616269417763548, "grad_norm": 0.9471218654521043, "learning_rate": 3.379014911029023e-05, "loss": 0.2147, "step": 13630 }, { "epoch": 1.616387999525673, "grad_norm": 0.7154635155848712, "learning_rate": 3.378790197628537e-05, "loss": 0.125, "step": 13631 }, { "epoch": 1.616506581287798, "grad_norm": 1.4691132549086028, "learning_rate": 3.378565476126683e-05, "loss": 0.3429, "step": 13632 }, { "epoch": 1.6166251630499229, "grad_norm": 1.1237551907931338, "learning_rate": 3.378340746525532e-05, "loss": 0.2205, "step": 13633 }, { "epoch": 1.6167437448120479, "grad_norm": 0.6663581814621451, "learning_rate": 3.378116008827157e-05, "loss": 0.1489, "step": 13634 }, { "epoch": 1.6168623265741728, "grad_norm": 0.6334973709544391, "learning_rate": 3.377891263033629e-05, "loss": 0.137, "step": 13635 }, { "epoch": 1.616980908336298, "grad_norm": 1.1126421170450689, "learning_rate": 3.37766650914702e-05, "loss": 0.2472, "step": 13636 }, { "epoch": 1.6170994900984228, "grad_norm": 0.8748109937749801, "learning_rate": 3.377441747169402e-05, "loss": 0.2094, "step": 13637 }, { "epoch": 1.617218071860548, "grad_norm": 0.938285937442134, "learning_rate": 3.3772169771028474e-05, "loss": 0.2149, "step": 13638 }, { "epoch": 1.6173366536226728, "grad_norm": 0.915445794994883, "learning_rate": 3.376992198949428e-05, "loss": 0.2014, "step": 13639 }, { "epoch": 1.617455235384798, "grad_norm": 1.2191013802450434, "learning_rate": 3.3767674127112155e-05, "loss": 0.2428, "step": 13640 }, { "epoch": 1.6175738171469227, "grad_norm": 1.0622185770943817, "learning_rate": 3.376542618390283e-05, "loss": 0.2307, "step": 13641 }, { "epoch": 1.617692398909048, "grad_norm": 0.8969996372962068, "learning_rate": 3.376317815988702e-05, "loss": 0.1738, "step": 13642 }, { "epoch": 1.6178109806711727, "grad_norm": 0.9060639817723778, "learning_rate": 3.376093005508546e-05, "loss": 0.2205, "step": 13643 }, { "epoch": 1.6179295624332979, "grad_norm": 0.9315818016114056, "learning_rate": 3.375868186951887e-05, "loss": 0.1996, "step": 13644 }, { "epoch": 1.6180481441954226, "grad_norm": 1.5527058719801659, "learning_rate": 3.3756433603207965e-05, "loss": 0.3372, "step": 13645 }, { "epoch": 1.6181667259575478, "grad_norm": 0.8499769363408511, "learning_rate": 3.375418525617348e-05, "loss": 0.1696, "step": 13646 }, { "epoch": 1.6182853077196726, "grad_norm": 0.8235942486906423, "learning_rate": 3.3751936828436145e-05, "loss": 0.1687, "step": 13647 }, { "epoch": 1.6184038894817978, "grad_norm": 0.8739519026088557, "learning_rate": 3.374968832001669e-05, "loss": 0.2089, "step": 13648 }, { "epoch": 1.6185224712439226, "grad_norm": 0.9829261735216673, "learning_rate": 3.3747439730935835e-05, "loss": 0.1619, "step": 13649 }, { "epoch": 1.6186410530060478, "grad_norm": 1.216753712074762, "learning_rate": 3.374519106121431e-05, "loss": 0.2623, "step": 13650 }, { "epoch": 1.6187596347681725, "grad_norm": 0.9858459730372046, "learning_rate": 3.3742942310872855e-05, "loss": 0.1781, "step": 13651 }, { "epoch": 1.6188782165302977, "grad_norm": 1.7411001845849785, "learning_rate": 3.374069347993218e-05, "loss": 0.3463, "step": 13652 }, { "epoch": 1.6189967982924225, "grad_norm": 1.4723357497902192, "learning_rate": 3.373844456841305e-05, "loss": 0.3116, "step": 13653 }, { "epoch": 1.6191153800545477, "grad_norm": 1.267954642852169, "learning_rate": 3.373619557633616e-05, "loss": 0.2037, "step": 13654 }, { "epoch": 1.6192339618166725, "grad_norm": 0.871727794910229, "learning_rate": 3.373394650372226e-05, "loss": 0.1903, "step": 13655 }, { "epoch": 1.6193525435787977, "grad_norm": 0.7374357485001722, "learning_rate": 3.3731697350592086e-05, "loss": 0.1556, "step": 13656 }, { "epoch": 1.6194711253409224, "grad_norm": 1.1286260384082023, "learning_rate": 3.372944811696637e-05, "loss": 0.2725, "step": 13657 }, { "epoch": 1.6195897071030476, "grad_norm": 0.9243529784499751, "learning_rate": 3.372719880286585e-05, "loss": 0.2114, "step": 13658 }, { "epoch": 1.6197082888651724, "grad_norm": 0.8707619944140542, "learning_rate": 3.372494940831126e-05, "loss": 0.2119, "step": 13659 }, { "epoch": 1.6198268706272976, "grad_norm": 0.8030247233535237, "learning_rate": 3.372269993332333e-05, "loss": 0.1473, "step": 13660 }, { "epoch": 1.6199454523894226, "grad_norm": 0.9684179055424077, "learning_rate": 3.3720450377922804e-05, "loss": 0.191, "step": 13661 }, { "epoch": 1.6200640341515475, "grad_norm": 1.139724217614926, "learning_rate": 3.371820074213042e-05, "loss": 0.2273, "step": 13662 }, { "epoch": 1.6201826159136725, "grad_norm": 1.0053170911613099, "learning_rate": 3.371595102596692e-05, "loss": 0.2145, "step": 13663 }, { "epoch": 1.6203011976757975, "grad_norm": 1.2872698819670092, "learning_rate": 3.371370122945304e-05, "loss": 0.3245, "step": 13664 }, { "epoch": 1.6204197794379225, "grad_norm": 0.9084503813914308, "learning_rate": 3.371145135260951e-05, "loss": 0.2285, "step": 13665 }, { "epoch": 1.6205383612000475, "grad_norm": 1.0601116024385333, "learning_rate": 3.370920139545709e-05, "loss": 0.2723, "step": 13666 }, { "epoch": 1.6206569429621724, "grad_norm": 1.1194379840375634, "learning_rate": 3.3706951358016514e-05, "loss": 0.3247, "step": 13667 }, { "epoch": 1.6207755247242974, "grad_norm": 0.8360860860138564, "learning_rate": 3.3704701240308525e-05, "loss": 0.1669, "step": 13668 }, { "epoch": 1.6208941064864224, "grad_norm": 1.0024386428734928, "learning_rate": 3.370245104235386e-05, "loss": 0.2479, "step": 13669 }, { "epoch": 1.6210126882485474, "grad_norm": 0.9855184347802448, "learning_rate": 3.370020076417327e-05, "loss": 0.1785, "step": 13670 }, { "epoch": 1.6211312700106724, "grad_norm": 0.7857733107456513, "learning_rate": 3.3697950405787496e-05, "loss": 0.1812, "step": 13671 }, { "epoch": 1.6212498517727973, "grad_norm": 1.2717285673427725, "learning_rate": 3.369569996721729e-05, "loss": 0.379, "step": 13672 }, { "epoch": 1.6213684335349223, "grad_norm": 0.8184735091620444, "learning_rate": 3.3693449448483385e-05, "loss": 0.1967, "step": 13673 }, { "epoch": 1.6214870152970473, "grad_norm": 1.05557726058533, "learning_rate": 3.369119884960654e-05, "loss": 0.2475, "step": 13674 }, { "epoch": 1.6216055970591723, "grad_norm": 0.8918837929186254, "learning_rate": 3.3688948170607506e-05, "loss": 0.1515, "step": 13675 }, { "epoch": 1.6217241788212973, "grad_norm": 1.5871167293069581, "learning_rate": 3.368669741150702e-05, "loss": 0.3201, "step": 13676 }, { "epoch": 1.6218427605834222, "grad_norm": 0.8295198969817767, "learning_rate": 3.3684446572325835e-05, "loss": 0.2048, "step": 13677 }, { "epoch": 1.6219613423455472, "grad_norm": 1.0011743620673579, "learning_rate": 3.3682195653084704e-05, "loss": 0.1775, "step": 13678 }, { "epoch": 1.6220799241076722, "grad_norm": 0.8083765229905293, "learning_rate": 3.367994465380438e-05, "loss": 0.14, "step": 13679 }, { "epoch": 1.6221985058697972, "grad_norm": 1.1256861256571837, "learning_rate": 3.3677693574505604e-05, "loss": 0.2756, "step": 13680 }, { "epoch": 1.6223170876319222, "grad_norm": 0.7687276890863579, "learning_rate": 3.3675442415209135e-05, "loss": 0.1671, "step": 13681 }, { "epoch": 1.6224356693940472, "grad_norm": 1.6201980872883666, "learning_rate": 3.3673191175935726e-05, "loss": 0.3887, "step": 13682 }, { "epoch": 1.6225542511561721, "grad_norm": 1.09592672004351, "learning_rate": 3.367093985670613e-05, "loss": 0.2065, "step": 13683 }, { "epoch": 1.6226728329182971, "grad_norm": 0.9426105707307836, "learning_rate": 3.36686884575411e-05, "loss": 0.2428, "step": 13684 }, { "epoch": 1.6227914146804223, "grad_norm": 0.8571974409893841, "learning_rate": 3.36664369784614e-05, "loss": 0.2022, "step": 13685 }, { "epoch": 1.622909996442547, "grad_norm": 1.3707536227124213, "learning_rate": 3.366418541948777e-05, "loss": 0.3424, "step": 13686 }, { "epoch": 1.6230285782046723, "grad_norm": 1.1490551981078874, "learning_rate": 3.366193378064098e-05, "loss": 0.244, "step": 13687 }, { "epoch": 1.623147159966797, "grad_norm": 0.8777329585583704, "learning_rate": 3.365968206194178e-05, "loss": 0.1756, "step": 13688 }, { "epoch": 1.6232657417289222, "grad_norm": 0.99789026074068, "learning_rate": 3.365743026341093e-05, "loss": 0.1669, "step": 13689 }, { "epoch": 1.623384323491047, "grad_norm": 0.9797915397397243, "learning_rate": 3.3655178385069197e-05, "loss": 0.2385, "step": 13690 }, { "epoch": 1.6235029052531722, "grad_norm": 0.8450567483241379, "learning_rate": 3.365292642693732e-05, "loss": 0.1593, "step": 13691 }, { "epoch": 1.623621487015297, "grad_norm": 1.1075711241837154, "learning_rate": 3.365067438903609e-05, "loss": 0.223, "step": 13692 }, { "epoch": 1.6237400687774222, "grad_norm": 0.7620249215726593, "learning_rate": 3.364842227138624e-05, "loss": 0.1936, "step": 13693 }, { "epoch": 1.623858650539547, "grad_norm": 0.7810793572857115, "learning_rate": 3.3646170074008536e-05, "loss": 0.1404, "step": 13694 }, { "epoch": 1.6239772323016721, "grad_norm": 1.0271868094004033, "learning_rate": 3.364391779692375e-05, "loss": 0.1893, "step": 13695 }, { "epoch": 1.6240958140637969, "grad_norm": 1.1432133151637889, "learning_rate": 3.3641665440152646e-05, "loss": 0.2429, "step": 13696 }, { "epoch": 1.624214395825922, "grad_norm": 0.7382413316272385, "learning_rate": 3.3639413003715985e-05, "loss": 0.1888, "step": 13697 }, { "epoch": 1.6243329775880468, "grad_norm": 0.9068104451250998, "learning_rate": 3.3637160487634524e-05, "loss": 0.1428, "step": 13698 }, { "epoch": 1.624451559350172, "grad_norm": 1.2544357807613191, "learning_rate": 3.3634907891929046e-05, "loss": 0.2671, "step": 13699 }, { "epoch": 1.6245701411122968, "grad_norm": 1.0386748257401675, "learning_rate": 3.36326552166203e-05, "loss": 0.2134, "step": 13700 }, { "epoch": 1.624688722874422, "grad_norm": 0.7479820648047979, "learning_rate": 3.363040246172905e-05, "loss": 0.1492, "step": 13701 }, { "epoch": 1.6248073046365468, "grad_norm": 0.908190874567819, "learning_rate": 3.362814962727608e-05, "loss": 0.162, "step": 13702 }, { "epoch": 1.624925886398672, "grad_norm": 1.085345193169849, "learning_rate": 3.362589671328216e-05, "loss": 0.1992, "step": 13703 }, { "epoch": 1.6250444681607967, "grad_norm": 1.109566289833208, "learning_rate": 3.362364371976804e-05, "loss": 0.2168, "step": 13704 }, { "epoch": 1.625163049922922, "grad_norm": 1.7251536466881205, "learning_rate": 3.3621390646754496e-05, "loss": 0.3641, "step": 13705 }, { "epoch": 1.6252816316850467, "grad_norm": 0.9940243193765376, "learning_rate": 3.3619137494262304e-05, "loss": 0.207, "step": 13706 }, { "epoch": 1.625400213447172, "grad_norm": 0.9428053595571985, "learning_rate": 3.361688426231224e-05, "loss": 0.1904, "step": 13707 }, { "epoch": 1.6255187952092967, "grad_norm": 1.3344008206074287, "learning_rate": 3.3614630950925066e-05, "loss": 0.2656, "step": 13708 }, { "epoch": 1.6256373769714219, "grad_norm": 0.9405180588123065, "learning_rate": 3.3612377560121564e-05, "loss": 0.1636, "step": 13709 }, { "epoch": 1.6257559587335468, "grad_norm": 1.90621440343506, "learning_rate": 3.361012408992249e-05, "loss": 0.3802, "step": 13710 }, { "epoch": 1.6258745404956718, "grad_norm": 1.083135816551555, "learning_rate": 3.3607870540348655e-05, "loss": 0.1819, "step": 13711 }, { "epoch": 1.6259931222577968, "grad_norm": 0.9926448463457774, "learning_rate": 3.3605616911420786e-05, "loss": 0.1691, "step": 13712 }, { "epoch": 1.6261117040199218, "grad_norm": 1.0127348348637626, "learning_rate": 3.360336320315968e-05, "loss": 0.1979, "step": 13713 }, { "epoch": 1.6262302857820468, "grad_norm": 1.1247203056989743, "learning_rate": 3.360110941558613e-05, "loss": 0.1996, "step": 13714 }, { "epoch": 1.6263488675441717, "grad_norm": 1.1884838810851956, "learning_rate": 3.3598855548720884e-05, "loss": 0.2213, "step": 13715 }, { "epoch": 1.6264674493062967, "grad_norm": 0.6770383965193688, "learning_rate": 3.359660160258475e-05, "loss": 0.1158, "step": 13716 }, { "epoch": 1.6265860310684217, "grad_norm": 0.7327824011959738, "learning_rate": 3.359434757719848e-05, "loss": 0.1525, "step": 13717 }, { "epoch": 1.6267046128305467, "grad_norm": 0.7802458980463394, "learning_rate": 3.359209347258288e-05, "loss": 0.182, "step": 13718 }, { "epoch": 1.6268231945926717, "grad_norm": 0.7671265860638286, "learning_rate": 3.35898392887587e-05, "loss": 0.1706, "step": 13719 }, { "epoch": 1.6269417763547966, "grad_norm": 0.6861152293135646, "learning_rate": 3.3587585025746734e-05, "loss": 0.157, "step": 13720 }, { "epoch": 1.6270603581169216, "grad_norm": 0.9985896885354253, "learning_rate": 3.3585330683567766e-05, "loss": 0.1979, "step": 13721 }, { "epoch": 1.6271789398790466, "grad_norm": 1.589899374330507, "learning_rate": 3.358307626224259e-05, "loss": 0.2985, "step": 13722 }, { "epoch": 1.6272975216411716, "grad_norm": 1.0333465580108316, "learning_rate": 3.3580821761791956e-05, "loss": 0.247, "step": 13723 }, { "epoch": 1.6274161034032966, "grad_norm": 1.2963601545261456, "learning_rate": 3.357856718223668e-05, "loss": 0.2525, "step": 13724 }, { "epoch": 1.6275346851654215, "grad_norm": 0.9499510709899827, "learning_rate": 3.357631252359753e-05, "loss": 0.1866, "step": 13725 }, { "epoch": 1.6276532669275465, "grad_norm": 0.649623491850891, "learning_rate": 3.35740577858953e-05, "loss": 0.1671, "step": 13726 }, { "epoch": 1.6277718486896715, "grad_norm": 0.9407060548933799, "learning_rate": 3.3571802969150766e-05, "loss": 0.2154, "step": 13727 }, { "epoch": 1.6278904304517965, "grad_norm": 1.0169629598431156, "learning_rate": 3.356954807338473e-05, "loss": 0.2442, "step": 13728 }, { "epoch": 1.6280090122139215, "grad_norm": 1.0747822166390397, "learning_rate": 3.3567293098617966e-05, "loss": 0.2695, "step": 13729 }, { "epoch": 1.6281275939760464, "grad_norm": 1.1340566883835383, "learning_rate": 3.356503804487126e-05, "loss": 0.2189, "step": 13730 }, { "epoch": 1.6282461757381714, "grad_norm": 1.380719376809305, "learning_rate": 3.356278291216541e-05, "loss": 0.3575, "step": 13731 }, { "epoch": 1.6283647575002964, "grad_norm": 1.215477346044068, "learning_rate": 3.3560527700521196e-05, "loss": 0.2562, "step": 13732 }, { "epoch": 1.6284833392624214, "grad_norm": 1.0944361030348233, "learning_rate": 3.3558272409959424e-05, "loss": 0.1915, "step": 13733 }, { "epoch": 1.6286019210245464, "grad_norm": 1.60270632031045, "learning_rate": 3.355601704050086e-05, "loss": 0.4594, "step": 13734 }, { "epoch": 1.6287205027866714, "grad_norm": 1.055078226526997, "learning_rate": 3.355376159216633e-05, "loss": 0.2079, "step": 13735 }, { "epoch": 1.6288390845487966, "grad_norm": 1.1762846694768332, "learning_rate": 3.35515060649766e-05, "loss": 0.1996, "step": 13736 }, { "epoch": 1.6289576663109213, "grad_norm": 1.2359519121642237, "learning_rate": 3.354925045895247e-05, "loss": 0.2293, "step": 13737 }, { "epoch": 1.6290762480730465, "grad_norm": 0.9027008031495793, "learning_rate": 3.3546994774114726e-05, "loss": 0.1775, "step": 13738 }, { "epoch": 1.6291948298351713, "grad_norm": 0.6659833684438726, "learning_rate": 3.3544739010484185e-05, "loss": 0.1611, "step": 13739 }, { "epoch": 1.6293134115972965, "grad_norm": 0.8694023271797118, "learning_rate": 3.354248316808162e-05, "loss": 0.1757, "step": 13740 }, { "epoch": 1.6294319933594212, "grad_norm": 0.7545629066106005, "learning_rate": 3.354022724692783e-05, "loss": 0.1917, "step": 13741 }, { "epoch": 1.6295505751215464, "grad_norm": 0.9121861799578702, "learning_rate": 3.3537971247043634e-05, "loss": 0.1582, "step": 13742 }, { "epoch": 1.6296691568836712, "grad_norm": 1.7021391666069037, "learning_rate": 3.353571516844981e-05, "loss": 0.3677, "step": 13743 }, { "epoch": 1.6297877386457964, "grad_norm": 1.1203927963031186, "learning_rate": 3.353345901116715e-05, "loss": 0.2037, "step": 13744 }, { "epoch": 1.6299063204079212, "grad_norm": 1.127484860047878, "learning_rate": 3.3531202775216467e-05, "loss": 0.2095, "step": 13745 }, { "epoch": 1.6300249021700464, "grad_norm": 0.9851948389590602, "learning_rate": 3.352894646061855e-05, "loss": 0.1557, "step": 13746 }, { "epoch": 1.6301434839321711, "grad_norm": 0.7627398575204261, "learning_rate": 3.3526690067394215e-05, "loss": 0.1683, "step": 13747 }, { "epoch": 1.6302620656942963, "grad_norm": 1.2710316588812693, "learning_rate": 3.352443359556425e-05, "loss": 0.2423, "step": 13748 }, { "epoch": 1.630380647456421, "grad_norm": 0.8249804297505521, "learning_rate": 3.3522177045149464e-05, "loss": 0.1644, "step": 13749 }, { "epoch": 1.6304992292185463, "grad_norm": 0.8665920208066458, "learning_rate": 3.351992041617065e-05, "loss": 0.1699, "step": 13750 }, { "epoch": 1.630617810980671, "grad_norm": 0.8666714367893166, "learning_rate": 3.351766370864862e-05, "loss": 0.1889, "step": 13751 }, { "epoch": 1.6307363927427962, "grad_norm": 1.1035748284195952, "learning_rate": 3.3515406922604174e-05, "loss": 0.2543, "step": 13752 }, { "epoch": 1.630854974504921, "grad_norm": 0.9771533650475324, "learning_rate": 3.351315005805812e-05, "loss": 0.2236, "step": 13753 }, { "epoch": 1.6309735562670462, "grad_norm": 1.803395780928687, "learning_rate": 3.351089311503126e-05, "loss": 0.3571, "step": 13754 }, { "epoch": 1.631092138029171, "grad_norm": 0.8719188826237051, "learning_rate": 3.3508636093544406e-05, "loss": 0.1757, "step": 13755 }, { "epoch": 1.6312107197912962, "grad_norm": 1.1650072272565815, "learning_rate": 3.3506378993618356e-05, "loss": 0.2223, "step": 13756 }, { "epoch": 1.631329301553421, "grad_norm": 1.0642389061617772, "learning_rate": 3.350412181527393e-05, "loss": 0.1976, "step": 13757 }, { "epoch": 1.6314478833155461, "grad_norm": 0.8147563179423233, "learning_rate": 3.3501864558531926e-05, "loss": 0.1665, "step": 13758 }, { "epoch": 1.631566465077671, "grad_norm": 1.534636508181445, "learning_rate": 3.349960722341315e-05, "loss": 0.3018, "step": 13759 }, { "epoch": 1.631685046839796, "grad_norm": 0.9075494274052961, "learning_rate": 3.349734980993843e-05, "loss": 0.1571, "step": 13760 }, { "epoch": 1.631803628601921, "grad_norm": 1.4695273850599475, "learning_rate": 3.349509231812856e-05, "loss": 0.2688, "step": 13761 }, { "epoch": 1.631922210364046, "grad_norm": 0.9756912059370443, "learning_rate": 3.349283474800435e-05, "loss": 0.1837, "step": 13762 }, { "epoch": 1.632040792126171, "grad_norm": 0.7838181746426034, "learning_rate": 3.349057709958663e-05, "loss": 0.1675, "step": 13763 }, { "epoch": 1.632159373888296, "grad_norm": 0.8514098426739066, "learning_rate": 3.3488319372896193e-05, "loss": 0.1627, "step": 13764 }, { "epoch": 1.632277955650421, "grad_norm": 0.7987923359328409, "learning_rate": 3.3486061567953864e-05, "loss": 0.1493, "step": 13765 }, { "epoch": 1.632396537412546, "grad_norm": 0.8226991177941013, "learning_rate": 3.348380368478045e-05, "loss": 0.1626, "step": 13766 }, { "epoch": 1.632515119174671, "grad_norm": 0.8390393213660257, "learning_rate": 3.348154572339677e-05, "loss": 0.1666, "step": 13767 }, { "epoch": 1.632633700936796, "grad_norm": 1.0701953910350404, "learning_rate": 3.3479287683823645e-05, "loss": 0.2242, "step": 13768 }, { "epoch": 1.632752282698921, "grad_norm": 0.9561159083701002, "learning_rate": 3.347702956608188e-05, "loss": 0.2188, "step": 13769 }, { "epoch": 1.632870864461046, "grad_norm": 1.1985856348078674, "learning_rate": 3.34747713701923e-05, "loss": 0.2357, "step": 13770 }, { "epoch": 1.6329894462231709, "grad_norm": 0.592240433646659, "learning_rate": 3.3472513096175716e-05, "loss": 0.1137, "step": 13771 }, { "epoch": 1.6331080279852959, "grad_norm": 1.3643913516169943, "learning_rate": 3.3470254744052956e-05, "loss": 0.3056, "step": 13772 }, { "epoch": 1.6332266097474208, "grad_norm": 0.9262575336299044, "learning_rate": 3.346799631384484e-05, "loss": 0.1786, "step": 13773 }, { "epoch": 1.6333451915095458, "grad_norm": 0.9918169672841541, "learning_rate": 3.346573780557217e-05, "loss": 0.2055, "step": 13774 }, { "epoch": 1.6334637732716708, "grad_norm": 0.7449651844058982, "learning_rate": 3.346347921925578e-05, "loss": 0.1713, "step": 13775 }, { "epoch": 1.6335823550337958, "grad_norm": 0.8010099071540902, "learning_rate": 3.34612205549165e-05, "loss": 0.1773, "step": 13776 }, { "epoch": 1.6337009367959208, "grad_norm": 1.2262407932251258, "learning_rate": 3.345896181257513e-05, "loss": 0.2869, "step": 13777 }, { "epoch": 1.6338195185580457, "grad_norm": 0.9640538297597379, "learning_rate": 3.345670299225252e-05, "loss": 0.2111, "step": 13778 }, { "epoch": 1.6339381003201707, "grad_norm": 0.8275757836018959, "learning_rate": 3.345444409396946e-05, "loss": 0.1268, "step": 13779 }, { "epoch": 1.6340566820822957, "grad_norm": 0.8268227149554533, "learning_rate": 3.3452185117746806e-05, "loss": 0.1735, "step": 13780 }, { "epoch": 1.6341752638444207, "grad_norm": 0.7760739403861777, "learning_rate": 3.344992606360536e-05, "loss": 0.1791, "step": 13781 }, { "epoch": 1.6342938456065457, "grad_norm": 0.7381002693773655, "learning_rate": 3.344766693156598e-05, "loss": 0.1617, "step": 13782 }, { "epoch": 1.6344124273686707, "grad_norm": 0.853393498544644, "learning_rate": 3.344540772164945e-05, "loss": 0.161, "step": 13783 }, { "epoch": 1.6345310091307956, "grad_norm": 0.9931689474341445, "learning_rate": 3.344314843387661e-05, "loss": 0.1631, "step": 13784 }, { "epoch": 1.6346495908929208, "grad_norm": 1.0301050951169208, "learning_rate": 3.344088906826831e-05, "loss": 0.2505, "step": 13785 }, { "epoch": 1.6347681726550456, "grad_norm": 0.6251763303023079, "learning_rate": 3.343862962484536e-05, "loss": 0.142, "step": 13786 }, { "epoch": 1.6348867544171708, "grad_norm": 0.9737059012586274, "learning_rate": 3.3436370103628594e-05, "loss": 0.1803, "step": 13787 }, { "epoch": 1.6350053361792956, "grad_norm": 1.1205057269626946, "learning_rate": 3.343411050463884e-05, "loss": 0.1687, "step": 13788 }, { "epoch": 1.6351239179414208, "grad_norm": 1.0265850994022747, "learning_rate": 3.343185082789693e-05, "loss": 0.2144, "step": 13789 }, { "epoch": 1.6352424997035455, "grad_norm": 0.9853557178583822, "learning_rate": 3.3429591073423694e-05, "loss": 0.1956, "step": 13790 }, { "epoch": 1.6353610814656707, "grad_norm": 0.7870502954286949, "learning_rate": 3.342733124123997e-05, "loss": 0.1384, "step": 13791 }, { "epoch": 1.6354796632277955, "grad_norm": 1.3150673772233914, "learning_rate": 3.342507133136659e-05, "loss": 0.2602, "step": 13792 }, { "epoch": 1.6355982449899207, "grad_norm": 1.6570515947376168, "learning_rate": 3.3422811343824366e-05, "loss": 0.2699, "step": 13793 }, { "epoch": 1.6357168267520454, "grad_norm": 0.7780657506532218, "learning_rate": 3.342055127863416e-05, "loss": 0.1377, "step": 13794 }, { "epoch": 1.6358354085141706, "grad_norm": 0.9047418022178666, "learning_rate": 3.341829113581681e-05, "loss": 0.2104, "step": 13795 }, { "epoch": 1.6359539902762954, "grad_norm": 0.9812209423711028, "learning_rate": 3.341603091539312e-05, "loss": 0.1616, "step": 13796 }, { "epoch": 1.6360725720384206, "grad_norm": 0.8370606591498236, "learning_rate": 3.341377061738395e-05, "loss": 0.1263, "step": 13797 }, { "epoch": 1.6361911538005454, "grad_norm": 1.4463848587658161, "learning_rate": 3.341151024181014e-05, "loss": 0.2098, "step": 13798 }, { "epoch": 1.6363097355626706, "grad_norm": 0.9745455336227403, "learning_rate": 3.340924978869251e-05, "loss": 0.1782, "step": 13799 }, { "epoch": 1.6364283173247953, "grad_norm": 1.1556016282354855, "learning_rate": 3.340698925805192e-05, "loss": 0.2651, "step": 13800 }, { "epoch": 1.6365468990869205, "grad_norm": 1.2875945476240973, "learning_rate": 3.340472864990919e-05, "loss": 0.2081, "step": 13801 }, { "epoch": 1.6366654808490453, "grad_norm": 1.0189596392781486, "learning_rate": 3.340246796428517e-05, "loss": 0.223, "step": 13802 }, { "epoch": 1.6367840626111705, "grad_norm": 0.8986468946541689, "learning_rate": 3.340020720120071e-05, "loss": 0.1841, "step": 13803 }, { "epoch": 1.6369026443732952, "grad_norm": 0.9977372690294354, "learning_rate": 3.3397946360676624e-05, "loss": 0.1824, "step": 13804 }, { "epoch": 1.6370212261354204, "grad_norm": 1.0007967336062522, "learning_rate": 3.339568544273377e-05, "loss": 0.2078, "step": 13805 }, { "epoch": 1.6371398078975452, "grad_norm": 0.9573606853946208, "learning_rate": 3.339342444739301e-05, "loss": 0.1526, "step": 13806 }, { "epoch": 1.6372583896596704, "grad_norm": 0.9080444191161291, "learning_rate": 3.3391163374675156e-05, "loss": 0.177, "step": 13807 }, { "epoch": 1.6373769714217952, "grad_norm": 1.5448273269789141, "learning_rate": 3.338890222460107e-05, "loss": 0.3354, "step": 13808 }, { "epoch": 1.6374955531839204, "grad_norm": 1.384779326481159, "learning_rate": 3.3386640997191586e-05, "loss": 0.2624, "step": 13809 }, { "epoch": 1.6376141349460454, "grad_norm": 0.6840106123021105, "learning_rate": 3.338437969246757e-05, "loss": 0.1352, "step": 13810 }, { "epoch": 1.6377327167081703, "grad_norm": 0.9562150873660082, "learning_rate": 3.3382118310449836e-05, "loss": 0.2099, "step": 13811 }, { "epoch": 1.6378512984702953, "grad_norm": 0.9429511500239804, "learning_rate": 3.3379856851159267e-05, "loss": 0.1995, "step": 13812 }, { "epoch": 1.6379698802324203, "grad_norm": 0.944505014751812, "learning_rate": 3.337759531461668e-05, "loss": 0.201, "step": 13813 }, { "epoch": 1.6380884619945453, "grad_norm": 0.7830441918315074, "learning_rate": 3.337533370084295e-05, "loss": 0.1863, "step": 13814 }, { "epoch": 1.6382070437566703, "grad_norm": 1.007904047970829, "learning_rate": 3.3373072009858905e-05, "loss": 0.2015, "step": 13815 }, { "epoch": 1.6383256255187952, "grad_norm": 0.9659261930176246, "learning_rate": 3.3370810241685405e-05, "loss": 0.2136, "step": 13816 }, { "epoch": 1.6384442072809202, "grad_norm": 0.8376393800558297, "learning_rate": 3.3368548396343306e-05, "loss": 0.1623, "step": 13817 }, { "epoch": 1.6385627890430452, "grad_norm": 0.6537707457930788, "learning_rate": 3.336628647385345e-05, "loss": 0.1364, "step": 13818 }, { "epoch": 1.6386813708051702, "grad_norm": 1.0544648529577976, "learning_rate": 3.336402447423669e-05, "loss": 0.17, "step": 13819 }, { "epoch": 1.6387999525672952, "grad_norm": 0.9592757490926909, "learning_rate": 3.336176239751388e-05, "loss": 0.2087, "step": 13820 }, { "epoch": 1.6389185343294201, "grad_norm": 0.9265357364024033, "learning_rate": 3.3359500243705885e-05, "loss": 0.1557, "step": 13821 }, { "epoch": 1.6390371160915451, "grad_norm": 1.0366254103195818, "learning_rate": 3.3357238012833535e-05, "loss": 0.1667, "step": 13822 }, { "epoch": 1.63915569785367, "grad_norm": 0.8213776520878997, "learning_rate": 3.33549757049177e-05, "loss": 0.2342, "step": 13823 }, { "epoch": 1.639274279615795, "grad_norm": 0.9221586260141729, "learning_rate": 3.335271331997924e-05, "loss": 0.1246, "step": 13824 }, { "epoch": 1.63939286137792, "grad_norm": 1.0265484314393691, "learning_rate": 3.3350450858039004e-05, "loss": 0.2647, "step": 13825 }, { "epoch": 1.639511443140045, "grad_norm": 1.2220162756962376, "learning_rate": 3.3348188319117854e-05, "loss": 0.2595, "step": 13826 }, { "epoch": 1.63963002490217, "grad_norm": 0.8455125257904342, "learning_rate": 3.334592570323664e-05, "loss": 0.1616, "step": 13827 }, { "epoch": 1.639748606664295, "grad_norm": 0.93221857439907, "learning_rate": 3.334366301041623e-05, "loss": 0.2053, "step": 13828 }, { "epoch": 1.63986718842642, "grad_norm": 0.9065258833235378, "learning_rate": 3.334140024067748e-05, "loss": 0.2131, "step": 13829 }, { "epoch": 1.639985770188545, "grad_norm": 1.0120427881699554, "learning_rate": 3.333913739404125e-05, "loss": 0.232, "step": 13830 }, { "epoch": 1.64010435195067, "grad_norm": 0.9816444681386551, "learning_rate": 3.333687447052839e-05, "loss": 0.1769, "step": 13831 }, { "epoch": 1.640222933712795, "grad_norm": 0.8194731320585147, "learning_rate": 3.3334611470159774e-05, "loss": 0.1503, "step": 13832 }, { "epoch": 1.64034151547492, "grad_norm": 0.8416103477092939, "learning_rate": 3.333234839295626e-05, "loss": 0.1732, "step": 13833 }, { "epoch": 1.640460097237045, "grad_norm": 0.9022962059812977, "learning_rate": 3.333008523893871e-05, "loss": 0.2191, "step": 13834 }, { "epoch": 1.6405786789991699, "grad_norm": 1.2396835675307525, "learning_rate": 3.3327822008127996e-05, "loss": 0.2222, "step": 13835 }, { "epoch": 1.640697260761295, "grad_norm": 0.8950424490388157, "learning_rate": 3.332555870054498e-05, "loss": 0.2092, "step": 13836 }, { "epoch": 1.6408158425234198, "grad_norm": 1.570387363913678, "learning_rate": 3.332329531621051e-05, "loss": 0.314, "step": 13837 }, { "epoch": 1.640934424285545, "grad_norm": 1.2483607279222857, "learning_rate": 3.3321031855145476e-05, "loss": 0.195, "step": 13838 }, { "epoch": 1.6410530060476698, "grad_norm": 1.2094921960380967, "learning_rate": 3.331876831737072e-05, "loss": 0.2015, "step": 13839 }, { "epoch": 1.641171587809795, "grad_norm": 1.6582885557639762, "learning_rate": 3.331650470290712e-05, "loss": 0.3944, "step": 13840 }, { "epoch": 1.6412901695719198, "grad_norm": 0.8531035861818216, "learning_rate": 3.331424101177556e-05, "loss": 0.186, "step": 13841 }, { "epoch": 1.641408751334045, "grad_norm": 1.0711551570554148, "learning_rate": 3.3311977243996886e-05, "loss": 0.2175, "step": 13842 }, { "epoch": 1.6415273330961697, "grad_norm": 0.9740725785293818, "learning_rate": 3.3309713399591967e-05, "loss": 0.1725, "step": 13843 }, { "epoch": 1.641645914858295, "grad_norm": 0.8511476794468771, "learning_rate": 3.3307449478581685e-05, "loss": 0.181, "step": 13844 }, { "epoch": 1.6417644966204197, "grad_norm": 1.0255554701313219, "learning_rate": 3.3305185480986914e-05, "loss": 0.2371, "step": 13845 }, { "epoch": 1.6418830783825449, "grad_norm": 1.2344917940272402, "learning_rate": 3.33029214068285e-05, "loss": 0.2754, "step": 13846 }, { "epoch": 1.6420016601446696, "grad_norm": 1.0236394865855631, "learning_rate": 3.3300657256127354e-05, "loss": 0.1823, "step": 13847 }, { "epoch": 1.6421202419067948, "grad_norm": 0.6175532627495581, "learning_rate": 3.3298393028904315e-05, "loss": 0.1474, "step": 13848 }, { "epoch": 1.6422388236689196, "grad_norm": 0.9885405716833187, "learning_rate": 3.329612872518027e-05, "loss": 0.221, "step": 13849 }, { "epoch": 1.6423574054310448, "grad_norm": 1.013572765660701, "learning_rate": 3.329386434497609e-05, "loss": 0.2688, "step": 13850 }, { "epoch": 1.6424759871931696, "grad_norm": 0.8421649981453955, "learning_rate": 3.329159988831266e-05, "loss": 0.1908, "step": 13851 }, { "epoch": 1.6425945689552948, "grad_norm": 0.7017537414852232, "learning_rate": 3.328933535521084e-05, "loss": 0.1451, "step": 13852 }, { "epoch": 1.6427131507174195, "grad_norm": 0.8711357825753703, "learning_rate": 3.3287070745691506e-05, "loss": 0.1816, "step": 13853 }, { "epoch": 1.6428317324795447, "grad_norm": 1.0136839398157238, "learning_rate": 3.328480605977555e-05, "loss": 0.2081, "step": 13854 }, { "epoch": 1.6429503142416695, "grad_norm": 1.1757713093406859, "learning_rate": 3.328254129748384e-05, "loss": 0.23, "step": 13855 }, { "epoch": 1.6430688960037947, "grad_norm": 0.7302413228220652, "learning_rate": 3.3280276458837254e-05, "loss": 0.1519, "step": 13856 }, { "epoch": 1.6431874777659194, "grad_norm": 0.8595289396260452, "learning_rate": 3.327801154385667e-05, "loss": 0.1945, "step": 13857 }, { "epoch": 1.6433060595280446, "grad_norm": 1.0779159300957744, "learning_rate": 3.327574655256298e-05, "loss": 0.2641, "step": 13858 }, { "epoch": 1.6434246412901694, "grad_norm": 0.9263287691564698, "learning_rate": 3.3273481484977056e-05, "loss": 0.1679, "step": 13859 }, { "epoch": 1.6435432230522946, "grad_norm": 0.7378900284119562, "learning_rate": 3.3271216341119776e-05, "loss": 0.185, "step": 13860 }, { "epoch": 1.6436618048144196, "grad_norm": 0.7255506612740806, "learning_rate": 3.3268951121012015e-05, "loss": 0.1197, "step": 13861 }, { "epoch": 1.6437803865765446, "grad_norm": 1.7373345877610926, "learning_rate": 3.326668582467468e-05, "loss": 0.352, "step": 13862 }, { "epoch": 1.6438989683386696, "grad_norm": 1.173645121866731, "learning_rate": 3.326442045212863e-05, "loss": 0.1948, "step": 13863 }, { "epoch": 1.6440175501007945, "grad_norm": 0.7397754490423194, "learning_rate": 3.326215500339476e-05, "loss": 0.1418, "step": 13864 }, { "epoch": 1.6441361318629195, "grad_norm": 0.8633350511541966, "learning_rate": 3.3259889478493946e-05, "loss": 0.1684, "step": 13865 }, { "epoch": 1.6442547136250445, "grad_norm": 0.9904134991218374, "learning_rate": 3.325762387744709e-05, "loss": 0.1988, "step": 13866 }, { "epoch": 1.6443732953871695, "grad_norm": 0.8876527708041332, "learning_rate": 3.325535820027506e-05, "loss": 0.2124, "step": 13867 }, { "epoch": 1.6444918771492945, "grad_norm": 0.9838041879904412, "learning_rate": 3.3253092446998754e-05, "loss": 0.2075, "step": 13868 }, { "epoch": 1.6446104589114194, "grad_norm": 0.9377139613408977, "learning_rate": 3.325082661763905e-05, "loss": 0.2086, "step": 13869 }, { "epoch": 1.6447290406735444, "grad_norm": 0.7483597936105872, "learning_rate": 3.324856071221685e-05, "loss": 0.1556, "step": 13870 }, { "epoch": 1.6448476224356694, "grad_norm": 0.7995351637515854, "learning_rate": 3.3246294730753034e-05, "loss": 0.1476, "step": 13871 }, { "epoch": 1.6449662041977944, "grad_norm": 1.2097210158505574, "learning_rate": 3.3244028673268494e-05, "loss": 0.2289, "step": 13872 }, { "epoch": 1.6450847859599194, "grad_norm": 0.8989324211230567, "learning_rate": 3.324176253978412e-05, "loss": 0.1671, "step": 13873 }, { "epoch": 1.6452033677220443, "grad_norm": 0.7992434387108824, "learning_rate": 3.3239496330320794e-05, "loss": 0.1697, "step": 13874 }, { "epoch": 1.6453219494841693, "grad_norm": 1.1259092630612604, "learning_rate": 3.3237230044899424e-05, "loss": 0.1992, "step": 13875 }, { "epoch": 1.6454405312462943, "grad_norm": 0.9626395937509123, "learning_rate": 3.3234963683540886e-05, "loss": 0.2173, "step": 13876 }, { "epoch": 1.6455591130084193, "grad_norm": 1.0732032909617721, "learning_rate": 3.32326972462661e-05, "loss": 0.1572, "step": 13877 }, { "epoch": 1.6456776947705443, "grad_norm": 1.362162956814713, "learning_rate": 3.323043073309592e-05, "loss": 0.2987, "step": 13878 }, { "epoch": 1.6457962765326692, "grad_norm": 0.9271324243446668, "learning_rate": 3.322816414405128e-05, "loss": 0.1462, "step": 13879 }, { "epoch": 1.6459148582947942, "grad_norm": 1.1010887660870774, "learning_rate": 3.322589747915304e-05, "loss": 0.231, "step": 13880 }, { "epoch": 1.6460334400569192, "grad_norm": 0.8538060988663343, "learning_rate": 3.322363073842212e-05, "loss": 0.1419, "step": 13881 }, { "epoch": 1.6461520218190442, "grad_norm": 0.7558510836119439, "learning_rate": 3.3221363921879407e-05, "loss": 0.1497, "step": 13882 }, { "epoch": 1.6462706035811692, "grad_norm": 0.8286156253908981, "learning_rate": 3.32190970295458e-05, "loss": 0.2019, "step": 13883 }, { "epoch": 1.6463891853432941, "grad_norm": 0.8846299946023335, "learning_rate": 3.32168300614422e-05, "loss": 0.1769, "step": 13884 }, { "epoch": 1.6465077671054194, "grad_norm": 0.8879951630429925, "learning_rate": 3.32145630175895e-05, "loss": 0.1919, "step": 13885 }, { "epoch": 1.646626348867544, "grad_norm": 1.179404485853633, "learning_rate": 3.3212295898008596e-05, "loss": 0.2198, "step": 13886 }, { "epoch": 1.6467449306296693, "grad_norm": 1.3038771862252292, "learning_rate": 3.3210028702720406e-05, "loss": 0.2653, "step": 13887 }, { "epoch": 1.646863512391794, "grad_norm": 1.0434536435574837, "learning_rate": 3.3207761431745816e-05, "loss": 0.1882, "step": 13888 }, { "epoch": 1.6469820941539193, "grad_norm": 0.9445151194392167, "learning_rate": 3.3205494085105726e-05, "loss": 0.1419, "step": 13889 }, { "epoch": 1.647100675916044, "grad_norm": 0.8853655191893106, "learning_rate": 3.3203226662821054e-05, "loss": 0.2165, "step": 13890 }, { "epoch": 1.6472192576781692, "grad_norm": 1.093965427868732, "learning_rate": 3.3200959164912684e-05, "loss": 0.2079, "step": 13891 }, { "epoch": 1.647337839440294, "grad_norm": 1.1812491269490126, "learning_rate": 3.319869159140152e-05, "loss": 0.19, "step": 13892 }, { "epoch": 1.6474564212024192, "grad_norm": 0.9746596427718994, "learning_rate": 3.319642394230848e-05, "loss": 0.181, "step": 13893 }, { "epoch": 1.647575002964544, "grad_norm": 1.0018570942524019, "learning_rate": 3.319415621765447e-05, "loss": 0.2129, "step": 13894 }, { "epoch": 1.6476935847266692, "grad_norm": 1.087763831119206, "learning_rate": 3.3191888417460375e-05, "loss": 0.2117, "step": 13895 }, { "epoch": 1.647812166488794, "grad_norm": 1.0934254905375338, "learning_rate": 3.318962054174712e-05, "loss": 0.208, "step": 13896 }, { "epoch": 1.6479307482509191, "grad_norm": 1.2480213416383779, "learning_rate": 3.318735259053561e-05, "loss": 0.2104, "step": 13897 }, { "epoch": 1.6480493300130439, "grad_norm": 0.7867062492065029, "learning_rate": 3.3185084563846744e-05, "loss": 0.1608, "step": 13898 }, { "epoch": 1.648167911775169, "grad_norm": 1.0619738248687995, "learning_rate": 3.3182816461701445e-05, "loss": 0.2036, "step": 13899 }, { "epoch": 1.6482864935372938, "grad_norm": 1.1132526731570105, "learning_rate": 3.31805482841206e-05, "loss": 0.1928, "step": 13900 }, { "epoch": 1.648405075299419, "grad_norm": 0.9531157746494681, "learning_rate": 3.317828003112514e-05, "loss": 0.1545, "step": 13901 }, { "epoch": 1.6485236570615438, "grad_norm": 0.9760233971418915, "learning_rate": 3.3176011702735964e-05, "loss": 0.2536, "step": 13902 }, { "epoch": 1.648642238823669, "grad_norm": 0.6614259049745874, "learning_rate": 3.317374329897398e-05, "loss": 0.1133, "step": 13903 }, { "epoch": 1.6487608205857938, "grad_norm": 0.8774068357024186, "learning_rate": 3.317147481986012e-05, "loss": 0.1806, "step": 13904 }, { "epoch": 1.648879402347919, "grad_norm": 1.0602129186677653, "learning_rate": 3.316920626541527e-05, "loss": 0.2315, "step": 13905 }, { "epoch": 1.6489979841100437, "grad_norm": 0.861566434195778, "learning_rate": 3.316693763566036e-05, "loss": 0.1943, "step": 13906 }, { "epoch": 1.649116565872169, "grad_norm": 1.0319345940996132, "learning_rate": 3.3164668930616305e-05, "loss": 0.1951, "step": 13907 }, { "epoch": 1.6492351476342937, "grad_norm": 1.2568961248349955, "learning_rate": 3.316240015030401e-05, "loss": 0.2006, "step": 13908 }, { "epoch": 1.6493537293964189, "grad_norm": 0.8317352542631572, "learning_rate": 3.31601312947444e-05, "loss": 0.1702, "step": 13909 }, { "epoch": 1.6494723111585439, "grad_norm": 1.0025101202750757, "learning_rate": 3.3157862363958374e-05, "loss": 0.1952, "step": 13910 }, { "epoch": 1.6495908929206688, "grad_norm": 1.071957838355202, "learning_rate": 3.3155593357966876e-05, "loss": 0.1609, "step": 13911 }, { "epoch": 1.6497094746827938, "grad_norm": 1.2540299184019081, "learning_rate": 3.315332427679079e-05, "loss": 0.2944, "step": 13912 }, { "epoch": 1.6498280564449188, "grad_norm": 0.9155324824162302, "learning_rate": 3.3151055120451065e-05, "loss": 0.1607, "step": 13913 }, { "epoch": 1.6499466382070438, "grad_norm": 1.052905732107297, "learning_rate": 3.31487858889686e-05, "loss": 0.2767, "step": 13914 }, { "epoch": 1.6500652199691688, "grad_norm": 0.9069887080085022, "learning_rate": 3.3146516582364325e-05, "loss": 0.1575, "step": 13915 }, { "epoch": 1.6501838017312938, "grad_norm": 1.0279514709294884, "learning_rate": 3.3144247200659166e-05, "loss": 0.2337, "step": 13916 }, { "epoch": 1.6503023834934187, "grad_norm": 1.253040605460322, "learning_rate": 3.3141977743874024e-05, "loss": 0.2406, "step": 13917 }, { "epoch": 1.6504209652555437, "grad_norm": 0.836327638859467, "learning_rate": 3.313970821202984e-05, "loss": 0.1569, "step": 13918 }, { "epoch": 1.6505395470176687, "grad_norm": 0.7881465859653758, "learning_rate": 3.313743860514752e-05, "loss": 0.2053, "step": 13919 }, { "epoch": 1.6506581287797937, "grad_norm": 0.8295457334662909, "learning_rate": 3.3135168923248e-05, "loss": 0.1872, "step": 13920 }, { "epoch": 1.6507767105419187, "grad_norm": 1.079958605851507, "learning_rate": 3.31328991663522e-05, "loss": 0.2146, "step": 13921 }, { "epoch": 1.6508952923040436, "grad_norm": 1.3701470165740104, "learning_rate": 3.3130629334481036e-05, "loss": 0.2257, "step": 13922 }, { "epoch": 1.6510138740661686, "grad_norm": 1.013170279567228, "learning_rate": 3.312835942765544e-05, "loss": 0.173, "step": 13923 }, { "epoch": 1.6511324558282936, "grad_norm": 1.2634240658001792, "learning_rate": 3.312608944589635e-05, "loss": 0.2839, "step": 13924 }, { "epoch": 1.6512510375904186, "grad_norm": 0.9102195727003829, "learning_rate": 3.3123819389224665e-05, "loss": 0.1459, "step": 13925 }, { "epoch": 1.6513696193525436, "grad_norm": 1.1284790178101443, "learning_rate": 3.3121549257661336e-05, "loss": 0.2621, "step": 13926 }, { "epoch": 1.6514882011146685, "grad_norm": 0.9292023944537605, "learning_rate": 3.311927905122728e-05, "loss": 0.196, "step": 13927 }, { "epoch": 1.6516067828767935, "grad_norm": 0.6664443219556402, "learning_rate": 3.3117008769943425e-05, "loss": 0.1592, "step": 13928 }, { "epoch": 1.6517253646389185, "grad_norm": 6.0294919960191615, "learning_rate": 3.311473841383071e-05, "loss": 0.2063, "step": 13929 }, { "epoch": 1.6518439464010435, "grad_norm": 1.3010297748306767, "learning_rate": 3.311246798291005e-05, "loss": 0.2043, "step": 13930 }, { "epoch": 1.6519625281631685, "grad_norm": 0.9306122970271944, "learning_rate": 3.311019747720239e-05, "loss": 0.2421, "step": 13931 }, { "epoch": 1.6520811099252934, "grad_norm": 1.047804717940458, "learning_rate": 3.310792689672865e-05, "loss": 0.1757, "step": 13932 }, { "epoch": 1.6521996916874184, "grad_norm": 1.4939889967119, "learning_rate": 3.3105656241509764e-05, "loss": 0.292, "step": 13933 }, { "epoch": 1.6523182734495434, "grad_norm": 0.973963641851158, "learning_rate": 3.310338551156667e-05, "loss": 0.2021, "step": 13934 }, { "epoch": 1.6524368552116684, "grad_norm": 1.054432373662916, "learning_rate": 3.310111470692031e-05, "loss": 0.2785, "step": 13935 }, { "epoch": 1.6525554369737936, "grad_norm": 0.7742324127210048, "learning_rate": 3.309884382759158e-05, "loss": 0.1595, "step": 13936 }, { "epoch": 1.6526740187359183, "grad_norm": 0.9944085031356489, "learning_rate": 3.309657287360147e-05, "loss": 0.2137, "step": 13937 }, { "epoch": 1.6527926004980436, "grad_norm": 0.9808534758008662, "learning_rate": 3.309430184497087e-05, "loss": 0.2062, "step": 13938 }, { "epoch": 1.6529111822601683, "grad_norm": 1.0777331654924396, "learning_rate": 3.309203074172074e-05, "loss": 0.2154, "step": 13939 }, { "epoch": 1.6530297640222935, "grad_norm": 0.7692187432871385, "learning_rate": 3.3089759563872006e-05, "loss": 0.1934, "step": 13940 }, { "epoch": 1.6531483457844183, "grad_norm": 1.12997910401799, "learning_rate": 3.308748831144561e-05, "loss": 0.2487, "step": 13941 }, { "epoch": 1.6532669275465435, "grad_norm": 0.8214283849751711, "learning_rate": 3.308521698446249e-05, "loss": 0.1642, "step": 13942 }, { "epoch": 1.6533855093086682, "grad_norm": 0.8921589143006421, "learning_rate": 3.308294558294358e-05, "loss": 0.2188, "step": 13943 }, { "epoch": 1.6535040910707934, "grad_norm": 0.7336665077014756, "learning_rate": 3.3080674106909834e-05, "loss": 0.1477, "step": 13944 }, { "epoch": 1.6536226728329182, "grad_norm": 0.9481238882538805, "learning_rate": 3.3078402556382174e-05, "loss": 0.171, "step": 13945 }, { "epoch": 1.6537412545950434, "grad_norm": 0.754890465244835, "learning_rate": 3.307613093138155e-05, "loss": 0.1866, "step": 13946 }, { "epoch": 1.6538598363571682, "grad_norm": 0.9910932132422134, "learning_rate": 3.30738592319289e-05, "loss": 0.2198, "step": 13947 }, { "epoch": 1.6539784181192934, "grad_norm": 1.3561599483839972, "learning_rate": 3.307158745804517e-05, "loss": 0.2425, "step": 13948 }, { "epoch": 1.6540969998814181, "grad_norm": 0.8779460468678094, "learning_rate": 3.3069315609751303e-05, "loss": 0.1572, "step": 13949 }, { "epoch": 1.6542155816435433, "grad_norm": 1.1319792189097133, "learning_rate": 3.3067043687068246e-05, "loss": 0.1775, "step": 13950 }, { "epoch": 1.654334163405668, "grad_norm": 1.179409261847428, "learning_rate": 3.3064771690016935e-05, "loss": 0.1704, "step": 13951 }, { "epoch": 1.6544527451677933, "grad_norm": 0.9986249150821348, "learning_rate": 3.306249961861832e-05, "loss": 0.2136, "step": 13952 }, { "epoch": 1.654571326929918, "grad_norm": 0.9936718673974518, "learning_rate": 3.306022747289334e-05, "loss": 0.2153, "step": 13953 }, { "epoch": 1.6546899086920432, "grad_norm": 1.6246129254650212, "learning_rate": 3.305795525286295e-05, "loss": 0.3253, "step": 13954 }, { "epoch": 1.654808490454168, "grad_norm": 1.071571223824016, "learning_rate": 3.305568295854809e-05, "loss": 0.1791, "step": 13955 }, { "epoch": 1.6549270722162932, "grad_norm": 1.4688008832403654, "learning_rate": 3.3053410589969715e-05, "loss": 0.3198, "step": 13956 }, { "epoch": 1.655045653978418, "grad_norm": 1.2328457206161871, "learning_rate": 3.3051138147148776e-05, "loss": 0.2315, "step": 13957 }, { "epoch": 1.6551642357405432, "grad_norm": 0.7404911521293492, "learning_rate": 3.304886563010621e-05, "loss": 0.1203, "step": 13958 }, { "epoch": 1.655282817502668, "grad_norm": 0.6757725432287234, "learning_rate": 3.304659303886297e-05, "loss": 0.124, "step": 13959 }, { "epoch": 1.6554013992647931, "grad_norm": 1.2434154201322978, "learning_rate": 3.3044320373440014e-05, "loss": 0.2231, "step": 13960 }, { "epoch": 1.655519981026918, "grad_norm": 0.9555935940883024, "learning_rate": 3.304204763385829e-05, "loss": 0.1673, "step": 13961 }, { "epoch": 1.655638562789043, "grad_norm": 0.9219798353201587, "learning_rate": 3.303977482013875e-05, "loss": 0.2218, "step": 13962 }, { "epoch": 1.655757144551168, "grad_norm": 0.7827503846712562, "learning_rate": 3.303750193230234e-05, "loss": 0.1911, "step": 13963 }, { "epoch": 1.655875726313293, "grad_norm": 0.8945454948632056, "learning_rate": 3.303522897037001e-05, "loss": 0.1562, "step": 13964 }, { "epoch": 1.655994308075418, "grad_norm": 1.2501402358300706, "learning_rate": 3.303295593436274e-05, "loss": 0.2275, "step": 13965 }, { "epoch": 1.656112889837543, "grad_norm": 1.3857601867731115, "learning_rate": 3.3030682824301456e-05, "loss": 0.3046, "step": 13966 }, { "epoch": 1.656231471599668, "grad_norm": 1.096857910786883, "learning_rate": 3.3028409640207134e-05, "loss": 0.2591, "step": 13967 }, { "epoch": 1.656350053361793, "grad_norm": 0.8435884150642413, "learning_rate": 3.3026136382100714e-05, "loss": 0.197, "step": 13968 }, { "epoch": 1.656468635123918, "grad_norm": 0.810873948244756, "learning_rate": 3.302386305000316e-05, "loss": 0.1903, "step": 13969 }, { "epoch": 1.656587216886043, "grad_norm": 1.031833546838447, "learning_rate": 3.3021589643935425e-05, "loss": 0.2231, "step": 13970 }, { "epoch": 1.656705798648168, "grad_norm": 0.9016917635958688, "learning_rate": 3.301931616391847e-05, "loss": 0.2329, "step": 13971 }, { "epoch": 1.656824380410293, "grad_norm": 0.8897711720221393, "learning_rate": 3.3017042609973254e-05, "loss": 0.1862, "step": 13972 }, { "epoch": 1.6569429621724179, "grad_norm": 0.9614721594772205, "learning_rate": 3.301476898212074e-05, "loss": 0.1823, "step": 13973 }, { "epoch": 1.6570615439345429, "grad_norm": 0.8045516198981914, "learning_rate": 3.301249528038188e-05, "loss": 0.1786, "step": 13974 }, { "epoch": 1.6571801256966678, "grad_norm": 0.7321541032294968, "learning_rate": 3.3010221504777645e-05, "loss": 0.1475, "step": 13975 }, { "epoch": 1.6572987074587928, "grad_norm": 0.9752904026963553, "learning_rate": 3.300794765532898e-05, "loss": 0.1838, "step": 13976 }, { "epoch": 1.6574172892209178, "grad_norm": 0.8831149656134609, "learning_rate": 3.300567373205687e-05, "loss": 0.1598, "step": 13977 }, { "epoch": 1.6575358709830428, "grad_norm": 0.9304520869379564, "learning_rate": 3.3003399734982266e-05, "loss": 0.2174, "step": 13978 }, { "epoch": 1.6576544527451678, "grad_norm": 0.9760102229040017, "learning_rate": 3.300112566412612e-05, "loss": 0.1842, "step": 13979 }, { "epoch": 1.6577730345072927, "grad_norm": 1.0260226000544366, "learning_rate": 3.299885151950942e-05, "loss": 0.2154, "step": 13980 }, { "epoch": 1.6578916162694177, "grad_norm": 0.6912876614515638, "learning_rate": 3.299657730115311e-05, "loss": 0.1127, "step": 13981 }, { "epoch": 1.6580101980315427, "grad_norm": 1.4007288197272427, "learning_rate": 3.299430300907816e-05, "loss": 0.272, "step": 13982 }, { "epoch": 1.6581287797936677, "grad_norm": 1.102471968746673, "learning_rate": 3.299202864330554e-05, "loss": 0.2136, "step": 13983 }, { "epoch": 1.6582473615557927, "grad_norm": 0.9912018262943079, "learning_rate": 3.2989754203856214e-05, "loss": 0.2286, "step": 13984 }, { "epoch": 1.6583659433179179, "grad_norm": 1.1238237244926046, "learning_rate": 3.2987479690751165e-05, "loss": 0.1941, "step": 13985 }, { "epoch": 1.6584845250800426, "grad_norm": 1.237878218284402, "learning_rate": 3.2985205104011334e-05, "loss": 0.2857, "step": 13986 }, { "epoch": 1.6586031068421678, "grad_norm": 0.9227144622753768, "learning_rate": 3.298293044365772e-05, "loss": 0.1833, "step": 13987 }, { "epoch": 1.6587216886042926, "grad_norm": 0.8063986540824569, "learning_rate": 3.298065570971126e-05, "loss": 0.1413, "step": 13988 }, { "epoch": 1.6588402703664178, "grad_norm": 0.7061612840073647, "learning_rate": 3.2978380902192954e-05, "loss": 0.1406, "step": 13989 }, { "epoch": 1.6589588521285425, "grad_norm": 0.8651781674269983, "learning_rate": 3.2976106021123756e-05, "loss": 0.1396, "step": 13990 }, { "epoch": 1.6590774338906678, "grad_norm": 0.945160083250895, "learning_rate": 3.2973831066524644e-05, "loss": 0.1787, "step": 13991 }, { "epoch": 1.6591960156527925, "grad_norm": 1.2153255575877915, "learning_rate": 3.297155603841657e-05, "loss": 0.1961, "step": 13992 }, { "epoch": 1.6593145974149177, "grad_norm": 0.9366390195412858, "learning_rate": 3.296928093682054e-05, "loss": 0.1276, "step": 13993 }, { "epoch": 1.6594331791770425, "grad_norm": 0.9171548108052973, "learning_rate": 3.296700576175751e-05, "loss": 0.1534, "step": 13994 }, { "epoch": 1.6595517609391677, "grad_norm": 1.1957651083346752, "learning_rate": 3.296473051324846e-05, "loss": 0.2406, "step": 13995 }, { "epoch": 1.6596703427012924, "grad_norm": 1.1450872386599924, "learning_rate": 3.2962455191314356e-05, "loss": 0.2073, "step": 13996 }, { "epoch": 1.6597889244634176, "grad_norm": 1.1111336216305703, "learning_rate": 3.296017979597618e-05, "loss": 0.248, "step": 13997 }, { "epoch": 1.6599075062255424, "grad_norm": 1.1471513246894265, "learning_rate": 3.2957904327254916e-05, "loss": 0.2908, "step": 13998 }, { "epoch": 1.6600260879876676, "grad_norm": 1.1647333850066206, "learning_rate": 3.295562878517152e-05, "loss": 0.1965, "step": 13999 }, { "epoch": 1.6601446697497924, "grad_norm": 1.1210908544609235, "learning_rate": 3.2953353169746995e-05, "loss": 0.2022, "step": 14000 }, { "epoch": 1.6602632515119176, "grad_norm": 0.7742748293930636, "learning_rate": 3.2951077481002297e-05, "loss": 0.1506, "step": 14001 }, { "epoch": 1.6603818332740423, "grad_norm": 1.3030827505200004, "learning_rate": 3.294880171895841e-05, "loss": 0.2479, "step": 14002 }, { "epoch": 1.6605004150361675, "grad_norm": 1.2511174718252214, "learning_rate": 3.2946525883636326e-05, "loss": 0.1976, "step": 14003 }, { "epoch": 1.6606189967982923, "grad_norm": 1.1854587309161957, "learning_rate": 3.294424997505703e-05, "loss": 0.2256, "step": 14004 }, { "epoch": 1.6607375785604175, "grad_norm": 1.084006499856509, "learning_rate": 3.294197399324147e-05, "loss": 0.1883, "step": 14005 }, { "epoch": 1.6608561603225422, "grad_norm": 0.7518080216383431, "learning_rate": 3.293969793821067e-05, "loss": 0.1554, "step": 14006 }, { "epoch": 1.6609747420846674, "grad_norm": 1.1193024949349886, "learning_rate": 3.2937421809985574e-05, "loss": 0.2354, "step": 14007 }, { "epoch": 1.6610933238467922, "grad_norm": 1.375357562800624, "learning_rate": 3.2935145608587195e-05, "loss": 0.2969, "step": 14008 }, { "epoch": 1.6612119056089174, "grad_norm": 0.8394376083881863, "learning_rate": 3.2932869334036495e-05, "loss": 0.1907, "step": 14009 }, { "epoch": 1.6613304873710424, "grad_norm": 1.0081816168949649, "learning_rate": 3.293059298635447e-05, "loss": 0.2082, "step": 14010 }, { "epoch": 1.6614490691331674, "grad_norm": 1.2124337752982537, "learning_rate": 3.29283165655621e-05, "loss": 0.3085, "step": 14011 }, { "epoch": 1.6615676508952923, "grad_norm": 0.77580591748616, "learning_rate": 3.292604007168037e-05, "loss": 0.1498, "step": 14012 }, { "epoch": 1.6616862326574173, "grad_norm": 1.0558924380318635, "learning_rate": 3.292376350473028e-05, "loss": 0.1942, "step": 14013 }, { "epoch": 1.6618048144195423, "grad_norm": 0.9875345001588826, "learning_rate": 3.292148686473281e-05, "loss": 0.2, "step": 14014 }, { "epoch": 1.6619233961816673, "grad_norm": 1.0059873770706214, "learning_rate": 3.2919210151708936e-05, "loss": 0.2199, "step": 14015 }, { "epoch": 1.6620419779437923, "grad_norm": 1.0363584978288096, "learning_rate": 3.291693336567966e-05, "loss": 0.1745, "step": 14016 }, { "epoch": 1.6621605597059173, "grad_norm": 0.9725676092523261, "learning_rate": 3.291465650666596e-05, "loss": 0.175, "step": 14017 }, { "epoch": 1.6622791414680422, "grad_norm": 0.7108646561746085, "learning_rate": 3.2912379574688845e-05, "loss": 0.1799, "step": 14018 }, { "epoch": 1.6623977232301672, "grad_norm": 1.8422253647322555, "learning_rate": 3.291010256976928e-05, "loss": 0.32, "step": 14019 }, { "epoch": 1.6625163049922922, "grad_norm": 1.050708442150772, "learning_rate": 3.290782549192828e-05, "loss": 0.1996, "step": 14020 }, { "epoch": 1.6626348867544172, "grad_norm": 1.0269971504693947, "learning_rate": 3.290554834118682e-05, "loss": 0.2097, "step": 14021 }, { "epoch": 1.6627534685165422, "grad_norm": 0.9094733986112387, "learning_rate": 3.2903271117565904e-05, "loss": 0.1511, "step": 14022 }, { "epoch": 1.6628720502786671, "grad_norm": 0.9027106115781086, "learning_rate": 3.2900993821086515e-05, "loss": 0.1529, "step": 14023 }, { "epoch": 1.6629906320407921, "grad_norm": 1.2007047145439325, "learning_rate": 3.289871645176965e-05, "loss": 0.2491, "step": 14024 }, { "epoch": 1.663109213802917, "grad_norm": 0.9995160436461187, "learning_rate": 3.289643900963631e-05, "loss": 0.2228, "step": 14025 }, { "epoch": 1.663227795565042, "grad_norm": 1.1945822066059752, "learning_rate": 3.2894161494707486e-05, "loss": 0.2722, "step": 14026 }, { "epoch": 1.663346377327167, "grad_norm": 0.7054959983999796, "learning_rate": 3.289188390700417e-05, "loss": 0.1804, "step": 14027 }, { "epoch": 1.663464959089292, "grad_norm": 0.7449623012774739, "learning_rate": 3.2889606246547365e-05, "loss": 0.1504, "step": 14028 }, { "epoch": 1.663583540851417, "grad_norm": 1.0791147494373272, "learning_rate": 3.288732851335806e-05, "loss": 0.2567, "step": 14029 }, { "epoch": 1.663702122613542, "grad_norm": 1.0061533141784587, "learning_rate": 3.288505070745727e-05, "loss": 0.1643, "step": 14030 }, { "epoch": 1.663820704375667, "grad_norm": 0.8504435492354446, "learning_rate": 3.2882772828865974e-05, "loss": 0.2181, "step": 14031 }, { "epoch": 1.663939286137792, "grad_norm": 0.8424910509921807, "learning_rate": 3.288049487760517e-05, "loss": 0.1908, "step": 14032 }, { "epoch": 1.664057867899917, "grad_norm": 0.9469789420478224, "learning_rate": 3.287821685369588e-05, "loss": 0.2446, "step": 14033 }, { "epoch": 1.664176449662042, "grad_norm": 0.9685387017661671, "learning_rate": 3.287593875715909e-05, "loss": 0.1874, "step": 14034 }, { "epoch": 1.664295031424167, "grad_norm": 1.1279582369341272, "learning_rate": 3.2873660588015795e-05, "loss": 0.3037, "step": 14035 }, { "epoch": 1.664413613186292, "grad_norm": 0.7078239394088771, "learning_rate": 3.2871382346287014e-05, "loss": 0.1499, "step": 14036 }, { "epoch": 1.6645321949484169, "grad_norm": 0.9734044500574901, "learning_rate": 3.286910403199373e-05, "loss": 0.2126, "step": 14037 }, { "epoch": 1.664650776710542, "grad_norm": 0.9562641770239007, "learning_rate": 3.286682564515697e-05, "loss": 0.2106, "step": 14038 }, { "epoch": 1.6647693584726668, "grad_norm": 1.0259329496286096, "learning_rate": 3.286454718579772e-05, "loss": 0.1789, "step": 14039 }, { "epoch": 1.664887940234792, "grad_norm": 0.9664653584771096, "learning_rate": 3.2862268653936985e-05, "loss": 0.2359, "step": 14040 }, { "epoch": 1.6650065219969168, "grad_norm": 0.8144475858200041, "learning_rate": 3.285999004959578e-05, "loss": 0.1691, "step": 14041 }, { "epoch": 1.665125103759042, "grad_norm": 0.9681912208596729, "learning_rate": 3.2857711372795096e-05, "loss": 0.1922, "step": 14042 }, { "epoch": 1.6652436855211667, "grad_norm": 1.077963793784443, "learning_rate": 3.2855432623555953e-05, "loss": 0.2869, "step": 14043 }, { "epoch": 1.665362267283292, "grad_norm": 0.9300883327750298, "learning_rate": 3.285315380189935e-05, "loss": 0.1978, "step": 14044 }, { "epoch": 1.6654808490454167, "grad_norm": 1.0330585331084008, "learning_rate": 3.2850874907846304e-05, "loss": 0.2611, "step": 14045 }, { "epoch": 1.665599430807542, "grad_norm": 0.9132854026032966, "learning_rate": 3.2848595941417824e-05, "loss": 0.1709, "step": 14046 }, { "epoch": 1.6657180125696667, "grad_norm": 0.9910039531375325, "learning_rate": 3.284631690263491e-05, "loss": 0.2113, "step": 14047 }, { "epoch": 1.6658365943317919, "grad_norm": 0.9111016353181309, "learning_rate": 3.284403779151857e-05, "loss": 0.2095, "step": 14048 }, { "epoch": 1.6659551760939166, "grad_norm": 0.7491117408664655, "learning_rate": 3.284175860808983e-05, "loss": 0.1551, "step": 14049 }, { "epoch": 1.6660737578560418, "grad_norm": 1.0108849436105112, "learning_rate": 3.283947935236969e-05, "loss": 0.2167, "step": 14050 }, { "epoch": 1.6661923396181666, "grad_norm": 0.9382525446004681, "learning_rate": 3.283720002437916e-05, "loss": 0.1767, "step": 14051 }, { "epoch": 1.6663109213802918, "grad_norm": 0.7602689585317933, "learning_rate": 3.283492062413925e-05, "loss": 0.1812, "step": 14052 }, { "epoch": 1.6664295031424166, "grad_norm": 0.8092471927942004, "learning_rate": 3.2832641151670994e-05, "loss": 0.1502, "step": 14053 }, { "epoch": 1.6665480849045418, "grad_norm": 0.822931810401402, "learning_rate": 3.283036160699538e-05, "loss": 0.1944, "step": 14054 }, { "epoch": 1.6666666666666665, "grad_norm": 0.7661760818879054, "learning_rate": 3.2828081990133444e-05, "loss": 0.1475, "step": 14055 }, { "epoch": 1.6667852484287917, "grad_norm": 0.9999674802213672, "learning_rate": 3.2825802301106185e-05, "loss": 0.2192, "step": 14056 }, { "epoch": 1.6669038301909165, "grad_norm": 0.754966047711745, "learning_rate": 3.282352253993463e-05, "loss": 0.1975, "step": 14057 }, { "epoch": 1.6670224119530417, "grad_norm": 0.8599580067865027, "learning_rate": 3.282124270663979e-05, "loss": 0.2323, "step": 14058 }, { "epoch": 1.6671409937151664, "grad_norm": 1.067686018496924, "learning_rate": 3.281896280124267e-05, "loss": 0.186, "step": 14059 }, { "epoch": 1.6672595754772916, "grad_norm": 1.2209231009506163, "learning_rate": 3.281668282376432e-05, "loss": 0.2929, "step": 14060 }, { "epoch": 1.6673781572394166, "grad_norm": 0.9051758382888907, "learning_rate": 3.281440277422573e-05, "loss": 0.164, "step": 14061 }, { "epoch": 1.6674967390015416, "grad_norm": 1.0766904751879616, "learning_rate": 3.281212265264793e-05, "loss": 0.2585, "step": 14062 }, { "epoch": 1.6676153207636666, "grad_norm": 0.780234117263807, "learning_rate": 3.280984245905194e-05, "loss": 0.142, "step": 14063 }, { "epoch": 1.6677339025257916, "grad_norm": 1.0581617219913209, "learning_rate": 3.2807562193458785e-05, "loss": 0.2127, "step": 14064 }, { "epoch": 1.6678524842879165, "grad_norm": 1.1934151963385864, "learning_rate": 3.280528185588947e-05, "loss": 0.2254, "step": 14065 }, { "epoch": 1.6679710660500415, "grad_norm": 0.9479915439032671, "learning_rate": 3.280300144636504e-05, "loss": 0.2107, "step": 14066 }, { "epoch": 1.6680896478121665, "grad_norm": 1.093439621222816, "learning_rate": 3.280072096490649e-05, "loss": 0.1823, "step": 14067 }, { "epoch": 1.6682082295742915, "grad_norm": 1.0178109136192943, "learning_rate": 3.279844041153488e-05, "loss": 0.2072, "step": 14068 }, { "epoch": 1.6683268113364165, "grad_norm": 0.8416063520707616, "learning_rate": 3.279615978627119e-05, "loss": 0.1563, "step": 14069 }, { "epoch": 1.6684453930985415, "grad_norm": 1.0998783089365505, "learning_rate": 3.279387908913648e-05, "loss": 0.2165, "step": 14070 }, { "epoch": 1.6685639748606664, "grad_norm": 0.933458806760182, "learning_rate": 3.279159832015176e-05, "loss": 0.1542, "step": 14071 }, { "epoch": 1.6686825566227914, "grad_norm": 0.8742342098809177, "learning_rate": 3.2789317479338053e-05, "loss": 0.1726, "step": 14072 }, { "epoch": 1.6688011383849164, "grad_norm": 0.8238007003582317, "learning_rate": 3.2787036566716394e-05, "loss": 0.1931, "step": 14073 }, { "epoch": 1.6689197201470414, "grad_norm": 0.9090173990545731, "learning_rate": 3.278475558230781e-05, "loss": 0.1328, "step": 14074 }, { "epoch": 1.6690383019091664, "grad_norm": 0.803679216938461, "learning_rate": 3.2782474526133325e-05, "loss": 0.1687, "step": 14075 }, { "epoch": 1.6691568836712913, "grad_norm": 0.862978169068937, "learning_rate": 3.278019339821396e-05, "loss": 0.2017, "step": 14076 }, { "epoch": 1.6692754654334163, "grad_norm": 0.7874041924872629, "learning_rate": 3.277791219857077e-05, "loss": 0.1535, "step": 14077 }, { "epoch": 1.6693940471955413, "grad_norm": 0.8804084138874005, "learning_rate": 3.2775630927224755e-05, "loss": 0.2187, "step": 14078 }, { "epoch": 1.6695126289576663, "grad_norm": 1.402796073860145, "learning_rate": 3.277334958419696e-05, "loss": 0.3079, "step": 14079 }, { "epoch": 1.6696312107197913, "grad_norm": 1.0735602640035617, "learning_rate": 3.277106816950841e-05, "loss": 0.1722, "step": 14080 }, { "epoch": 1.6697497924819162, "grad_norm": 1.0785366068521374, "learning_rate": 3.276878668318015e-05, "loss": 0.1863, "step": 14081 }, { "epoch": 1.6698683742440412, "grad_norm": 0.9260016330127745, "learning_rate": 3.276650512523319e-05, "loss": 0.1631, "step": 14082 }, { "epoch": 1.6699869560061662, "grad_norm": 1.0106573818500837, "learning_rate": 3.276422349568859e-05, "loss": 0.2647, "step": 14083 }, { "epoch": 1.6701055377682912, "grad_norm": 0.8136110175967916, "learning_rate": 3.276194179456737e-05, "loss": 0.1605, "step": 14084 }, { "epoch": 1.6702241195304164, "grad_norm": 1.035596284527482, "learning_rate": 3.275966002189056e-05, "loss": 0.268, "step": 14085 }, { "epoch": 1.6703427012925411, "grad_norm": 0.8568261172542134, "learning_rate": 3.27573781776792e-05, "loss": 0.1536, "step": 14086 }, { "epoch": 1.6704612830546663, "grad_norm": 0.9218002185505508, "learning_rate": 3.275509626195433e-05, "loss": 0.1961, "step": 14087 }, { "epoch": 1.670579864816791, "grad_norm": 1.02883912199065, "learning_rate": 3.2752814274736974e-05, "loss": 0.2212, "step": 14088 }, { "epoch": 1.6706984465789163, "grad_norm": 0.8405282445958019, "learning_rate": 3.2750532216048186e-05, "loss": 0.1487, "step": 14089 }, { "epoch": 1.670817028341041, "grad_norm": 0.8617758336828976, "learning_rate": 3.274825008590899e-05, "loss": 0.1553, "step": 14090 }, { "epoch": 1.6709356101031663, "grad_norm": 1.201586643144924, "learning_rate": 3.2745967884340436e-05, "loss": 0.2074, "step": 14091 }, { "epoch": 1.671054191865291, "grad_norm": 1.0391531079859624, "learning_rate": 3.274368561136355e-05, "loss": 0.2546, "step": 14092 }, { "epoch": 1.6711727736274162, "grad_norm": 1.0067727608875576, "learning_rate": 3.274140326699938e-05, "loss": 0.2038, "step": 14093 }, { "epoch": 1.671291355389541, "grad_norm": 1.1899788251996064, "learning_rate": 3.273912085126897e-05, "loss": 0.259, "step": 14094 }, { "epoch": 1.6714099371516662, "grad_norm": 0.6595253726235208, "learning_rate": 3.273683836419335e-05, "loss": 0.1338, "step": 14095 }, { "epoch": 1.671528518913791, "grad_norm": 1.4550577310445594, "learning_rate": 3.2734555805793576e-05, "loss": 0.3632, "step": 14096 }, { "epoch": 1.6716471006759162, "grad_norm": 0.9441584322332947, "learning_rate": 3.273227317609067e-05, "loss": 0.1641, "step": 14097 }, { "epoch": 1.671765682438041, "grad_norm": 1.1033761033145042, "learning_rate": 3.272999047510569e-05, "loss": 0.2344, "step": 14098 }, { "epoch": 1.6718842642001661, "grad_norm": 0.8484187932610294, "learning_rate": 3.272770770285969e-05, "loss": 0.1878, "step": 14099 }, { "epoch": 1.6720028459622909, "grad_norm": 1.5294403845103888, "learning_rate": 3.272542485937369e-05, "loss": 0.2918, "step": 14100 }, { "epoch": 1.672121427724416, "grad_norm": 0.7881047586536996, "learning_rate": 3.2723141944668745e-05, "loss": 0.1873, "step": 14101 }, { "epoch": 1.6722400094865408, "grad_norm": 0.9798134213761764, "learning_rate": 3.27208589587659e-05, "loss": 0.1952, "step": 14102 }, { "epoch": 1.672358591248666, "grad_norm": 1.0722762129819527, "learning_rate": 3.271857590168622e-05, "loss": 0.2242, "step": 14103 }, { "epoch": 1.6724771730107908, "grad_norm": 0.8452360675905167, "learning_rate": 3.2716292773450716e-05, "loss": 0.1909, "step": 14104 }, { "epoch": 1.672595754772916, "grad_norm": 0.9752969075065397, "learning_rate": 3.2714009574080466e-05, "loss": 0.2126, "step": 14105 }, { "epoch": 1.6727143365350408, "grad_norm": 0.8936718927168437, "learning_rate": 3.27117263035965e-05, "loss": 0.1906, "step": 14106 }, { "epoch": 1.672832918297166, "grad_norm": 1.0757189324651528, "learning_rate": 3.270944296201989e-05, "loss": 0.2234, "step": 14107 }, { "epoch": 1.6729515000592907, "grad_norm": 0.9210471705925822, "learning_rate": 3.270715954937165e-05, "loss": 0.2298, "step": 14108 }, { "epoch": 1.673070081821416, "grad_norm": 1.2555156864487558, "learning_rate": 3.2704876065672863e-05, "loss": 0.2861, "step": 14109 }, { "epoch": 1.673188663583541, "grad_norm": 1.2004612378867496, "learning_rate": 3.2702592510944564e-05, "loss": 0.3063, "step": 14110 }, { "epoch": 1.6733072453456659, "grad_norm": 1.0713973828293866, "learning_rate": 3.2700308885207805e-05, "loss": 0.2012, "step": 14111 }, { "epoch": 1.6734258271077909, "grad_norm": 1.2298459137320348, "learning_rate": 3.269802518848364e-05, "loss": 0.2409, "step": 14112 }, { "epoch": 1.6735444088699158, "grad_norm": 1.0755418078792724, "learning_rate": 3.269574142079312e-05, "loss": 0.1994, "step": 14113 }, { "epoch": 1.6736629906320408, "grad_norm": 0.6483791926151304, "learning_rate": 3.269345758215731e-05, "loss": 0.1514, "step": 14114 }, { "epoch": 1.6737815723941658, "grad_norm": 0.7749709539317895, "learning_rate": 3.269117367259725e-05, "loss": 0.156, "step": 14115 }, { "epoch": 1.6739001541562908, "grad_norm": 0.9911344953508283, "learning_rate": 3.268888969213401e-05, "loss": 0.2267, "step": 14116 }, { "epoch": 1.6740187359184158, "grad_norm": 0.9786324310635992, "learning_rate": 3.268660564078862e-05, "loss": 0.209, "step": 14117 }, { "epoch": 1.6741373176805407, "grad_norm": 1.258395568168295, "learning_rate": 3.268432151858216e-05, "loss": 0.2943, "step": 14118 }, { "epoch": 1.6742558994426657, "grad_norm": 0.8181165294800651, "learning_rate": 3.268203732553568e-05, "loss": 0.1475, "step": 14119 }, { "epoch": 1.6743744812047907, "grad_norm": 0.8274722114153531, "learning_rate": 3.2679753061670236e-05, "loss": 0.1386, "step": 14120 }, { "epoch": 1.6744930629669157, "grad_norm": 0.7892305846791084, "learning_rate": 3.267746872700688e-05, "loss": 0.1902, "step": 14121 }, { "epoch": 1.6746116447290407, "grad_norm": 0.7559416318089667, "learning_rate": 3.2675184321566685e-05, "loss": 0.1293, "step": 14122 }, { "epoch": 1.6747302264911657, "grad_norm": 0.7789554308566117, "learning_rate": 3.267289984537069e-05, "loss": 0.1533, "step": 14123 }, { "epoch": 1.6748488082532906, "grad_norm": 1.2197171781912353, "learning_rate": 3.267061529843998e-05, "loss": 0.2115, "step": 14124 }, { "epoch": 1.6749673900154156, "grad_norm": 1.300556924826775, "learning_rate": 3.26683306807956e-05, "loss": 0.211, "step": 14125 }, { "epoch": 1.6750859717775406, "grad_norm": 0.7556837887915353, "learning_rate": 3.2666045992458616e-05, "loss": 0.1399, "step": 14126 }, { "epoch": 1.6752045535396656, "grad_norm": 1.1359203809439524, "learning_rate": 3.2663761233450084e-05, "loss": 0.2321, "step": 14127 }, { "epoch": 1.6753231353017906, "grad_norm": 1.1151303304695956, "learning_rate": 3.266147640379108e-05, "loss": 0.2519, "step": 14128 }, { "epoch": 1.6754417170639155, "grad_norm": 1.2163413741797233, "learning_rate": 3.265919150350265e-05, "loss": 0.2333, "step": 14129 }, { "epoch": 1.6755602988260405, "grad_norm": 0.9090864047874648, "learning_rate": 3.2656906532605866e-05, "loss": 0.1996, "step": 14130 }, { "epoch": 1.6756788805881655, "grad_norm": 1.2020993430371687, "learning_rate": 3.26546214911218e-05, "loss": 0.2381, "step": 14131 }, { "epoch": 1.6757974623502905, "grad_norm": 0.7892415251728673, "learning_rate": 3.265233637907151e-05, "loss": 0.1522, "step": 14132 }, { "epoch": 1.6759160441124155, "grad_norm": 0.995823506887617, "learning_rate": 3.265005119647606e-05, "loss": 0.2526, "step": 14133 }, { "epoch": 1.6760346258745404, "grad_norm": 0.9835020373502017, "learning_rate": 3.264776594335652e-05, "loss": 0.1878, "step": 14134 }, { "epoch": 1.6761532076366654, "grad_norm": 1.193188993631261, "learning_rate": 3.2645480619733956e-05, "loss": 0.2082, "step": 14135 }, { "epoch": 1.6762717893987906, "grad_norm": 0.9553936431114728, "learning_rate": 3.264319522562944e-05, "loss": 0.1641, "step": 14136 }, { "epoch": 1.6763903711609154, "grad_norm": 1.091447092157217, "learning_rate": 3.264090976106403e-05, "loss": 0.2136, "step": 14137 }, { "epoch": 1.6765089529230406, "grad_norm": 0.7532120819630432, "learning_rate": 3.26386242260588e-05, "loss": 0.1359, "step": 14138 }, { "epoch": 1.6766275346851653, "grad_norm": 1.0558583033079587, "learning_rate": 3.263633862063483e-05, "loss": 0.2221, "step": 14139 }, { "epoch": 1.6767461164472905, "grad_norm": 0.6512982897408541, "learning_rate": 3.263405294481318e-05, "loss": 0.1412, "step": 14140 }, { "epoch": 1.6768646982094153, "grad_norm": 1.0364525155173465, "learning_rate": 3.263176719861492e-05, "loss": 0.2402, "step": 14141 }, { "epoch": 1.6769832799715405, "grad_norm": 0.6901414601402055, "learning_rate": 3.262948138206112e-05, "loss": 0.1281, "step": 14142 }, { "epoch": 1.6771018617336653, "grad_norm": 1.067261426378336, "learning_rate": 3.262719549517287e-05, "loss": 0.1818, "step": 14143 }, { "epoch": 1.6772204434957905, "grad_norm": 0.789088830750186, "learning_rate": 3.262490953797123e-05, "loss": 0.1402, "step": 14144 }, { "epoch": 1.6773390252579152, "grad_norm": 0.9581810041186875, "learning_rate": 3.2622623510477265e-05, "loss": 0.1515, "step": 14145 }, { "epoch": 1.6774576070200404, "grad_norm": 1.2163532282087561, "learning_rate": 3.2620337412712065e-05, "loss": 0.2164, "step": 14146 }, { "epoch": 1.6775761887821652, "grad_norm": 1.1125405789161962, "learning_rate": 3.2618051244696697e-05, "loss": 0.2054, "step": 14147 }, { "epoch": 1.6776947705442904, "grad_norm": 1.0790600611890262, "learning_rate": 3.2615765006452234e-05, "loss": 0.2149, "step": 14148 }, { "epoch": 1.6778133523064152, "grad_norm": 0.8375563333619894, "learning_rate": 3.2613478697999765e-05, "loss": 0.2027, "step": 14149 }, { "epoch": 1.6779319340685404, "grad_norm": 0.9098866514764675, "learning_rate": 3.261119231936035e-05, "loss": 0.1862, "step": 14150 }, { "epoch": 1.6780505158306651, "grad_norm": 0.7214579578562321, "learning_rate": 3.260890587055508e-05, "loss": 0.1351, "step": 14151 }, { "epoch": 1.6781690975927903, "grad_norm": 0.9028742733300292, "learning_rate": 3.260661935160503e-05, "loss": 0.1603, "step": 14152 }, { "epoch": 1.678287679354915, "grad_norm": 0.9304319115653008, "learning_rate": 3.2604332762531277e-05, "loss": 0.2121, "step": 14153 }, { "epoch": 1.6784062611170403, "grad_norm": 0.8712306183623889, "learning_rate": 3.26020461033549e-05, "loss": 0.1631, "step": 14154 }, { "epoch": 1.678524842879165, "grad_norm": 0.9504757854757154, "learning_rate": 3.259975937409698e-05, "loss": 0.2441, "step": 14155 }, { "epoch": 1.6786434246412902, "grad_norm": 1.1830478041163803, "learning_rate": 3.25974725747786e-05, "loss": 0.2154, "step": 14156 }, { "epoch": 1.678762006403415, "grad_norm": 0.8945345271530879, "learning_rate": 3.259518570542084e-05, "loss": 0.1432, "step": 14157 }, { "epoch": 1.6788805881655402, "grad_norm": 1.0629038040533951, "learning_rate": 3.259289876604477e-05, "loss": 0.1437, "step": 14158 }, { "epoch": 1.6789991699276652, "grad_norm": 0.9884223128565195, "learning_rate": 3.25906117566715e-05, "loss": 0.1958, "step": 14159 }, { "epoch": 1.6791177516897902, "grad_norm": 0.7352986774061605, "learning_rate": 3.258832467732209e-05, "loss": 0.1794, "step": 14160 }, { "epoch": 1.6792363334519151, "grad_norm": 1.25992818464309, "learning_rate": 3.258603752801763e-05, "loss": 0.239, "step": 14161 }, { "epoch": 1.6793549152140401, "grad_norm": 1.1417839935698186, "learning_rate": 3.258375030877921e-05, "loss": 0.1809, "step": 14162 }, { "epoch": 1.679473496976165, "grad_norm": 1.2533050163876454, "learning_rate": 3.2581463019627913e-05, "loss": 0.2986, "step": 14163 }, { "epoch": 1.67959207873829, "grad_norm": 0.8302052370296442, "learning_rate": 3.257917566058482e-05, "loss": 0.166, "step": 14164 }, { "epoch": 1.679710660500415, "grad_norm": 1.0789985666541704, "learning_rate": 3.257688823167103e-05, "loss": 0.1756, "step": 14165 }, { "epoch": 1.67982924226254, "grad_norm": 1.205627368736549, "learning_rate": 3.257460073290761e-05, "loss": 0.2641, "step": 14166 }, { "epoch": 1.679947824024665, "grad_norm": 0.789627565387131, "learning_rate": 3.257231316431567e-05, "loss": 0.2132, "step": 14167 }, { "epoch": 1.68006640578679, "grad_norm": 1.3530929251802595, "learning_rate": 3.257002552591629e-05, "loss": 0.2896, "step": 14168 }, { "epoch": 1.680184987548915, "grad_norm": 0.8121384603060827, "learning_rate": 3.2567737817730545e-05, "loss": 0.1881, "step": 14169 }, { "epoch": 1.68030356931104, "grad_norm": 1.0374155659931033, "learning_rate": 3.256545003977955e-05, "loss": 0.2054, "step": 14170 }, { "epoch": 1.680422151073165, "grad_norm": 1.0425234324717128, "learning_rate": 3.2563162192084375e-05, "loss": 0.1843, "step": 14171 }, { "epoch": 1.68054073283529, "grad_norm": 0.9081117517270473, "learning_rate": 3.256087427466612e-05, "loss": 0.2221, "step": 14172 }, { "epoch": 1.680659314597415, "grad_norm": 1.2840197768703425, "learning_rate": 3.255858628754588e-05, "loss": 0.2976, "step": 14173 }, { "epoch": 1.68077789635954, "grad_norm": 1.3043819292391445, "learning_rate": 3.2556298230744744e-05, "loss": 0.2081, "step": 14174 }, { "epoch": 1.6808964781216649, "grad_norm": 0.8042531158330805, "learning_rate": 3.25540101042838e-05, "loss": 0.1796, "step": 14175 }, { "epoch": 1.6810150598837899, "grad_norm": 0.7118691682462734, "learning_rate": 3.2551721908184145e-05, "loss": 0.1547, "step": 14176 }, { "epoch": 1.6811336416459148, "grad_norm": 0.8172873833619847, "learning_rate": 3.2549433642466875e-05, "loss": 0.1618, "step": 14177 }, { "epoch": 1.6812522234080398, "grad_norm": 1.2715086516488343, "learning_rate": 3.2547145307153095e-05, "loss": 0.2825, "step": 14178 }, { "epoch": 1.6813708051701648, "grad_norm": 0.8839443437972022, "learning_rate": 3.254485690226387e-05, "loss": 0.2099, "step": 14179 }, { "epoch": 1.6814893869322898, "grad_norm": 0.9858074561520345, "learning_rate": 3.254256842782034e-05, "loss": 0.1919, "step": 14180 }, { "epoch": 1.6816079686944148, "grad_norm": 0.9028810723886407, "learning_rate": 3.254027988384356e-05, "loss": 0.1892, "step": 14181 }, { "epoch": 1.6817265504565397, "grad_norm": 0.7047917300779597, "learning_rate": 3.2537991270354656e-05, "loss": 0.1704, "step": 14182 }, { "epoch": 1.6818451322186647, "grad_norm": 1.7462350606544355, "learning_rate": 3.253570258737471e-05, "loss": 0.3977, "step": 14183 }, { "epoch": 1.6819637139807897, "grad_norm": 0.7661245835241396, "learning_rate": 3.253341383492483e-05, "loss": 0.165, "step": 14184 }, { "epoch": 1.682082295742915, "grad_norm": 1.0305763873107472, "learning_rate": 3.253112501302611e-05, "loss": 0.1906, "step": 14185 }, { "epoch": 1.6822008775050397, "grad_norm": 0.7209972495716043, "learning_rate": 3.252883612169966e-05, "loss": 0.1948, "step": 14186 }, { "epoch": 1.6823194592671649, "grad_norm": 1.0351462087452914, "learning_rate": 3.2526547160966566e-05, "loss": 0.1989, "step": 14187 }, { "epoch": 1.6824380410292896, "grad_norm": 0.7453146288535434, "learning_rate": 3.252425813084794e-05, "loss": 0.1548, "step": 14188 }, { "epoch": 1.6825566227914148, "grad_norm": 1.1306396421624252, "learning_rate": 3.2521969031364885e-05, "loss": 0.2613, "step": 14189 }, { "epoch": 1.6826752045535396, "grad_norm": 0.784324188789529, "learning_rate": 3.251967986253849e-05, "loss": 0.1542, "step": 14190 }, { "epoch": 1.6827937863156648, "grad_norm": 0.9101702061055479, "learning_rate": 3.251739062438988e-05, "loss": 0.2053, "step": 14191 }, { "epoch": 1.6829123680777895, "grad_norm": 1.1396151259743459, "learning_rate": 3.251510131694013e-05, "loss": 0.231, "step": 14192 }, { "epoch": 1.6830309498399147, "grad_norm": 0.682181337058548, "learning_rate": 3.2512811940210375e-05, "loss": 0.1308, "step": 14193 }, { "epoch": 1.6831495316020395, "grad_norm": 1.121361887541787, "learning_rate": 3.251052249422171e-05, "loss": 0.239, "step": 14194 }, { "epoch": 1.6832681133641647, "grad_norm": 1.1909209580690958, "learning_rate": 3.250823297899523e-05, "loss": 0.2668, "step": 14195 }, { "epoch": 1.6833866951262895, "grad_norm": 0.7576429723068779, "learning_rate": 3.250594339455205e-05, "loss": 0.1501, "step": 14196 }, { "epoch": 1.6835052768884147, "grad_norm": 0.8437984007028245, "learning_rate": 3.2503653740913275e-05, "loss": 0.2144, "step": 14197 }, { "epoch": 1.6836238586505394, "grad_norm": 1.1785066350433826, "learning_rate": 3.250136401810001e-05, "loss": 0.2081, "step": 14198 }, { "epoch": 1.6837424404126646, "grad_norm": 1.061128124461869, "learning_rate": 3.249907422613337e-05, "loss": 0.1784, "step": 14199 }, { "epoch": 1.6838610221747894, "grad_norm": 1.1765533281827658, "learning_rate": 3.249678436503447e-05, "loss": 0.2499, "step": 14200 }, { "epoch": 1.6839796039369146, "grad_norm": 0.9719483011122969, "learning_rate": 3.2494494434824406e-05, "loss": 0.1815, "step": 14201 }, { "epoch": 1.6840981856990394, "grad_norm": 0.8360546726943604, "learning_rate": 3.2492204435524295e-05, "loss": 0.1786, "step": 14202 }, { "epoch": 1.6842167674611646, "grad_norm": 0.9182910962054558, "learning_rate": 3.248991436715524e-05, "loss": 0.207, "step": 14203 }, { "epoch": 1.6843353492232893, "grad_norm": 1.0308037395543952, "learning_rate": 3.2487624229738364e-05, "loss": 0.2325, "step": 14204 }, { "epoch": 1.6844539309854145, "grad_norm": 0.7911861630228919, "learning_rate": 3.248533402329478e-05, "loss": 0.1398, "step": 14205 }, { "epoch": 1.6845725127475393, "grad_norm": 0.6730710562353177, "learning_rate": 3.248304374784559e-05, "loss": 0.1278, "step": 14206 }, { "epoch": 1.6846910945096645, "grad_norm": 1.1504067899707735, "learning_rate": 3.248075340341191e-05, "loss": 0.2332, "step": 14207 }, { "epoch": 1.6848096762717892, "grad_norm": 1.0583590845014528, "learning_rate": 3.247846299001486e-05, "loss": 0.1892, "step": 14208 }, { "epoch": 1.6849282580339144, "grad_norm": 0.9311837878490906, "learning_rate": 3.2476172507675556e-05, "loss": 0.1663, "step": 14209 }, { "epoch": 1.6850468397960394, "grad_norm": 0.8285293701919322, "learning_rate": 3.2473881956415105e-05, "loss": 0.1872, "step": 14210 }, { "epoch": 1.6851654215581644, "grad_norm": 1.098688604988998, "learning_rate": 3.247159133625462e-05, "loss": 0.2274, "step": 14211 }, { "epoch": 1.6852840033202894, "grad_norm": 0.7601927008568096, "learning_rate": 3.246930064721524e-05, "loss": 0.1844, "step": 14212 }, { "epoch": 1.6854025850824144, "grad_norm": 1.4008895384008968, "learning_rate": 3.2467009889318056e-05, "loss": 0.2782, "step": 14213 }, { "epoch": 1.6855211668445393, "grad_norm": 1.0837098286888756, "learning_rate": 3.24647190625842e-05, "loss": 0.2571, "step": 14214 }, { "epoch": 1.6856397486066643, "grad_norm": 0.9045142341655249, "learning_rate": 3.2462428167034795e-05, "loss": 0.1177, "step": 14215 }, { "epoch": 1.6857583303687893, "grad_norm": 0.8053861254966125, "learning_rate": 3.246013720269095e-05, "loss": 0.149, "step": 14216 }, { "epoch": 1.6858769121309143, "grad_norm": 0.7185803274574651, "learning_rate": 3.245784616957379e-05, "loss": 0.1602, "step": 14217 }, { "epoch": 1.6859954938930393, "grad_norm": 1.009403146007683, "learning_rate": 3.2455555067704426e-05, "loss": 0.2449, "step": 14218 }, { "epoch": 1.6861140756551642, "grad_norm": 0.9145856418686991, "learning_rate": 3.2453263897103994e-05, "loss": 0.1796, "step": 14219 }, { "epoch": 1.6862326574172892, "grad_norm": 1.0888456522416476, "learning_rate": 3.2450972657793605e-05, "loss": 0.2001, "step": 14220 }, { "epoch": 1.6863512391794142, "grad_norm": 0.8143565102214538, "learning_rate": 3.2448681349794386e-05, "loss": 0.1624, "step": 14221 }, { "epoch": 1.6864698209415392, "grad_norm": 0.7383536725781559, "learning_rate": 3.2446389973127454e-05, "loss": 0.1408, "step": 14222 }, { "epoch": 1.6865884027036642, "grad_norm": 1.0293861580753407, "learning_rate": 3.244409852781395e-05, "loss": 0.2156, "step": 14223 }, { "epoch": 1.6867069844657891, "grad_norm": 0.9786622848496125, "learning_rate": 3.244180701387497e-05, "loss": 0.2176, "step": 14224 }, { "epoch": 1.6868255662279141, "grad_norm": 1.097613184227653, "learning_rate": 3.2439515431331675e-05, "loss": 0.1995, "step": 14225 }, { "epoch": 1.6869441479900391, "grad_norm": 0.7190377244710328, "learning_rate": 3.2437223780205156e-05, "loss": 0.1251, "step": 14226 }, { "epoch": 1.687062729752164, "grad_norm": 0.9648372815453783, "learning_rate": 3.243493206051655e-05, "loss": 0.1772, "step": 14227 }, { "epoch": 1.687181311514289, "grad_norm": 0.9917699936904283, "learning_rate": 3.243264027228701e-05, "loss": 0.1764, "step": 14228 }, { "epoch": 1.687299893276414, "grad_norm": 0.8923621209566635, "learning_rate": 3.243034841553762e-05, "loss": 0.1411, "step": 14229 }, { "epoch": 1.687418475038539, "grad_norm": 0.827718758083886, "learning_rate": 3.242805649028953e-05, "loss": 0.1648, "step": 14230 }, { "epoch": 1.687537056800664, "grad_norm": 0.8547815251724015, "learning_rate": 3.2425764496563875e-05, "loss": 0.1552, "step": 14231 }, { "epoch": 1.687655638562789, "grad_norm": 0.7496940968637957, "learning_rate": 3.242347243438178e-05, "loss": 0.1469, "step": 14232 }, { "epoch": 1.687774220324914, "grad_norm": 0.8914162938190356, "learning_rate": 3.242118030376437e-05, "loss": 0.2186, "step": 14233 }, { "epoch": 1.6878928020870392, "grad_norm": 0.8315333114513248, "learning_rate": 3.241888810473278e-05, "loss": 0.16, "step": 14234 }, { "epoch": 1.688011383849164, "grad_norm": 1.1728224040948239, "learning_rate": 3.241659583730813e-05, "loss": 0.2113, "step": 14235 }, { "epoch": 1.6881299656112891, "grad_norm": 0.800048859978388, "learning_rate": 3.241430350151157e-05, "loss": 0.1556, "step": 14236 }, { "epoch": 1.688248547373414, "grad_norm": 0.7857848612107878, "learning_rate": 3.2412011097364226e-05, "loss": 0.1501, "step": 14237 }, { "epoch": 1.688367129135539, "grad_norm": 1.3479007019800349, "learning_rate": 3.2409718624887225e-05, "loss": 0.2792, "step": 14238 }, { "epoch": 1.6884857108976639, "grad_norm": 0.957391662178435, "learning_rate": 3.24074260841017e-05, "loss": 0.1931, "step": 14239 }, { "epoch": 1.688604292659789, "grad_norm": 0.8840508991047685, "learning_rate": 3.240513347502879e-05, "loss": 0.2189, "step": 14240 }, { "epoch": 1.6887228744219138, "grad_norm": 1.3952749182483883, "learning_rate": 3.2402840797689636e-05, "loss": 0.2497, "step": 14241 }, { "epoch": 1.688841456184039, "grad_norm": 1.2936862500617867, "learning_rate": 3.240054805210536e-05, "loss": 0.2476, "step": 14242 }, { "epoch": 1.6889600379461638, "grad_norm": 0.8263270609879297, "learning_rate": 3.239825523829712e-05, "loss": 0.1887, "step": 14243 }, { "epoch": 1.689078619708289, "grad_norm": 0.8188779133838462, "learning_rate": 3.239596235628603e-05, "loss": 0.187, "step": 14244 }, { "epoch": 1.6891972014704137, "grad_norm": 0.5534406163418794, "learning_rate": 3.239366940609324e-05, "loss": 0.1419, "step": 14245 }, { "epoch": 1.689315783232539, "grad_norm": 0.9410360598056432, "learning_rate": 3.239137638773988e-05, "loss": 0.1927, "step": 14246 }, { "epoch": 1.6894343649946637, "grad_norm": 0.8424012487308424, "learning_rate": 3.2389083301247096e-05, "loss": 0.1313, "step": 14247 }, { "epoch": 1.689552946756789, "grad_norm": 0.9047070330292294, "learning_rate": 3.2386790146636025e-05, "loss": 0.1821, "step": 14248 }, { "epoch": 1.6896715285189137, "grad_norm": 0.8681897831646453, "learning_rate": 3.238449692392781e-05, "loss": 0.1729, "step": 14249 }, { "epoch": 1.6897901102810389, "grad_norm": 0.8356865672771268, "learning_rate": 3.238220363314358e-05, "loss": 0.1478, "step": 14250 }, { "epoch": 1.6899086920431636, "grad_norm": 0.9128241354425364, "learning_rate": 3.237991027430449e-05, "loss": 0.1875, "step": 14251 }, { "epoch": 1.6900272738052888, "grad_norm": 0.8379980327344866, "learning_rate": 3.237761684743168e-05, "loss": 0.1585, "step": 14252 }, { "epoch": 1.6901458555674136, "grad_norm": 0.8404635213215531, "learning_rate": 3.237532335254629e-05, "loss": 0.1627, "step": 14253 }, { "epoch": 1.6902644373295388, "grad_norm": 1.062552381039293, "learning_rate": 3.237302978966946e-05, "loss": 0.1737, "step": 14254 }, { "epoch": 1.6903830190916636, "grad_norm": 0.9276685933600548, "learning_rate": 3.2370736158822333e-05, "loss": 0.1815, "step": 14255 }, { "epoch": 1.6905016008537888, "grad_norm": 0.9658675007510016, "learning_rate": 3.236844246002607e-05, "loss": 0.1595, "step": 14256 }, { "epoch": 1.6906201826159135, "grad_norm": 0.8506749963960034, "learning_rate": 3.236614869330179e-05, "loss": 0.1581, "step": 14257 }, { "epoch": 1.6907387643780387, "grad_norm": 0.9771356684623155, "learning_rate": 3.236385485867066e-05, "loss": 0.1894, "step": 14258 }, { "epoch": 1.6908573461401637, "grad_norm": 0.8969976935039412, "learning_rate": 3.236156095615381e-05, "loss": 0.1785, "step": 14259 }, { "epoch": 1.6909759279022887, "grad_norm": 0.6489702016350414, "learning_rate": 3.2359266985772405e-05, "loss": 0.1575, "step": 14260 }, { "epoch": 1.6910945096644137, "grad_norm": 1.7732027576351277, "learning_rate": 3.2356972947547574e-05, "loss": 0.3555, "step": 14261 }, { "epoch": 1.6912130914265386, "grad_norm": 0.7184736013623478, "learning_rate": 3.2354678841500484e-05, "loss": 0.164, "step": 14262 }, { "epoch": 1.6913316731886636, "grad_norm": 1.0240644960556071, "learning_rate": 3.235238466765227e-05, "loss": 0.2389, "step": 14263 }, { "epoch": 1.6914502549507886, "grad_norm": 0.8086787002450174, "learning_rate": 3.235009042602409e-05, "loss": 0.1586, "step": 14264 }, { "epoch": 1.6915688367129136, "grad_norm": 0.9751905361469323, "learning_rate": 3.234779611663707e-05, "loss": 0.1773, "step": 14265 }, { "epoch": 1.6916874184750386, "grad_norm": 0.7571782265437204, "learning_rate": 3.234550173951241e-05, "loss": 0.1598, "step": 14266 }, { "epoch": 1.6918060002371635, "grad_norm": 1.1670019490346188, "learning_rate": 3.234320729467121e-05, "loss": 0.2232, "step": 14267 }, { "epoch": 1.6919245819992885, "grad_norm": 0.9623292975482933, "learning_rate": 3.234091278213465e-05, "loss": 0.206, "step": 14268 }, { "epoch": 1.6920431637614135, "grad_norm": 1.0690629285417756, "learning_rate": 3.233861820192388e-05, "loss": 0.2591, "step": 14269 }, { "epoch": 1.6921617455235385, "grad_norm": 0.7261249934773533, "learning_rate": 3.233632355406004e-05, "loss": 0.1827, "step": 14270 }, { "epoch": 1.6922803272856635, "grad_norm": 0.9836623615577823, "learning_rate": 3.2334028838564294e-05, "loss": 0.2555, "step": 14271 }, { "epoch": 1.6923989090477884, "grad_norm": 0.8678522337562645, "learning_rate": 3.2331734055457795e-05, "loss": 0.1694, "step": 14272 }, { "epoch": 1.6925174908099134, "grad_norm": 0.9216200911527571, "learning_rate": 3.232943920476171e-05, "loss": 0.1631, "step": 14273 }, { "epoch": 1.6926360725720384, "grad_norm": 0.8767130256723576, "learning_rate": 3.2327144286497176e-05, "loss": 0.1835, "step": 14274 }, { "epoch": 1.6927546543341634, "grad_norm": 0.8381689183473718, "learning_rate": 3.232484930068536e-05, "loss": 0.1934, "step": 14275 }, { "epoch": 1.6928732360962884, "grad_norm": 1.1405684477248532, "learning_rate": 3.232255424734741e-05, "loss": 0.1738, "step": 14276 }, { "epoch": 1.6929918178584134, "grad_norm": 0.857045702255255, "learning_rate": 3.23202591265045e-05, "loss": 0.206, "step": 14277 }, { "epoch": 1.6931103996205383, "grad_norm": 1.0265298322474004, "learning_rate": 3.231796393817776e-05, "loss": 0.1798, "step": 14278 }, { "epoch": 1.6932289813826633, "grad_norm": 0.6773863268485648, "learning_rate": 3.231566868238838e-05, "loss": 0.1567, "step": 14279 }, { "epoch": 1.6933475631447883, "grad_norm": 0.7133562260945336, "learning_rate": 3.231337335915751e-05, "loss": 0.1202, "step": 14280 }, { "epoch": 1.6934661449069133, "grad_norm": 1.0181678445902773, "learning_rate": 3.23110779685063e-05, "loss": 0.2152, "step": 14281 }, { "epoch": 1.6935847266690383, "grad_norm": 0.9012381187787735, "learning_rate": 3.230878251045591e-05, "loss": 0.1714, "step": 14282 }, { "epoch": 1.6937033084311632, "grad_norm": 1.0545445204396877, "learning_rate": 3.230648698502752e-05, "loss": 0.2061, "step": 14283 }, { "epoch": 1.6938218901932882, "grad_norm": 0.7643397508945838, "learning_rate": 3.2304191392242266e-05, "loss": 0.1578, "step": 14284 }, { "epoch": 1.6939404719554134, "grad_norm": 0.9102177456013667, "learning_rate": 3.230189573212133e-05, "loss": 0.1853, "step": 14285 }, { "epoch": 1.6940590537175382, "grad_norm": 0.8300364436745771, "learning_rate": 3.229960000468587e-05, "loss": 0.1616, "step": 14286 }, { "epoch": 1.6941776354796634, "grad_norm": 1.0084698586782475, "learning_rate": 3.229730420995705e-05, "loss": 0.2817, "step": 14287 }, { "epoch": 1.6942962172417881, "grad_norm": 0.9636559146502206, "learning_rate": 3.229500834795604e-05, "loss": 0.1892, "step": 14288 }, { "epoch": 1.6944147990039133, "grad_norm": 1.139425922398314, "learning_rate": 3.229271241870399e-05, "loss": 0.2079, "step": 14289 }, { "epoch": 1.694533380766038, "grad_norm": 0.7620230616739522, "learning_rate": 3.2290416422222074e-05, "loss": 0.1705, "step": 14290 }, { "epoch": 1.6946519625281633, "grad_norm": 0.9814617053722875, "learning_rate": 3.228812035853146e-05, "loss": 0.1945, "step": 14291 }, { "epoch": 1.694770544290288, "grad_norm": 0.7171947072142744, "learning_rate": 3.228582422765332e-05, "loss": 0.1257, "step": 14292 }, { "epoch": 1.6948891260524133, "grad_norm": 0.950484921752032, "learning_rate": 3.228352802960881e-05, "loss": 0.1918, "step": 14293 }, { "epoch": 1.695007707814538, "grad_norm": 1.0428456605138074, "learning_rate": 3.22812317644191e-05, "loss": 0.237, "step": 14294 }, { "epoch": 1.6951262895766632, "grad_norm": 0.9159808356419789, "learning_rate": 3.227893543210537e-05, "loss": 0.1978, "step": 14295 }, { "epoch": 1.695244871338788, "grad_norm": 0.878480374632727, "learning_rate": 3.227663903268878e-05, "loss": 0.1673, "step": 14296 }, { "epoch": 1.6953634531009132, "grad_norm": 0.8997148555635859, "learning_rate": 3.22743425661905e-05, "loss": 0.1629, "step": 14297 }, { "epoch": 1.695482034863038, "grad_norm": 1.3192942288418892, "learning_rate": 3.227204603263169e-05, "loss": 0.2537, "step": 14298 }, { "epoch": 1.6956006166251631, "grad_norm": 0.7809831542144268, "learning_rate": 3.226974943203355e-05, "loss": 0.1708, "step": 14299 }, { "epoch": 1.695719198387288, "grad_norm": 1.0755577495876862, "learning_rate": 3.226745276441723e-05, "loss": 0.2065, "step": 14300 }, { "epoch": 1.695837780149413, "grad_norm": 0.7580451012646419, "learning_rate": 3.22651560298039e-05, "loss": 0.1819, "step": 14301 }, { "epoch": 1.6959563619115379, "grad_norm": 0.7659848256761227, "learning_rate": 3.2262859228214746e-05, "loss": 0.191, "step": 14302 }, { "epoch": 1.696074943673663, "grad_norm": 0.9064800590510798, "learning_rate": 3.2260562359670935e-05, "loss": 0.1769, "step": 14303 }, { "epoch": 1.6961935254357878, "grad_norm": 1.0908998892512758, "learning_rate": 3.2258265424193646e-05, "loss": 0.2267, "step": 14304 }, { "epoch": 1.696312107197913, "grad_norm": 0.9011951749173837, "learning_rate": 3.225596842180405e-05, "loss": 0.1577, "step": 14305 }, { "epoch": 1.6964306889600378, "grad_norm": 0.8920911156772733, "learning_rate": 3.2253671352523315e-05, "loss": 0.1834, "step": 14306 }, { "epoch": 1.696549270722163, "grad_norm": 1.0265449277024281, "learning_rate": 3.2251374216372645e-05, "loss": 0.2429, "step": 14307 }, { "epoch": 1.6966678524842878, "grad_norm": 1.3410379472545695, "learning_rate": 3.224907701337317e-05, "loss": 0.338, "step": 14308 }, { "epoch": 1.696786434246413, "grad_norm": 1.1830711028116796, "learning_rate": 3.2246779743546114e-05, "loss": 0.2714, "step": 14309 }, { "epoch": 1.696905016008538, "grad_norm": 0.8928709999156607, "learning_rate": 3.2244482406912626e-05, "loss": 0.2257, "step": 14310 }, { "epoch": 1.697023597770663, "grad_norm": 1.125398531543924, "learning_rate": 3.2242185003493896e-05, "loss": 0.2409, "step": 14311 }, { "epoch": 1.697142179532788, "grad_norm": 1.020751157119398, "learning_rate": 3.22398875333111e-05, "loss": 0.1834, "step": 14312 }, { "epoch": 1.6972607612949129, "grad_norm": 0.7817806795584271, "learning_rate": 3.223758999638542e-05, "loss": 0.1359, "step": 14313 }, { "epoch": 1.6973793430570379, "grad_norm": 0.6310987881217248, "learning_rate": 3.2235292392738045e-05, "loss": 0.1495, "step": 14314 }, { "epoch": 1.6974979248191628, "grad_norm": 0.9521644976480976, "learning_rate": 3.2232994722390136e-05, "loss": 0.232, "step": 14315 }, { "epoch": 1.6976165065812878, "grad_norm": 1.4881539454423385, "learning_rate": 3.223069698536289e-05, "loss": 0.2639, "step": 14316 }, { "epoch": 1.6977350883434128, "grad_norm": 1.082460481074212, "learning_rate": 3.222839918167748e-05, "loss": 0.235, "step": 14317 }, { "epoch": 1.6978536701055378, "grad_norm": 0.9187106016533045, "learning_rate": 3.2226101311355104e-05, "loss": 0.1737, "step": 14318 }, { "epoch": 1.6979722518676628, "grad_norm": 1.0688307675853228, "learning_rate": 3.2223803374416924e-05, "loss": 0.2367, "step": 14319 }, { "epoch": 1.6980908336297877, "grad_norm": 0.9170388538497265, "learning_rate": 3.222150537088413e-05, "loss": 0.1547, "step": 14320 }, { "epoch": 1.6982094153919127, "grad_norm": 0.9391343938354756, "learning_rate": 3.221920730077793e-05, "loss": 0.1974, "step": 14321 }, { "epoch": 1.6983279971540377, "grad_norm": 0.7923324580800825, "learning_rate": 3.221690916411948e-05, "loss": 0.1798, "step": 14322 }, { "epoch": 1.6984465789161627, "grad_norm": 0.9338046786413092, "learning_rate": 3.221461096092998e-05, "loss": 0.2055, "step": 14323 }, { "epoch": 1.6985651606782877, "grad_norm": 0.714069990832568, "learning_rate": 3.221231269123062e-05, "loss": 0.1733, "step": 14324 }, { "epoch": 1.6986837424404126, "grad_norm": 0.9065108220189639, "learning_rate": 3.221001435504257e-05, "loss": 0.2207, "step": 14325 }, { "epoch": 1.6988023242025376, "grad_norm": 0.8354394537242456, "learning_rate": 3.220771595238703e-05, "loss": 0.1568, "step": 14326 }, { "epoch": 1.6989209059646626, "grad_norm": 0.7879303006569716, "learning_rate": 3.2205417483285193e-05, "loss": 0.2342, "step": 14327 }, { "epoch": 1.6990394877267876, "grad_norm": 0.856687765474829, "learning_rate": 3.220311894775824e-05, "loss": 0.1771, "step": 14328 }, { "epoch": 1.6991580694889126, "grad_norm": 1.1320120803381037, "learning_rate": 3.220082034582737e-05, "loss": 0.2494, "step": 14329 }, { "epoch": 1.6992766512510376, "grad_norm": 1.0983544129291527, "learning_rate": 3.219852167751376e-05, "loss": 0.2675, "step": 14330 }, { "epoch": 1.6993952330131625, "grad_norm": 1.0187832635859149, "learning_rate": 3.219622294283861e-05, "loss": 0.2202, "step": 14331 }, { "epoch": 1.6995138147752875, "grad_norm": 1.2233941274895865, "learning_rate": 3.2193924141823106e-05, "loss": 0.3193, "step": 14332 }, { "epoch": 1.6996323965374125, "grad_norm": 0.8376101828071144, "learning_rate": 3.219162527448845e-05, "loss": 0.178, "step": 14333 }, { "epoch": 1.6997509782995377, "grad_norm": 1.0863554769078119, "learning_rate": 3.218932634085582e-05, "loss": 0.2376, "step": 14334 }, { "epoch": 1.6998695600616625, "grad_norm": 1.1392973531363515, "learning_rate": 3.2187027340946424e-05, "loss": 0.2324, "step": 14335 }, { "epoch": 1.6999881418237877, "grad_norm": 1.1596004856360398, "learning_rate": 3.2184728274781446e-05, "loss": 0.2397, "step": 14336 }, { "epoch": 1.7001067235859124, "grad_norm": 1.0485282450148024, "learning_rate": 3.218242914238209e-05, "loss": 0.2537, "step": 14337 }, { "epoch": 1.7002253053480376, "grad_norm": 0.9311356830322288, "learning_rate": 3.218012994376953e-05, "loss": 0.2216, "step": 14338 }, { "epoch": 1.7003438871101624, "grad_norm": 1.112709561758907, "learning_rate": 3.217783067896498e-05, "loss": 0.1778, "step": 14339 }, { "epoch": 1.7004624688722876, "grad_norm": 1.0298722933919786, "learning_rate": 3.217553134798964e-05, "loss": 0.2305, "step": 14340 }, { "epoch": 1.7005810506344123, "grad_norm": 0.9383487513465345, "learning_rate": 3.2173231950864696e-05, "loss": 0.2033, "step": 14341 }, { "epoch": 1.7006996323965375, "grad_norm": 0.8722290617473583, "learning_rate": 3.217093248761136e-05, "loss": 0.2334, "step": 14342 }, { "epoch": 1.7008182141586623, "grad_norm": 1.1348551372330216, "learning_rate": 3.216863295825081e-05, "loss": 0.2602, "step": 14343 }, { "epoch": 1.7009367959207875, "grad_norm": 0.7470808335404696, "learning_rate": 3.2166333362804256e-05, "loss": 0.1536, "step": 14344 }, { "epoch": 1.7010553776829123, "grad_norm": 1.0301171212960625, "learning_rate": 3.21640337012929e-05, "loss": 0.1996, "step": 14345 }, { "epoch": 1.7011739594450375, "grad_norm": 0.6222124543088158, "learning_rate": 3.216173397373794e-05, "loss": 0.1306, "step": 14346 }, { "epoch": 1.7012925412071622, "grad_norm": 1.9380448676891942, "learning_rate": 3.215943418016057e-05, "loss": 0.4509, "step": 14347 }, { "epoch": 1.7014111229692874, "grad_norm": 0.9576820265239561, "learning_rate": 3.2157134320582e-05, "loss": 0.228, "step": 14348 }, { "epoch": 1.7015297047314122, "grad_norm": 0.7868847850207586, "learning_rate": 3.215483439502342e-05, "loss": 0.1628, "step": 14349 }, { "epoch": 1.7016482864935374, "grad_norm": 1.0860000611034908, "learning_rate": 3.2152534403506046e-05, "loss": 0.2482, "step": 14350 }, { "epoch": 1.7017668682556621, "grad_norm": 0.8382223751022105, "learning_rate": 3.215023434605108e-05, "loss": 0.1914, "step": 14351 }, { "epoch": 1.7018854500177873, "grad_norm": 0.9214465297032294, "learning_rate": 3.2147934222679714e-05, "loss": 0.1696, "step": 14352 }, { "epoch": 1.702004031779912, "grad_norm": 1.1731679409524944, "learning_rate": 3.2145634033413165e-05, "loss": 0.2454, "step": 14353 }, { "epoch": 1.7021226135420373, "grad_norm": 1.3920866963807712, "learning_rate": 3.214333377827263e-05, "loss": 0.2976, "step": 14354 }, { "epoch": 1.702241195304162, "grad_norm": 0.7516369243577307, "learning_rate": 3.214103345727932e-05, "loss": 0.1316, "step": 14355 }, { "epoch": 1.7023597770662873, "grad_norm": 1.0042020473530435, "learning_rate": 3.213873307045443e-05, "loss": 0.2031, "step": 14356 }, { "epoch": 1.702478358828412, "grad_norm": 1.2436057194094277, "learning_rate": 3.213643261781919e-05, "loss": 0.2655, "step": 14357 }, { "epoch": 1.7025969405905372, "grad_norm": 0.9988436050462884, "learning_rate": 3.2134132099394785e-05, "loss": 0.2326, "step": 14358 }, { "epoch": 1.7027155223526622, "grad_norm": 0.7582132336274057, "learning_rate": 3.2131831515202424e-05, "loss": 0.1549, "step": 14359 }, { "epoch": 1.7028341041147872, "grad_norm": 0.7838316232042197, "learning_rate": 3.212953086526333e-05, "loss": 0.1634, "step": 14360 }, { "epoch": 1.7029526858769122, "grad_norm": 0.8156811148349372, "learning_rate": 3.2127230149598705e-05, "loss": 0.1974, "step": 14361 }, { "epoch": 1.7030712676390372, "grad_norm": 1.0278841209748253, "learning_rate": 3.212492936822975e-05, "loss": 0.1902, "step": 14362 }, { "epoch": 1.7031898494011621, "grad_norm": 0.9891798769655182, "learning_rate": 3.21226285211777e-05, "loss": 0.2114, "step": 14363 }, { "epoch": 1.7033084311632871, "grad_norm": 1.0232011285856142, "learning_rate": 3.212032760846374e-05, "loss": 0.1523, "step": 14364 }, { "epoch": 1.703427012925412, "grad_norm": 1.1735963085960446, "learning_rate": 3.211802663010909e-05, "loss": 0.2042, "step": 14365 }, { "epoch": 1.703545594687537, "grad_norm": 0.9256364140426385, "learning_rate": 3.211572558613496e-05, "loss": 0.1884, "step": 14366 }, { "epoch": 1.703664176449662, "grad_norm": 1.1856690785888904, "learning_rate": 3.211342447656257e-05, "loss": 0.3231, "step": 14367 }, { "epoch": 1.703782758211787, "grad_norm": 0.6223672381479726, "learning_rate": 3.2111123301413125e-05, "loss": 0.1332, "step": 14368 }, { "epoch": 1.703901339973912, "grad_norm": 0.9280122208304437, "learning_rate": 3.210882206070784e-05, "loss": 0.2137, "step": 14369 }, { "epoch": 1.704019921736037, "grad_norm": 0.7933607842249649, "learning_rate": 3.210652075446794e-05, "loss": 0.148, "step": 14370 }, { "epoch": 1.704138503498162, "grad_norm": 0.8177689811325, "learning_rate": 3.2104219382714634e-05, "loss": 0.1521, "step": 14371 }, { "epoch": 1.704257085260287, "grad_norm": 0.9672172544348444, "learning_rate": 3.210191794546914e-05, "loss": 0.1784, "step": 14372 }, { "epoch": 1.704375667022412, "grad_norm": 0.8651484599126852, "learning_rate": 3.2099616442752664e-05, "loss": 0.2006, "step": 14373 }, { "epoch": 1.704494248784537, "grad_norm": 1.0411017717012006, "learning_rate": 3.2097314874586434e-05, "loss": 0.1767, "step": 14374 }, { "epoch": 1.704612830546662, "grad_norm": 0.8963069927170062, "learning_rate": 3.209501324099167e-05, "loss": 0.1779, "step": 14375 }, { "epoch": 1.7047314123087869, "grad_norm": 0.8736136327189761, "learning_rate": 3.209271154198958e-05, "loss": 0.2011, "step": 14376 }, { "epoch": 1.7048499940709119, "grad_norm": 1.3124292852641823, "learning_rate": 3.2090409777601384e-05, "loss": 0.2738, "step": 14377 }, { "epoch": 1.7049685758330368, "grad_norm": 1.05992843279346, "learning_rate": 3.208810794784831e-05, "loss": 0.2229, "step": 14378 }, { "epoch": 1.7050871575951618, "grad_norm": 0.981017518873297, "learning_rate": 3.2085806052751565e-05, "loss": 0.1746, "step": 14379 }, { "epoch": 1.7052057393572868, "grad_norm": 0.9036265900102088, "learning_rate": 3.208350409233238e-05, "loss": 0.2209, "step": 14380 }, { "epoch": 1.7053243211194118, "grad_norm": 1.1464752497406812, "learning_rate": 3.208120206661197e-05, "loss": 0.2591, "step": 14381 }, { "epoch": 1.7054429028815368, "grad_norm": 0.7523007703751239, "learning_rate": 3.207889997561157e-05, "loss": 0.1714, "step": 14382 }, { "epoch": 1.7055614846436618, "grad_norm": 0.6646801582795583, "learning_rate": 3.207659781935239e-05, "loss": 0.1582, "step": 14383 }, { "epoch": 1.7056800664057867, "grad_norm": 0.724726640756388, "learning_rate": 3.207429559785565e-05, "loss": 0.1659, "step": 14384 }, { "epoch": 1.705798648167912, "grad_norm": 0.8541630499487631, "learning_rate": 3.207199331114259e-05, "loss": 0.1417, "step": 14385 }, { "epoch": 1.7059172299300367, "grad_norm": 0.7007196704498605, "learning_rate": 3.206969095923441e-05, "loss": 0.1577, "step": 14386 }, { "epoch": 1.706035811692162, "grad_norm": 1.1821680770203713, "learning_rate": 3.206738854215237e-05, "loss": 0.2503, "step": 14387 }, { "epoch": 1.7061543934542867, "grad_norm": 0.9030027573591347, "learning_rate": 3.206508605991765e-05, "loss": 0.2073, "step": 14388 }, { "epoch": 1.7062729752164119, "grad_norm": 0.7834400114695371, "learning_rate": 3.206278351255151e-05, "loss": 0.163, "step": 14389 }, { "epoch": 1.7063915569785366, "grad_norm": 0.6964224228006413, "learning_rate": 3.206048090007517e-05, "loss": 0.171, "step": 14390 }, { "epoch": 1.7065101387406618, "grad_norm": 0.8464832744019939, "learning_rate": 3.2058178222509854e-05, "loss": 0.1823, "step": 14391 }, { "epoch": 1.7066287205027866, "grad_norm": 0.5881905811237658, "learning_rate": 3.205587547987678e-05, "loss": 0.1487, "step": 14392 }, { "epoch": 1.7067473022649118, "grad_norm": 0.8637996161147478, "learning_rate": 3.2053572672197206e-05, "loss": 0.1172, "step": 14393 }, { "epoch": 1.7068658840270365, "grad_norm": 0.897821095796049, "learning_rate": 3.2051269799492326e-05, "loss": 0.2323, "step": 14394 }, { "epoch": 1.7069844657891617, "grad_norm": 1.3442617969415227, "learning_rate": 3.2048966861783394e-05, "loss": 0.233, "step": 14395 }, { "epoch": 1.7071030475512865, "grad_norm": 0.7950285480812964, "learning_rate": 3.2046663859091625e-05, "loss": 0.1609, "step": 14396 }, { "epoch": 1.7072216293134117, "grad_norm": 0.9085797349747294, "learning_rate": 3.204436079143826e-05, "loss": 0.2348, "step": 14397 }, { "epoch": 1.7073402110755365, "grad_norm": 1.32027297954256, "learning_rate": 3.2042057658844525e-05, "loss": 0.217, "step": 14398 }, { "epoch": 1.7074587928376617, "grad_norm": 0.7015987263759242, "learning_rate": 3.2039754461331655e-05, "loss": 0.1374, "step": 14399 }, { "epoch": 1.7075773745997864, "grad_norm": 1.207298092078933, "learning_rate": 3.203745119892088e-05, "loss": 0.2366, "step": 14400 }, { "epoch": 1.7076959563619116, "grad_norm": 0.5733050438931029, "learning_rate": 3.2035147871633436e-05, "loss": 0.1328, "step": 14401 }, { "epoch": 1.7078145381240364, "grad_norm": 0.6859523994557106, "learning_rate": 3.203284447949056e-05, "loss": 0.1391, "step": 14402 }, { "epoch": 1.7079331198861616, "grad_norm": 1.026465653364107, "learning_rate": 3.2030541022513475e-05, "loss": 0.2301, "step": 14403 }, { "epoch": 1.7080517016482863, "grad_norm": 1.1178728951405832, "learning_rate": 3.202823750072343e-05, "loss": 0.1783, "step": 14404 }, { "epoch": 1.7081702834104115, "grad_norm": 1.0310775165539028, "learning_rate": 3.202593391414165e-05, "loss": 0.2206, "step": 14405 }, { "epoch": 1.7082888651725363, "grad_norm": 0.7659303446911929, "learning_rate": 3.2023630262789375e-05, "loss": 0.1729, "step": 14406 }, { "epoch": 1.7084074469346615, "grad_norm": 0.7804771486671647, "learning_rate": 3.202132654668784e-05, "loss": 0.164, "step": 14407 }, { "epoch": 1.7085260286967863, "grad_norm": 0.8717018199386842, "learning_rate": 3.201902276585828e-05, "loss": 0.1714, "step": 14408 }, { "epoch": 1.7086446104589115, "grad_norm": 0.7940625192900783, "learning_rate": 3.201671892032195e-05, "loss": 0.1388, "step": 14409 }, { "epoch": 1.7087631922210365, "grad_norm": 0.9017368388438781, "learning_rate": 3.201441501010007e-05, "loss": 0.2225, "step": 14410 }, { "epoch": 1.7088817739831614, "grad_norm": 0.7926553873046902, "learning_rate": 3.2012111035213885e-05, "loss": 0.1898, "step": 14411 }, { "epoch": 1.7090003557452864, "grad_norm": 1.6557510366067474, "learning_rate": 3.200980699568463e-05, "loss": 0.3016, "step": 14412 }, { "epoch": 1.7091189375074114, "grad_norm": 1.1992075927114936, "learning_rate": 3.200750289153356e-05, "loss": 0.2226, "step": 14413 }, { "epoch": 1.7092375192695364, "grad_norm": 0.7734003556793038, "learning_rate": 3.2005198722781905e-05, "loss": 0.1768, "step": 14414 }, { "epoch": 1.7093561010316614, "grad_norm": 1.3195009908092206, "learning_rate": 3.200289448945091e-05, "loss": 0.2834, "step": 14415 }, { "epoch": 1.7094746827937863, "grad_norm": 1.012636333412804, "learning_rate": 3.20005901915618e-05, "loss": 0.1815, "step": 14416 }, { "epoch": 1.7095932645559113, "grad_norm": 1.505726959635743, "learning_rate": 3.199828582913585e-05, "loss": 0.3652, "step": 14417 }, { "epoch": 1.7097118463180363, "grad_norm": 1.3079484695785732, "learning_rate": 3.199598140219428e-05, "loss": 0.2513, "step": 14418 }, { "epoch": 1.7098304280801613, "grad_norm": 1.1767643983439344, "learning_rate": 3.199367691075834e-05, "loss": 0.2825, "step": 14419 }, { "epoch": 1.7099490098422863, "grad_norm": 0.8973611552476327, "learning_rate": 3.199137235484927e-05, "loss": 0.1243, "step": 14420 }, { "epoch": 1.7100675916044112, "grad_norm": 0.8312354682888484, "learning_rate": 3.1989067734488335e-05, "loss": 0.1287, "step": 14421 }, { "epoch": 1.7101861733665362, "grad_norm": 0.8103411608119966, "learning_rate": 3.1986763049696753e-05, "loss": 0.1335, "step": 14422 }, { "epoch": 1.7103047551286612, "grad_norm": 1.199392976776145, "learning_rate": 3.198445830049579e-05, "loss": 0.2333, "step": 14423 }, { "epoch": 1.7104233368907862, "grad_norm": 0.7904740900447842, "learning_rate": 3.198215348690668e-05, "loss": 0.1563, "step": 14424 }, { "epoch": 1.7105419186529112, "grad_norm": 0.8613864847459698, "learning_rate": 3.197984860895069e-05, "loss": 0.2045, "step": 14425 }, { "epoch": 1.7106605004150361, "grad_norm": 0.5740395435457274, "learning_rate": 3.197754366664904e-05, "loss": 0.1137, "step": 14426 }, { "epoch": 1.7107790821771611, "grad_norm": 0.8255860097000646, "learning_rate": 3.1975238660022996e-05, "loss": 0.2004, "step": 14427 }, { "epoch": 1.710897663939286, "grad_norm": 1.0083153750957559, "learning_rate": 3.1972933589093814e-05, "loss": 0.2223, "step": 14428 }, { "epoch": 1.711016245701411, "grad_norm": 0.9552836604904347, "learning_rate": 3.1970628453882725e-05, "loss": 0.2089, "step": 14429 }, { "epoch": 1.711134827463536, "grad_norm": 1.801131278881435, "learning_rate": 3.1968323254411e-05, "loss": 0.454, "step": 14430 }, { "epoch": 1.711253409225661, "grad_norm": 0.9183282144579255, "learning_rate": 3.196601799069987e-05, "loss": 0.1873, "step": 14431 }, { "epoch": 1.711371990987786, "grad_norm": 0.9465792578950817, "learning_rate": 3.1963712662770606e-05, "loss": 0.2504, "step": 14432 }, { "epoch": 1.711490572749911, "grad_norm": 0.7555496378763414, "learning_rate": 3.1961407270644436e-05, "loss": 0.1287, "step": 14433 }, { "epoch": 1.7116091545120362, "grad_norm": 0.8323256270271441, "learning_rate": 3.1959101814342646e-05, "loss": 0.1891, "step": 14434 }, { "epoch": 1.711727736274161, "grad_norm": 1.1165041249613685, "learning_rate": 3.195679629388646e-05, "loss": 0.294, "step": 14435 }, { "epoch": 1.7118463180362862, "grad_norm": 1.3542579324728636, "learning_rate": 3.195449070929715e-05, "loss": 0.2636, "step": 14436 }, { "epoch": 1.711964899798411, "grad_norm": 1.3517711025316923, "learning_rate": 3.195218506059596e-05, "loss": 0.2875, "step": 14437 }, { "epoch": 1.7120834815605361, "grad_norm": 0.834909995254865, "learning_rate": 3.194987934780415e-05, "loss": 0.1645, "step": 14438 }, { "epoch": 1.712202063322661, "grad_norm": 0.8012395779549538, "learning_rate": 3.194757357094297e-05, "loss": 0.1913, "step": 14439 }, { "epoch": 1.712320645084786, "grad_norm": 0.79732840573039, "learning_rate": 3.1945267730033686e-05, "loss": 0.1733, "step": 14440 }, { "epoch": 1.7124392268469109, "grad_norm": 0.8590946993592105, "learning_rate": 3.194296182509755e-05, "loss": 0.1909, "step": 14441 }, { "epoch": 1.712557808609036, "grad_norm": 1.2009172648701005, "learning_rate": 3.194065585615581e-05, "loss": 0.3501, "step": 14442 }, { "epoch": 1.7126763903711608, "grad_norm": 1.1693442182592593, "learning_rate": 3.193834982322975e-05, "loss": 0.2055, "step": 14443 }, { "epoch": 1.712794972133286, "grad_norm": 0.9665331778877507, "learning_rate": 3.19360437263406e-05, "loss": 0.2199, "step": 14444 }, { "epoch": 1.7129135538954108, "grad_norm": 1.25097070289549, "learning_rate": 3.1933737565509645e-05, "loss": 0.2817, "step": 14445 }, { "epoch": 1.713032135657536, "grad_norm": 0.7072413272076769, "learning_rate": 3.1931431340758124e-05, "loss": 0.1373, "step": 14446 }, { "epoch": 1.7131507174196607, "grad_norm": 0.810403515792779, "learning_rate": 3.192912505210731e-05, "loss": 0.1488, "step": 14447 }, { "epoch": 1.713269299181786, "grad_norm": 0.7917813560033571, "learning_rate": 3.192681869957845e-05, "loss": 0.1323, "step": 14448 }, { "epoch": 1.7133878809439107, "grad_norm": 0.618844083562031, "learning_rate": 3.192451228319282e-05, "loss": 0.1104, "step": 14449 }, { "epoch": 1.713506462706036, "grad_norm": 1.4926167421919925, "learning_rate": 3.192220580297169e-05, "loss": 0.3628, "step": 14450 }, { "epoch": 1.7136250444681607, "grad_norm": 1.1214632623017813, "learning_rate": 3.19198992589363e-05, "loss": 0.2879, "step": 14451 }, { "epoch": 1.7137436262302859, "grad_norm": 0.7843127695533759, "learning_rate": 3.191759265110793e-05, "loss": 0.1737, "step": 14452 }, { "epoch": 1.7138622079924106, "grad_norm": 0.7532382931113236, "learning_rate": 3.191528597950784e-05, "loss": 0.1836, "step": 14453 }, { "epoch": 1.7139807897545358, "grad_norm": 0.956089162159907, "learning_rate": 3.191297924415729e-05, "loss": 0.235, "step": 14454 }, { "epoch": 1.7140993715166606, "grad_norm": 0.7601254303996268, "learning_rate": 3.1910672445077544e-05, "loss": 0.1711, "step": 14455 }, { "epoch": 1.7142179532787858, "grad_norm": 0.8127578512115333, "learning_rate": 3.190836558228988e-05, "loss": 0.1721, "step": 14456 }, { "epoch": 1.7143365350409105, "grad_norm": 0.8721585421318665, "learning_rate": 3.190605865581555e-05, "loss": 0.1829, "step": 14457 }, { "epoch": 1.7144551168030358, "grad_norm": 0.7232642153583739, "learning_rate": 3.190375166567584e-05, "loss": 0.1414, "step": 14458 }, { "epoch": 1.7145736985651607, "grad_norm": 0.8396165863250783, "learning_rate": 3.190144461189199e-05, "loss": 0.1581, "step": 14459 }, { "epoch": 1.7146922803272857, "grad_norm": 0.939144813352631, "learning_rate": 3.189913749448529e-05, "loss": 0.2342, "step": 14460 }, { "epoch": 1.7148108620894107, "grad_norm": 0.7733284907121141, "learning_rate": 3.1896830313477e-05, "loss": 0.2024, "step": 14461 }, { "epoch": 1.7149294438515357, "grad_norm": 0.8416801156159839, "learning_rate": 3.18945230688884e-05, "loss": 0.1444, "step": 14462 }, { "epoch": 1.7150480256136607, "grad_norm": 0.9890691389491307, "learning_rate": 3.189221576074075e-05, "loss": 0.1663, "step": 14463 }, { "epoch": 1.7151666073757856, "grad_norm": 1.5299653158251565, "learning_rate": 3.188990838905532e-05, "loss": 0.3492, "step": 14464 }, { "epoch": 1.7152851891379106, "grad_norm": 1.3525288257426356, "learning_rate": 3.188760095385338e-05, "loss": 0.2389, "step": 14465 }, { "epoch": 1.7154037709000356, "grad_norm": 1.2092095101000393, "learning_rate": 3.188529345515621e-05, "loss": 0.3104, "step": 14466 }, { "epoch": 1.7155223526621606, "grad_norm": 0.7222490010699802, "learning_rate": 3.188298589298507e-05, "loss": 0.1803, "step": 14467 }, { "epoch": 1.7156409344242856, "grad_norm": 1.5314844263476861, "learning_rate": 3.1880678267361255e-05, "loss": 0.2898, "step": 14468 }, { "epoch": 1.7157595161864105, "grad_norm": 1.0589536622103803, "learning_rate": 3.187837057830601e-05, "loss": 0.2293, "step": 14469 }, { "epoch": 1.7158780979485355, "grad_norm": 0.8692651385264556, "learning_rate": 3.1876062825840624e-05, "loss": 0.1834, "step": 14470 }, { "epoch": 1.7159966797106605, "grad_norm": 0.8809533546791133, "learning_rate": 3.1873755009986386e-05, "loss": 0.208, "step": 14471 }, { "epoch": 1.7161152614727855, "grad_norm": 1.0042004019198645, "learning_rate": 3.187144713076454e-05, "loss": 0.2313, "step": 14472 }, { "epoch": 1.7162338432349105, "grad_norm": 0.9594416094795362, "learning_rate": 3.186913918819639e-05, "loss": 0.1747, "step": 14473 }, { "epoch": 1.7163524249970354, "grad_norm": 0.7783018548883502, "learning_rate": 3.186683118230319e-05, "loss": 0.1678, "step": 14474 }, { "epoch": 1.7164710067591604, "grad_norm": 0.6484011055315203, "learning_rate": 3.1864523113106236e-05, "loss": 0.135, "step": 14475 }, { "epoch": 1.7165895885212854, "grad_norm": 0.8111429776270523, "learning_rate": 3.186221498062679e-05, "loss": 0.1995, "step": 14476 }, { "epoch": 1.7167081702834104, "grad_norm": 0.9302421863355155, "learning_rate": 3.185990678488614e-05, "loss": 0.1929, "step": 14477 }, { "epoch": 1.7168267520455354, "grad_norm": 0.793150857126126, "learning_rate": 3.185759852590556e-05, "loss": 0.1509, "step": 14478 }, { "epoch": 1.7169453338076603, "grad_norm": 0.7944312864803509, "learning_rate": 3.185529020370633e-05, "loss": 0.1586, "step": 14479 }, { "epoch": 1.7170639155697853, "grad_norm": 0.8599864137641585, "learning_rate": 3.1852981818309724e-05, "loss": 0.2101, "step": 14480 }, { "epoch": 1.7171824973319103, "grad_norm": 0.8693744679999772, "learning_rate": 3.185067336973705e-05, "loss": 0.1665, "step": 14481 }, { "epoch": 1.7173010790940353, "grad_norm": 0.8081608916265947, "learning_rate": 3.184836485800955e-05, "loss": 0.1766, "step": 14482 }, { "epoch": 1.7174196608561603, "grad_norm": 0.9715105916256286, "learning_rate": 3.184605628314853e-05, "loss": 0.1701, "step": 14483 }, { "epoch": 1.7175382426182852, "grad_norm": 0.9581644864955162, "learning_rate": 3.184374764517526e-05, "loss": 0.1882, "step": 14484 }, { "epoch": 1.7176568243804105, "grad_norm": 0.8833760651880589, "learning_rate": 3.1841438944111035e-05, "loss": 0.2013, "step": 14485 }, { "epoch": 1.7177754061425352, "grad_norm": 1.0214604903444913, "learning_rate": 3.183913017997714e-05, "loss": 0.2223, "step": 14486 }, { "epoch": 1.7178939879046604, "grad_norm": 0.5776017659102184, "learning_rate": 3.1836821352794844e-05, "loss": 0.131, "step": 14487 }, { "epoch": 1.7180125696667852, "grad_norm": 0.9063835995731884, "learning_rate": 3.183451246258543e-05, "loss": 0.177, "step": 14488 }, { "epoch": 1.7181311514289104, "grad_norm": 1.3085849540585721, "learning_rate": 3.18322035093702e-05, "loss": 0.2757, "step": 14489 }, { "epoch": 1.7182497331910351, "grad_norm": 1.0870020520330599, "learning_rate": 3.1829894493170434e-05, "loss": 0.2189, "step": 14490 }, { "epoch": 1.7183683149531603, "grad_norm": 0.908359803694033, "learning_rate": 3.1827585414007416e-05, "loss": 0.1393, "step": 14491 }, { "epoch": 1.718486896715285, "grad_norm": 1.038490096805204, "learning_rate": 3.182527627190244e-05, "loss": 0.1598, "step": 14492 }, { "epoch": 1.7186054784774103, "grad_norm": 1.2932794047699467, "learning_rate": 3.1822967066876775e-05, "loss": 0.3022, "step": 14493 }, { "epoch": 1.718724060239535, "grad_norm": 1.1627319558844944, "learning_rate": 3.1820657798951726e-05, "loss": 0.2563, "step": 14494 }, { "epoch": 1.7188426420016603, "grad_norm": 1.2859874581480395, "learning_rate": 3.1818348468148576e-05, "loss": 0.2711, "step": 14495 }, { "epoch": 1.718961223763785, "grad_norm": 0.8738329261586313, "learning_rate": 3.1816039074488614e-05, "loss": 0.1693, "step": 14496 }, { "epoch": 1.7190798055259102, "grad_norm": 0.829466313832031, "learning_rate": 3.181372961799313e-05, "loss": 0.1599, "step": 14497 }, { "epoch": 1.719198387288035, "grad_norm": 0.9080881280498784, "learning_rate": 3.1811420098683404e-05, "loss": 0.1888, "step": 14498 }, { "epoch": 1.7193169690501602, "grad_norm": 1.2063387813040105, "learning_rate": 3.180911051658074e-05, "loss": 0.2997, "step": 14499 }, { "epoch": 1.719435550812285, "grad_norm": 0.6674956455369669, "learning_rate": 3.1806800871706436e-05, "loss": 0.1575, "step": 14500 }, { "epoch": 1.7195541325744101, "grad_norm": 1.2815825218293206, "learning_rate": 3.180449116408178e-05, "loss": 0.2943, "step": 14501 }, { "epoch": 1.719672714336535, "grad_norm": 0.8348729240341957, "learning_rate": 3.1802181393728046e-05, "loss": 0.1606, "step": 14502 }, { "epoch": 1.71979129609866, "grad_norm": 0.999547458441492, "learning_rate": 3.179987156066655e-05, "loss": 0.2131, "step": 14503 }, { "epoch": 1.7199098778607849, "grad_norm": 1.131820309122839, "learning_rate": 3.1797561664918575e-05, "loss": 0.2113, "step": 14504 }, { "epoch": 1.72002845962291, "grad_norm": 0.8241425445268646, "learning_rate": 3.179525170650542e-05, "loss": 0.2499, "step": 14505 }, { "epoch": 1.7201470413850348, "grad_norm": 1.0487402773783634, "learning_rate": 3.1792941685448366e-05, "loss": 0.2458, "step": 14506 }, { "epoch": 1.72026562314716, "grad_norm": 0.9065824519651341, "learning_rate": 3.1790631601768735e-05, "loss": 0.2162, "step": 14507 }, { "epoch": 1.7203842049092848, "grad_norm": 0.9726148633173768, "learning_rate": 3.17883214554878e-05, "loss": 0.1741, "step": 14508 }, { "epoch": 1.72050278667141, "grad_norm": 0.8307844591905694, "learning_rate": 3.178601124662686e-05, "loss": 0.168, "step": 14509 }, { "epoch": 1.720621368433535, "grad_norm": 0.8227844705909498, "learning_rate": 3.178370097520722e-05, "loss": 0.1375, "step": 14510 }, { "epoch": 1.72073995019566, "grad_norm": 0.7566125071633455, "learning_rate": 3.178139064125018e-05, "loss": 0.1541, "step": 14511 }, { "epoch": 1.720858531957785, "grad_norm": 0.9510337561407485, "learning_rate": 3.177908024477703e-05, "loss": 0.1731, "step": 14512 }, { "epoch": 1.72097711371991, "grad_norm": 1.7527500078133933, "learning_rate": 3.177676978580908e-05, "loss": 0.3238, "step": 14513 }, { "epoch": 1.721095695482035, "grad_norm": 0.7056392366113586, "learning_rate": 3.1774459264367616e-05, "loss": 0.1839, "step": 14514 }, { "epoch": 1.7212142772441599, "grad_norm": 0.7917041230967323, "learning_rate": 3.177214868047395e-05, "loss": 0.1616, "step": 14515 }, { "epoch": 1.7213328590062849, "grad_norm": 0.8645588110964616, "learning_rate": 3.176983803414938e-05, "loss": 0.1584, "step": 14516 }, { "epoch": 1.7214514407684098, "grad_norm": 0.9805061859537454, "learning_rate": 3.1767527325415195e-05, "loss": 0.2019, "step": 14517 }, { "epoch": 1.7215700225305348, "grad_norm": 1.2901144715957598, "learning_rate": 3.176521655429271e-05, "loss": 0.2659, "step": 14518 }, { "epoch": 1.7216886042926598, "grad_norm": 0.905454507554192, "learning_rate": 3.1762905720803226e-05, "loss": 0.1713, "step": 14519 }, { "epoch": 1.7218071860547848, "grad_norm": 0.9696607283145809, "learning_rate": 3.1760594824968056e-05, "loss": 0.2167, "step": 14520 }, { "epoch": 1.7219257678169098, "grad_norm": 0.9436504721834307, "learning_rate": 3.1758283866808474e-05, "loss": 0.2003, "step": 14521 }, { "epoch": 1.7220443495790347, "grad_norm": 0.8668647695847328, "learning_rate": 3.1755972846345816e-05, "loss": 0.1763, "step": 14522 }, { "epoch": 1.7221629313411597, "grad_norm": 0.8201692519258736, "learning_rate": 3.175366176360136e-05, "loss": 0.1608, "step": 14523 }, { "epoch": 1.7222815131032847, "grad_norm": 0.7972566510047387, "learning_rate": 3.175135061859644e-05, "loss": 0.1858, "step": 14524 }, { "epoch": 1.7224000948654097, "grad_norm": 1.0115792461850481, "learning_rate": 3.174903941135233e-05, "loss": 0.2111, "step": 14525 }, { "epoch": 1.7225186766275347, "grad_norm": 1.0401181340598753, "learning_rate": 3.174672814189037e-05, "loss": 0.2422, "step": 14526 }, { "epoch": 1.7226372583896596, "grad_norm": 1.1008857995270602, "learning_rate": 3.1744416810231837e-05, "loss": 0.1973, "step": 14527 }, { "epoch": 1.7227558401517846, "grad_norm": 0.7867896563390347, "learning_rate": 3.174210541639805e-05, "loss": 0.1782, "step": 14528 }, { "epoch": 1.7228744219139096, "grad_norm": 0.6187479793525065, "learning_rate": 3.1739793960410325e-05, "loss": 0.0961, "step": 14529 }, { "epoch": 1.7229930036760346, "grad_norm": 0.9962805683900869, "learning_rate": 3.173748244228996e-05, "loss": 0.1841, "step": 14530 }, { "epoch": 1.7231115854381596, "grad_norm": 1.1940362274642846, "learning_rate": 3.173517086205828e-05, "loss": 0.2741, "step": 14531 }, { "epoch": 1.7232301672002845, "grad_norm": 0.7958229115003665, "learning_rate": 3.173285921973657e-05, "loss": 0.1235, "step": 14532 }, { "epoch": 1.7233487489624095, "grad_norm": 0.8913708104572627, "learning_rate": 3.173054751534617e-05, "loss": 0.1707, "step": 14533 }, { "epoch": 1.7234673307245347, "grad_norm": 0.9908137164689471, "learning_rate": 3.172823574890836e-05, "loss": 0.2132, "step": 14534 }, { "epoch": 1.7235859124866595, "grad_norm": 0.808241532981632, "learning_rate": 3.1725923920444476e-05, "loss": 0.1921, "step": 14535 }, { "epoch": 1.7237044942487847, "grad_norm": 1.0061510629146675, "learning_rate": 3.172361202997582e-05, "loss": 0.1966, "step": 14536 }, { "epoch": 1.7238230760109094, "grad_norm": 0.8448355590640095, "learning_rate": 3.1721300077523705e-05, "loss": 0.1726, "step": 14537 }, { "epoch": 1.7239416577730347, "grad_norm": 0.9487251009104689, "learning_rate": 3.1718988063109436e-05, "loss": 0.1685, "step": 14538 }, { "epoch": 1.7240602395351594, "grad_norm": 1.189346051791899, "learning_rate": 3.171667598675435e-05, "loss": 0.188, "step": 14539 }, { "epoch": 1.7241788212972846, "grad_norm": 1.016714922959491, "learning_rate": 3.171436384847974e-05, "loss": 0.1669, "step": 14540 }, { "epoch": 1.7242974030594094, "grad_norm": 1.2061718095399188, "learning_rate": 3.171205164830693e-05, "loss": 0.1919, "step": 14541 }, { "epoch": 1.7244159848215346, "grad_norm": 0.9893678822433706, "learning_rate": 3.170973938625724e-05, "loss": 0.209, "step": 14542 }, { "epoch": 1.7245345665836593, "grad_norm": 0.6586631830792824, "learning_rate": 3.170742706235198e-05, "loss": 0.1459, "step": 14543 }, { "epoch": 1.7246531483457845, "grad_norm": 0.973762807934994, "learning_rate": 3.170511467661247e-05, "loss": 0.183, "step": 14544 }, { "epoch": 1.7247717301079093, "grad_norm": 1.3280351125221626, "learning_rate": 3.170280222906002e-05, "loss": 0.2568, "step": 14545 }, { "epoch": 1.7248903118700345, "grad_norm": 1.004824430485803, "learning_rate": 3.1700489719715956e-05, "loss": 0.2022, "step": 14546 }, { "epoch": 1.7250088936321593, "grad_norm": 0.8886490918581185, "learning_rate": 3.1698177148601595e-05, "loss": 0.2135, "step": 14547 }, { "epoch": 1.7251274753942845, "grad_norm": 1.1793397168902264, "learning_rate": 3.169586451573825e-05, "loss": 0.2246, "step": 14548 }, { "epoch": 1.7252460571564092, "grad_norm": 1.2359827850832896, "learning_rate": 3.169355182114724e-05, "loss": 0.2182, "step": 14549 }, { "epoch": 1.7253646389185344, "grad_norm": 1.0388691469616662, "learning_rate": 3.16912390648499e-05, "loss": 0.1664, "step": 14550 }, { "epoch": 1.7254832206806592, "grad_norm": 1.6728604780754945, "learning_rate": 3.1688926246867534e-05, "loss": 0.3015, "step": 14551 }, { "epoch": 1.7256018024427844, "grad_norm": 0.7428732951354476, "learning_rate": 3.1686613367221476e-05, "loss": 0.1739, "step": 14552 }, { "epoch": 1.7257203842049091, "grad_norm": 1.0511553399653415, "learning_rate": 3.1684300425933036e-05, "loss": 0.2352, "step": 14553 }, { "epoch": 1.7258389659670343, "grad_norm": 2.008443789479163, "learning_rate": 3.168198742302355e-05, "loss": 0.4831, "step": 14554 }, { "epoch": 1.725957547729159, "grad_norm": 1.3110276843950641, "learning_rate": 3.1679674358514335e-05, "loss": 0.2863, "step": 14555 }, { "epoch": 1.7260761294912843, "grad_norm": 1.0181102225666052, "learning_rate": 3.1677361232426704e-05, "loss": 0.1899, "step": 14556 }, { "epoch": 1.726194711253409, "grad_norm": 0.6860789077262858, "learning_rate": 3.1675048044782004e-05, "loss": 0.1553, "step": 14557 }, { "epoch": 1.7263132930155343, "grad_norm": 0.6816219434686372, "learning_rate": 3.167273479560153e-05, "loss": 0.1331, "step": 14558 }, { "epoch": 1.7264318747776592, "grad_norm": 0.7550833610404455, "learning_rate": 3.1670421484906635e-05, "loss": 0.1712, "step": 14559 }, { "epoch": 1.7265504565397842, "grad_norm": 0.9961086947219595, "learning_rate": 3.166810811271863e-05, "loss": 0.2045, "step": 14560 }, { "epoch": 1.7266690383019092, "grad_norm": 0.8420799147957879, "learning_rate": 3.1665794679058854e-05, "loss": 0.1726, "step": 14561 }, { "epoch": 1.7267876200640342, "grad_norm": 0.7567175492683405, "learning_rate": 3.1663481183948614e-05, "loss": 0.1751, "step": 14562 }, { "epoch": 1.7269062018261592, "grad_norm": 1.0151720802247088, "learning_rate": 3.1661167627409255e-05, "loss": 0.1996, "step": 14563 }, { "epoch": 1.7270247835882842, "grad_norm": 0.8140414757305688, "learning_rate": 3.1658854009462093e-05, "loss": 0.1773, "step": 14564 }, { "epoch": 1.7271433653504091, "grad_norm": 0.9275532328169278, "learning_rate": 3.165654033012847e-05, "loss": 0.1851, "step": 14565 }, { "epoch": 1.7272619471125341, "grad_norm": 0.8035196969120909, "learning_rate": 3.16542265894297e-05, "loss": 0.1629, "step": 14566 }, { "epoch": 1.727380528874659, "grad_norm": 0.838801866696642, "learning_rate": 3.165191278738713e-05, "loss": 0.2433, "step": 14567 }, { "epoch": 1.727499110636784, "grad_norm": 1.1776987855088579, "learning_rate": 3.164959892402207e-05, "loss": 0.3074, "step": 14568 }, { "epoch": 1.727617692398909, "grad_norm": 1.0205170180036778, "learning_rate": 3.164728499935587e-05, "loss": 0.2612, "step": 14569 }, { "epoch": 1.727736274161034, "grad_norm": 1.0445462073270133, "learning_rate": 3.164497101340986e-05, "loss": 0.2617, "step": 14570 }, { "epoch": 1.727854855923159, "grad_norm": 0.7260524627453735, "learning_rate": 3.1642656966205354e-05, "loss": 0.1674, "step": 14571 }, { "epoch": 1.727973437685284, "grad_norm": 0.830858008551378, "learning_rate": 3.164034285776371e-05, "loss": 0.1752, "step": 14572 }, { "epoch": 1.728092019447409, "grad_norm": 1.108440877848984, "learning_rate": 3.163802868810623e-05, "loss": 0.2058, "step": 14573 }, { "epoch": 1.728210601209534, "grad_norm": 0.7151242084311277, "learning_rate": 3.1635714457254275e-05, "loss": 0.1831, "step": 14574 }, { "epoch": 1.728329182971659, "grad_norm": 0.9413052227877987, "learning_rate": 3.1633400165229166e-05, "loss": 0.2308, "step": 14575 }, { "epoch": 1.728447764733784, "grad_norm": 0.9464620916904293, "learning_rate": 3.163108581205225e-05, "loss": 0.1895, "step": 14576 }, { "epoch": 1.728566346495909, "grad_norm": 0.9523520357798898, "learning_rate": 3.162877139774485e-05, "loss": 0.1841, "step": 14577 }, { "epoch": 1.7286849282580339, "grad_norm": 1.599303402928309, "learning_rate": 3.16264569223283e-05, "loss": 0.3194, "step": 14578 }, { "epoch": 1.7288035100201589, "grad_norm": 0.7017707138477537, "learning_rate": 3.162414238582395e-05, "loss": 0.1688, "step": 14579 }, { "epoch": 1.7289220917822838, "grad_norm": 1.060493981671192, "learning_rate": 3.162182778825313e-05, "loss": 0.1684, "step": 14580 }, { "epoch": 1.7290406735444088, "grad_norm": 0.7601197371626818, "learning_rate": 3.161951312963718e-05, "loss": 0.1362, "step": 14581 }, { "epoch": 1.7291592553065338, "grad_norm": 1.2950632929470187, "learning_rate": 3.1617198409997435e-05, "loss": 0.2586, "step": 14582 }, { "epoch": 1.7292778370686588, "grad_norm": 1.0317522655834799, "learning_rate": 3.161488362935523e-05, "loss": 0.2065, "step": 14583 }, { "epoch": 1.7293964188307838, "grad_norm": 0.6782691345509848, "learning_rate": 3.161256878773191e-05, "loss": 0.1371, "step": 14584 }, { "epoch": 1.729515000592909, "grad_norm": 0.7985036823169253, "learning_rate": 3.161025388514882e-05, "loss": 0.1513, "step": 14585 }, { "epoch": 1.7296335823550337, "grad_norm": 0.9648452353997926, "learning_rate": 3.1607938921627293e-05, "loss": 0.204, "step": 14586 }, { "epoch": 1.729752164117159, "grad_norm": 0.8430465558485014, "learning_rate": 3.160562389718867e-05, "loss": 0.2599, "step": 14587 }, { "epoch": 1.7298707458792837, "grad_norm": 0.7966728869493312, "learning_rate": 3.1603308811854305e-05, "loss": 0.1918, "step": 14588 }, { "epoch": 1.729989327641409, "grad_norm": 0.7246191189165064, "learning_rate": 3.1600993665645516e-05, "loss": 0.1508, "step": 14589 }, { "epoch": 1.7301079094035337, "grad_norm": 0.7071757025152917, "learning_rate": 3.159867845858367e-05, "loss": 0.1439, "step": 14590 }, { "epoch": 1.7302264911656589, "grad_norm": 1.136315322726186, "learning_rate": 3.1596363190690095e-05, "loss": 0.2378, "step": 14591 }, { "epoch": 1.7303450729277836, "grad_norm": 1.4774917302485608, "learning_rate": 3.159404786198614e-05, "loss": 0.3528, "step": 14592 }, { "epoch": 1.7304636546899088, "grad_norm": 1.0654817530331917, "learning_rate": 3.1591732472493164e-05, "loss": 0.2021, "step": 14593 }, { "epoch": 1.7305822364520336, "grad_norm": 0.9225743927354116, "learning_rate": 3.158941702223248e-05, "loss": 0.2455, "step": 14594 }, { "epoch": 1.7307008182141588, "grad_norm": 0.7872308672175207, "learning_rate": 3.158710151122546e-05, "loss": 0.1379, "step": 14595 }, { "epoch": 1.7308193999762835, "grad_norm": 0.8917213221501692, "learning_rate": 3.158478593949344e-05, "loss": 0.1863, "step": 14596 }, { "epoch": 1.7309379817384087, "grad_norm": 0.7908576061182048, "learning_rate": 3.158247030705777e-05, "loss": 0.1464, "step": 14597 }, { "epoch": 1.7310565635005335, "grad_norm": 0.9576004296033396, "learning_rate": 3.158015461393979e-05, "loss": 0.1866, "step": 14598 }, { "epoch": 1.7311751452626587, "grad_norm": 0.6751727429561115, "learning_rate": 3.157783886016085e-05, "loss": 0.1171, "step": 14599 }, { "epoch": 1.7312937270247835, "grad_norm": 0.7363996852075781, "learning_rate": 3.157552304574232e-05, "loss": 0.1479, "step": 14600 }, { "epoch": 1.7314123087869087, "grad_norm": 1.0778173419746042, "learning_rate": 3.1573207170705515e-05, "loss": 0.2107, "step": 14601 }, { "epoch": 1.7315308905490334, "grad_norm": 0.9959831348027576, "learning_rate": 3.157089123507181e-05, "loss": 0.2288, "step": 14602 }, { "epoch": 1.7316494723111586, "grad_norm": 0.674288171399284, "learning_rate": 3.156857523886254e-05, "loss": 0.1527, "step": 14603 }, { "epoch": 1.7317680540732834, "grad_norm": 0.9296508603858928, "learning_rate": 3.1566259182099064e-05, "loss": 0.2045, "step": 14604 }, { "epoch": 1.7318866358354086, "grad_norm": 0.8454764310049978, "learning_rate": 3.1563943064802725e-05, "loss": 0.1707, "step": 14605 }, { "epoch": 1.7320052175975333, "grad_norm": 0.8484282397264464, "learning_rate": 3.156162688699489e-05, "loss": 0.1963, "step": 14606 }, { "epoch": 1.7321237993596585, "grad_norm": 0.676255786693846, "learning_rate": 3.155931064869689e-05, "loss": 0.1232, "step": 14607 }, { "epoch": 1.7322423811217833, "grad_norm": 0.863420367854814, "learning_rate": 3.15569943499301e-05, "loss": 0.2294, "step": 14608 }, { "epoch": 1.7323609628839085, "grad_norm": 1.1163547090511354, "learning_rate": 3.155467799071585e-05, "loss": 0.2566, "step": 14609 }, { "epoch": 1.7324795446460335, "grad_norm": 0.8473735338153475, "learning_rate": 3.155236157107552e-05, "loss": 0.1706, "step": 14610 }, { "epoch": 1.7325981264081585, "grad_norm": 1.1931074992604154, "learning_rate": 3.1550045091030445e-05, "loss": 0.3056, "step": 14611 }, { "epoch": 1.7327167081702834, "grad_norm": 1.2011965730113097, "learning_rate": 3.154772855060198e-05, "loss": 0.2517, "step": 14612 }, { "epoch": 1.7328352899324084, "grad_norm": 0.9249255906668935, "learning_rate": 3.1545411949811506e-05, "loss": 0.1781, "step": 14613 }, { "epoch": 1.7329538716945334, "grad_norm": 0.9204208714281856, "learning_rate": 3.154309528868035e-05, "loss": 0.1341, "step": 14614 }, { "epoch": 1.7330724534566584, "grad_norm": 0.8442626442464138, "learning_rate": 3.154077856722988e-05, "loss": 0.1649, "step": 14615 }, { "epoch": 1.7331910352187834, "grad_norm": 0.9538591087898891, "learning_rate": 3.1538461785481444e-05, "loss": 0.2245, "step": 14616 }, { "epoch": 1.7333096169809084, "grad_norm": 0.9659436755430877, "learning_rate": 3.153614494345641e-05, "loss": 0.1975, "step": 14617 }, { "epoch": 1.7334281987430333, "grad_norm": 0.855935387391529, "learning_rate": 3.153382804117614e-05, "loss": 0.1741, "step": 14618 }, { "epoch": 1.7335467805051583, "grad_norm": 1.1605035618214385, "learning_rate": 3.153151107866199e-05, "loss": 0.3194, "step": 14619 }, { "epoch": 1.7336653622672833, "grad_norm": 0.9518832406339551, "learning_rate": 3.152919405593532e-05, "loss": 0.2597, "step": 14620 }, { "epoch": 1.7337839440294083, "grad_norm": 0.6711723714983292, "learning_rate": 3.152687697301748e-05, "loss": 0.1614, "step": 14621 }, { "epoch": 1.7339025257915333, "grad_norm": 0.9607624733729908, "learning_rate": 3.152455982992984e-05, "loss": 0.2748, "step": 14622 }, { "epoch": 1.7340211075536582, "grad_norm": 1.076818075516996, "learning_rate": 3.152224262669377e-05, "loss": 0.217, "step": 14623 }, { "epoch": 1.7341396893157832, "grad_norm": 0.7858193273533192, "learning_rate": 3.1519925363330606e-05, "loss": 0.1582, "step": 14624 }, { "epoch": 1.7342582710779082, "grad_norm": 1.0064975525861173, "learning_rate": 3.151760803986173e-05, "loss": 0.2395, "step": 14625 }, { "epoch": 1.7343768528400332, "grad_norm": 0.9340366254937932, "learning_rate": 3.15152906563085e-05, "loss": 0.1831, "step": 14626 }, { "epoch": 1.7344954346021582, "grad_norm": 0.7819281658446631, "learning_rate": 3.151297321269228e-05, "loss": 0.1342, "step": 14627 }, { "epoch": 1.7346140163642831, "grad_norm": 1.2661173762974, "learning_rate": 3.151065570903443e-05, "loss": 0.2252, "step": 14628 }, { "epoch": 1.7347325981264081, "grad_norm": 0.9826860408305178, "learning_rate": 3.150833814535632e-05, "loss": 0.2036, "step": 14629 }, { "epoch": 1.734851179888533, "grad_norm": 0.7638686625338466, "learning_rate": 3.150602052167931e-05, "loss": 0.1996, "step": 14630 }, { "epoch": 1.734969761650658, "grad_norm": 0.8372998969847668, "learning_rate": 3.150370283802478e-05, "loss": 0.1865, "step": 14631 }, { "epoch": 1.735088343412783, "grad_norm": 0.8884921928459256, "learning_rate": 3.150138509441408e-05, "loss": 0.2263, "step": 14632 }, { "epoch": 1.735206925174908, "grad_norm": 0.9680458205053256, "learning_rate": 3.149906729086858e-05, "loss": 0.1605, "step": 14633 }, { "epoch": 1.7353255069370332, "grad_norm": 1.1690871115166355, "learning_rate": 3.1496749427409654e-05, "loss": 0.2408, "step": 14634 }, { "epoch": 1.735444088699158, "grad_norm": 1.1263368538043, "learning_rate": 3.149443150405866e-05, "loss": 0.2864, "step": 14635 }, { "epoch": 1.7355626704612832, "grad_norm": 0.8298346028091645, "learning_rate": 3.149211352083698e-05, "loss": 0.1391, "step": 14636 }, { "epoch": 1.735681252223408, "grad_norm": 0.9003543713327644, "learning_rate": 3.1489795477765963e-05, "loss": 0.2127, "step": 14637 }, { "epoch": 1.7357998339855332, "grad_norm": 0.8106678442471635, "learning_rate": 3.1487477374867e-05, "loss": 0.1644, "step": 14638 }, { "epoch": 1.735918415747658, "grad_norm": 1.0491964902023354, "learning_rate": 3.148515921216144e-05, "loss": 0.2362, "step": 14639 }, { "epoch": 1.7360369975097831, "grad_norm": 0.803025307255877, "learning_rate": 3.148284098967067e-05, "loss": 0.1733, "step": 14640 }, { "epoch": 1.736155579271908, "grad_norm": 1.2399545162843797, "learning_rate": 3.148052270741606e-05, "loss": 0.3142, "step": 14641 }, { "epoch": 1.736274161034033, "grad_norm": 0.9781859532475387, "learning_rate": 3.1478204365418976e-05, "loss": 0.2073, "step": 14642 }, { "epoch": 1.7363927427961579, "grad_norm": 0.9455098026669628, "learning_rate": 3.147588596370079e-05, "loss": 0.1891, "step": 14643 }, { "epoch": 1.736511324558283, "grad_norm": 1.0463011930881334, "learning_rate": 3.147356750228287e-05, "loss": 0.2418, "step": 14644 }, { "epoch": 1.7366299063204078, "grad_norm": 0.9096292332461895, "learning_rate": 3.147124898118661e-05, "loss": 0.1994, "step": 14645 }, { "epoch": 1.736748488082533, "grad_norm": 1.0838290261818042, "learning_rate": 3.1468930400433364e-05, "loss": 0.2262, "step": 14646 }, { "epoch": 1.7368670698446578, "grad_norm": 1.0711042929792685, "learning_rate": 3.1466611760044505e-05, "loss": 0.2154, "step": 14647 }, { "epoch": 1.736985651606783, "grad_norm": 0.7310156044854647, "learning_rate": 3.146429306004142e-05, "loss": 0.1525, "step": 14648 }, { "epoch": 1.7371042333689077, "grad_norm": 1.1269248614109024, "learning_rate": 3.146197430044549e-05, "loss": 0.2381, "step": 14649 }, { "epoch": 1.737222815131033, "grad_norm": 0.934010888190883, "learning_rate": 3.1459655481278066e-05, "loss": 0.1818, "step": 14650 }, { "epoch": 1.7373413968931577, "grad_norm": 0.9602209243940671, "learning_rate": 3.145733660256055e-05, "loss": 0.155, "step": 14651 }, { "epoch": 1.737459978655283, "grad_norm": 0.9278154860693225, "learning_rate": 3.14550176643143e-05, "loss": 0.1763, "step": 14652 }, { "epoch": 1.7375785604174077, "grad_norm": 0.9770993884313157, "learning_rate": 3.145269866656071e-05, "loss": 0.2057, "step": 14653 }, { "epoch": 1.7376971421795329, "grad_norm": 0.9023934822206634, "learning_rate": 3.145037960932115e-05, "loss": 0.1559, "step": 14654 }, { "epoch": 1.7378157239416576, "grad_norm": 0.682003572272264, "learning_rate": 3.1448060492617e-05, "loss": 0.1823, "step": 14655 }, { "epoch": 1.7379343057037828, "grad_norm": 1.4923543521820113, "learning_rate": 3.1445741316469644e-05, "loss": 0.3213, "step": 14656 }, { "epoch": 1.7380528874659076, "grad_norm": 0.9884594838152617, "learning_rate": 3.144342208090045e-05, "loss": 0.1809, "step": 14657 }, { "epoch": 1.7381714692280328, "grad_norm": 0.800214508745925, "learning_rate": 3.14411027859308e-05, "loss": 0.1595, "step": 14658 }, { "epoch": 1.7382900509901578, "grad_norm": 0.5996948967104088, "learning_rate": 3.143878343158209e-05, "loss": 0.168, "step": 14659 }, { "epoch": 1.7384086327522827, "grad_norm": 0.9062228148534073, "learning_rate": 3.14364640178757e-05, "loss": 0.1924, "step": 14660 }, { "epoch": 1.7385272145144077, "grad_norm": 0.9670159204822516, "learning_rate": 3.143414454483299e-05, "loss": 0.1505, "step": 14661 }, { "epoch": 1.7386457962765327, "grad_norm": 0.7681103738219432, "learning_rate": 3.143182501247537e-05, "loss": 0.1295, "step": 14662 }, { "epoch": 1.7387643780386577, "grad_norm": 0.8287071645455922, "learning_rate": 3.14295054208242e-05, "loss": 0.1675, "step": 14663 }, { "epoch": 1.7388829598007827, "grad_norm": 0.7773036165296797, "learning_rate": 3.142718576990088e-05, "loss": 0.1401, "step": 14664 }, { "epoch": 1.7390015415629076, "grad_norm": 1.8203964931477934, "learning_rate": 3.1424866059726785e-05, "loss": 0.4113, "step": 14665 }, { "epoch": 1.7391201233250326, "grad_norm": 0.8889366335839831, "learning_rate": 3.142254629032331e-05, "loss": 0.2143, "step": 14666 }, { "epoch": 1.7392387050871576, "grad_norm": 0.8328197467821113, "learning_rate": 3.142022646171183e-05, "loss": 0.157, "step": 14667 }, { "epoch": 1.7393572868492826, "grad_norm": 0.8258561661134728, "learning_rate": 3.141790657391374e-05, "loss": 0.1526, "step": 14668 }, { "epoch": 1.7394758686114076, "grad_norm": 0.9240301264708763, "learning_rate": 3.141558662695041e-05, "loss": 0.165, "step": 14669 }, { "epoch": 1.7395944503735326, "grad_norm": 0.9156998262947461, "learning_rate": 3.141326662084324e-05, "loss": 0.2143, "step": 14670 }, { "epoch": 1.7397130321356575, "grad_norm": 0.5399444281783027, "learning_rate": 3.141094655561363e-05, "loss": 0.1053, "step": 14671 }, { "epoch": 1.7398316138977825, "grad_norm": 0.7862811982302823, "learning_rate": 3.140862643128294e-05, "loss": 0.1598, "step": 14672 }, { "epoch": 1.7399501956599075, "grad_norm": 1.154699509546788, "learning_rate": 3.1406306247872585e-05, "loss": 0.2476, "step": 14673 }, { "epoch": 1.7400687774220325, "grad_norm": 0.8827236670093243, "learning_rate": 3.140398600540393e-05, "loss": 0.1817, "step": 14674 }, { "epoch": 1.7401873591841575, "grad_norm": 0.9605166983042341, "learning_rate": 3.1401665703898385e-05, "loss": 0.179, "step": 14675 }, { "epoch": 1.7403059409462824, "grad_norm": 0.9189651617858207, "learning_rate": 3.139934534337734e-05, "loss": 0.2002, "step": 14676 }, { "epoch": 1.7404245227084074, "grad_norm": 0.8032194573203678, "learning_rate": 3.139702492386216e-05, "loss": 0.1679, "step": 14677 }, { "epoch": 1.7405431044705324, "grad_norm": 1.3067340320942569, "learning_rate": 3.1394704445374264e-05, "loss": 0.2741, "step": 14678 }, { "epoch": 1.7406616862326574, "grad_norm": 1.3831913318649027, "learning_rate": 3.139238390793504e-05, "loss": 0.3667, "step": 14679 }, { "epoch": 1.7407802679947824, "grad_norm": 0.9110327401318978, "learning_rate": 3.1390063311565865e-05, "loss": 0.1533, "step": 14680 }, { "epoch": 1.7408988497569073, "grad_norm": 0.9316356382908979, "learning_rate": 3.138774265628815e-05, "loss": 0.1608, "step": 14681 }, { "epoch": 1.7410174315190323, "grad_norm": 0.8554466667344999, "learning_rate": 3.1385421942123275e-05, "loss": 0.2001, "step": 14682 }, { "epoch": 1.7411360132811575, "grad_norm": 1.1070359285081497, "learning_rate": 3.138310116909265e-05, "loss": 0.2224, "step": 14683 }, { "epoch": 1.7412545950432823, "grad_norm": 0.8542681652410032, "learning_rate": 3.138078033721765e-05, "loss": 0.1676, "step": 14684 }, { "epoch": 1.7413731768054075, "grad_norm": 0.9376684157773574, "learning_rate": 3.137845944651968e-05, "loss": 0.1995, "step": 14685 }, { "epoch": 1.7414917585675322, "grad_norm": 0.8309221645578139, "learning_rate": 3.137613849702014e-05, "loss": 0.1598, "step": 14686 }, { "epoch": 1.7416103403296574, "grad_norm": 0.7711950793526482, "learning_rate": 3.137381748874042e-05, "loss": 0.1608, "step": 14687 }, { "epoch": 1.7417289220917822, "grad_norm": 0.9142569069403164, "learning_rate": 3.137149642170192e-05, "loss": 0.1697, "step": 14688 }, { "epoch": 1.7418475038539074, "grad_norm": 0.7734034215765511, "learning_rate": 3.1369175295926027e-05, "loss": 0.1824, "step": 14689 }, { "epoch": 1.7419660856160322, "grad_norm": 1.3452277105230492, "learning_rate": 3.1366854111434164e-05, "loss": 0.2598, "step": 14690 }, { "epoch": 1.7420846673781574, "grad_norm": 0.9950671312883218, "learning_rate": 3.13645328682477e-05, "loss": 0.2288, "step": 14691 }, { "epoch": 1.7422032491402821, "grad_norm": 0.7882612121448725, "learning_rate": 3.136221156638806e-05, "loss": 0.1324, "step": 14692 }, { "epoch": 1.7423218309024073, "grad_norm": 0.8620196054126018, "learning_rate": 3.135989020587663e-05, "loss": 0.195, "step": 14693 }, { "epoch": 1.742440412664532, "grad_norm": 0.9606716670992508, "learning_rate": 3.1357568786734805e-05, "loss": 0.1661, "step": 14694 }, { "epoch": 1.7425589944266573, "grad_norm": 1.1342449205856742, "learning_rate": 3.135524730898399e-05, "loss": 0.2408, "step": 14695 }, { "epoch": 1.742677576188782, "grad_norm": 2.6422525428849837, "learning_rate": 3.135292577264559e-05, "loss": 0.2807, "step": 14696 }, { "epoch": 1.7427961579509073, "grad_norm": 1.616996971615701, "learning_rate": 3.1350604177741e-05, "loss": 0.3264, "step": 14697 }, { "epoch": 1.742914739713032, "grad_norm": 1.8423757275384607, "learning_rate": 3.134828252429163e-05, "loss": 0.4758, "step": 14698 }, { "epoch": 1.7430333214751572, "grad_norm": 1.1188241421723581, "learning_rate": 3.134596081231889e-05, "loss": 0.1735, "step": 14699 }, { "epoch": 1.743151903237282, "grad_norm": 1.0000767243701667, "learning_rate": 3.134363904184416e-05, "loss": 0.2055, "step": 14700 }, { "epoch": 1.7432704849994072, "grad_norm": 1.1967815762690743, "learning_rate": 3.1341317212888874e-05, "loss": 0.3134, "step": 14701 }, { "epoch": 1.743389066761532, "grad_norm": 0.6963059326845474, "learning_rate": 3.133899532547441e-05, "loss": 0.1605, "step": 14702 }, { "epoch": 1.7435076485236571, "grad_norm": 1.6968112237836632, "learning_rate": 3.133667337962218e-05, "loss": 0.4621, "step": 14703 }, { "epoch": 1.743626230285782, "grad_norm": 0.7482893613933279, "learning_rate": 3.133435137535359e-05, "loss": 0.1992, "step": 14704 }, { "epoch": 1.743744812047907, "grad_norm": 0.7230953010387683, "learning_rate": 3.1332029312690056e-05, "loss": 0.1686, "step": 14705 }, { "epoch": 1.7438633938100319, "grad_norm": 0.9106451481290773, "learning_rate": 3.132970719165297e-05, "loss": 0.1929, "step": 14706 }, { "epoch": 1.743981975572157, "grad_norm": 0.7639473137429754, "learning_rate": 3.132738501226375e-05, "loss": 0.1575, "step": 14707 }, { "epoch": 1.744100557334282, "grad_norm": 0.8380643505564083, "learning_rate": 3.13250627745438e-05, "loss": 0.1975, "step": 14708 }, { "epoch": 1.744219139096407, "grad_norm": 0.8443983736718295, "learning_rate": 3.132274047851452e-05, "loss": 0.183, "step": 14709 }, { "epoch": 1.744337720858532, "grad_norm": 0.9331108825718925, "learning_rate": 3.132041812419733e-05, "loss": 0.2171, "step": 14710 }, { "epoch": 1.744456302620657, "grad_norm": 0.638278176435028, "learning_rate": 3.1318095711613643e-05, "loss": 0.154, "step": 14711 }, { "epoch": 1.744574884382782, "grad_norm": 0.7813491811190624, "learning_rate": 3.131577324078485e-05, "loss": 0.1864, "step": 14712 }, { "epoch": 1.744693466144907, "grad_norm": 0.9461946798100636, "learning_rate": 3.131345071173238e-05, "loss": 0.1783, "step": 14713 }, { "epoch": 1.744812047907032, "grad_norm": 0.8909720894461108, "learning_rate": 3.131112812447763e-05, "loss": 0.156, "step": 14714 }, { "epoch": 1.744930629669157, "grad_norm": 0.8328765881558634, "learning_rate": 3.1308805479042026e-05, "loss": 0.1633, "step": 14715 }, { "epoch": 1.7450492114312819, "grad_norm": 0.9608920141694961, "learning_rate": 3.1306482775446974e-05, "loss": 0.1893, "step": 14716 }, { "epoch": 1.7451677931934069, "grad_norm": 1.2340694978707625, "learning_rate": 3.130416001371387e-05, "loss": 0.2641, "step": 14717 }, { "epoch": 1.7452863749555318, "grad_norm": 0.7715223509986534, "learning_rate": 3.1301837193864144e-05, "loss": 0.1799, "step": 14718 }, { "epoch": 1.7454049567176568, "grad_norm": 0.9640005608566047, "learning_rate": 3.129951431591921e-05, "loss": 0.2533, "step": 14719 }, { "epoch": 1.7455235384797818, "grad_norm": 0.905653941381957, "learning_rate": 3.1297191379900495e-05, "loss": 0.2196, "step": 14720 }, { "epoch": 1.7456421202419068, "grad_norm": 0.6405748074877983, "learning_rate": 3.129486838582938e-05, "loss": 0.1378, "step": 14721 }, { "epoch": 1.7457607020040318, "grad_norm": 1.183329449936503, "learning_rate": 3.1292545333727304e-05, "loss": 0.2353, "step": 14722 }, { "epoch": 1.7458792837661568, "grad_norm": 1.0219283291141261, "learning_rate": 3.1290222223615676e-05, "loss": 0.2589, "step": 14723 }, { "epoch": 1.7459978655282817, "grad_norm": 1.0576685873294662, "learning_rate": 3.1287899055515915e-05, "loss": 0.2196, "step": 14724 }, { "epoch": 1.7461164472904067, "grad_norm": 0.7991294323974482, "learning_rate": 3.128557582944942e-05, "loss": 0.1679, "step": 14725 }, { "epoch": 1.7462350290525317, "grad_norm": 0.9288195276321649, "learning_rate": 3.128325254543764e-05, "loss": 0.1965, "step": 14726 }, { "epoch": 1.7463536108146567, "grad_norm": 0.8027063002262543, "learning_rate": 3.128092920350196e-05, "loss": 0.1476, "step": 14727 }, { "epoch": 1.7464721925767817, "grad_norm": 1.3905991500351387, "learning_rate": 3.1278605803663826e-05, "loss": 0.2546, "step": 14728 }, { "epoch": 1.7465907743389066, "grad_norm": 1.375260733284016, "learning_rate": 3.127628234594465e-05, "loss": 0.3339, "step": 14729 }, { "epoch": 1.7467093561010316, "grad_norm": 1.0193104876446104, "learning_rate": 3.1273958830365844e-05, "loss": 0.2334, "step": 14730 }, { "epoch": 1.7468279378631566, "grad_norm": 0.9776589915037509, "learning_rate": 3.127163525694884e-05, "loss": 0.2348, "step": 14731 }, { "epoch": 1.7469465196252816, "grad_norm": 0.9696481908786015, "learning_rate": 3.1269311625715035e-05, "loss": 0.1854, "step": 14732 }, { "epoch": 1.7470651013874066, "grad_norm": 1.0568378729643966, "learning_rate": 3.126698793668588e-05, "loss": 0.2669, "step": 14733 }, { "epoch": 1.7471836831495318, "grad_norm": 1.126053108898335, "learning_rate": 3.1264664189882764e-05, "loss": 0.2103, "step": 14734 }, { "epoch": 1.7473022649116565, "grad_norm": 1.0752732976341552, "learning_rate": 3.1262340385327136e-05, "loss": 0.2102, "step": 14735 }, { "epoch": 1.7474208466737817, "grad_norm": 0.7112875099746112, "learning_rate": 3.1260016523040405e-05, "loss": 0.1934, "step": 14736 }, { "epoch": 1.7475394284359065, "grad_norm": 1.068287474430743, "learning_rate": 3.1257692603044e-05, "loss": 0.1701, "step": 14737 }, { "epoch": 1.7476580101980317, "grad_norm": 0.6446506295830842, "learning_rate": 3.125536862535934e-05, "loss": 0.1267, "step": 14738 }, { "epoch": 1.7477765919601564, "grad_norm": 1.1681980035917268, "learning_rate": 3.1253044590007854e-05, "loss": 0.1785, "step": 14739 }, { "epoch": 1.7478951737222816, "grad_norm": 0.7095332795504519, "learning_rate": 3.1250720497010965e-05, "loss": 0.1563, "step": 14740 }, { "epoch": 1.7480137554844064, "grad_norm": 1.1152570389086014, "learning_rate": 3.1248396346390105e-05, "loss": 0.2397, "step": 14741 }, { "epoch": 1.7481323372465316, "grad_norm": 0.8677553296943814, "learning_rate": 3.124607213816669e-05, "loss": 0.1894, "step": 14742 }, { "epoch": 1.7482509190086564, "grad_norm": 0.8603168201419683, "learning_rate": 3.124374787236214e-05, "loss": 0.1551, "step": 14743 }, { "epoch": 1.7483695007707816, "grad_norm": 0.7571817149681475, "learning_rate": 3.1241423548997905e-05, "loss": 0.1471, "step": 14744 }, { "epoch": 1.7484880825329063, "grad_norm": 0.7195406685536916, "learning_rate": 3.123909916809539e-05, "loss": 0.1635, "step": 14745 }, { "epoch": 1.7486066642950315, "grad_norm": 1.3518617374469146, "learning_rate": 3.1236774729676025e-05, "loss": 0.2909, "step": 14746 }, { "epoch": 1.7487252460571563, "grad_norm": 1.1358711523202842, "learning_rate": 3.1234450233761256e-05, "loss": 0.2231, "step": 14747 }, { "epoch": 1.7488438278192815, "grad_norm": 0.8602899634793333, "learning_rate": 3.12321256803725e-05, "loss": 0.1922, "step": 14748 }, { "epoch": 1.7489624095814063, "grad_norm": 0.7821568922409815, "learning_rate": 3.1229801069531186e-05, "loss": 0.1454, "step": 14749 }, { "epoch": 1.7490809913435315, "grad_norm": 0.6876107848519728, "learning_rate": 3.122747640125875e-05, "loss": 0.1492, "step": 14750 }, { "epoch": 1.7491995731056562, "grad_norm": 0.8074976490003586, "learning_rate": 3.1225151675576606e-05, "loss": 0.1924, "step": 14751 }, { "epoch": 1.7493181548677814, "grad_norm": 1.0429273827022623, "learning_rate": 3.122282689250621e-05, "loss": 0.2313, "step": 14752 }, { "epoch": 1.7494367366299062, "grad_norm": 1.2574876430952961, "learning_rate": 3.1220502052068974e-05, "loss": 0.2791, "step": 14753 }, { "epoch": 1.7495553183920314, "grad_norm": 0.7557862888492397, "learning_rate": 3.121817715428634e-05, "loss": 0.1504, "step": 14754 }, { "epoch": 1.7496739001541561, "grad_norm": 0.7787964771864523, "learning_rate": 3.121585219917973e-05, "loss": 0.1472, "step": 14755 }, { "epoch": 1.7497924819162813, "grad_norm": 0.9327345736794969, "learning_rate": 3.12135271867706e-05, "loss": 0.1806, "step": 14756 }, { "epoch": 1.749911063678406, "grad_norm": 1.1406996299268843, "learning_rate": 3.121120211708036e-05, "loss": 0.2737, "step": 14757 }, { "epoch": 1.7500296454405313, "grad_norm": 0.8233042585643373, "learning_rate": 3.120887699013045e-05, "loss": 0.1744, "step": 14758 }, { "epoch": 1.7501482272026563, "grad_norm": 0.7156943581001208, "learning_rate": 3.1206551805942315e-05, "loss": 0.1177, "step": 14759 }, { "epoch": 1.7502668089647813, "grad_norm": 0.8039082845097842, "learning_rate": 3.1204226564537376e-05, "loss": 0.1337, "step": 14760 }, { "epoch": 1.7503853907269062, "grad_norm": 0.5788806248357289, "learning_rate": 3.1201901265937086e-05, "loss": 0.123, "step": 14761 }, { "epoch": 1.7505039724890312, "grad_norm": 0.9380202183654657, "learning_rate": 3.119957591016286e-05, "loss": 0.2249, "step": 14762 }, { "epoch": 1.7506225542511562, "grad_norm": 0.8686238872008828, "learning_rate": 3.119725049723616e-05, "loss": 0.1584, "step": 14763 }, { "epoch": 1.7507411360132812, "grad_norm": 1.1543270811281086, "learning_rate": 3.1194925027178404e-05, "loss": 0.1993, "step": 14764 }, { "epoch": 1.7508597177754062, "grad_norm": 0.7476332772636242, "learning_rate": 3.119259950001103e-05, "loss": 0.2021, "step": 14765 }, { "epoch": 1.7509782995375311, "grad_norm": 1.2288452085247499, "learning_rate": 3.1190273915755486e-05, "loss": 0.3274, "step": 14766 }, { "epoch": 1.7510968812996561, "grad_norm": 0.791181531632165, "learning_rate": 3.118794827443321e-05, "loss": 0.2077, "step": 14767 }, { "epoch": 1.751215463061781, "grad_norm": 0.9159606029800984, "learning_rate": 3.118562257606563e-05, "loss": 0.1934, "step": 14768 }, { "epoch": 1.751334044823906, "grad_norm": 0.973074791491308, "learning_rate": 3.118329682067421e-05, "loss": 0.1658, "step": 14769 }, { "epoch": 1.751452626586031, "grad_norm": 0.8867746450918721, "learning_rate": 3.118097100828037e-05, "loss": 0.184, "step": 14770 }, { "epoch": 1.751571208348156, "grad_norm": 0.9659115836581564, "learning_rate": 3.1178645138905546e-05, "loss": 0.2077, "step": 14771 }, { "epoch": 1.751689790110281, "grad_norm": 1.1018051489028053, "learning_rate": 3.1176319212571206e-05, "loss": 0.2311, "step": 14772 }, { "epoch": 1.751808371872406, "grad_norm": 0.9148713969804662, "learning_rate": 3.117399322929877e-05, "loss": 0.1677, "step": 14773 }, { "epoch": 1.751926953634531, "grad_norm": 1.1134692663359749, "learning_rate": 3.1171667189109695e-05, "loss": 0.2657, "step": 14774 }, { "epoch": 1.752045535396656, "grad_norm": 1.0058727704719947, "learning_rate": 3.1169341092025405e-05, "loss": 0.1817, "step": 14775 }, { "epoch": 1.752164117158781, "grad_norm": 0.9396905789555027, "learning_rate": 3.116701493806736e-05, "loss": 0.2142, "step": 14776 }, { "epoch": 1.752282698920906, "grad_norm": 0.9316735214847013, "learning_rate": 3.1164688727257e-05, "loss": 0.1357, "step": 14777 }, { "epoch": 1.752401280683031, "grad_norm": 0.8234442536315836, "learning_rate": 3.1162362459615765e-05, "loss": 0.1387, "step": 14778 }, { "epoch": 1.752519862445156, "grad_norm": 0.9204061486823071, "learning_rate": 3.116003613516511e-05, "loss": 0.2236, "step": 14779 }, { "epoch": 1.7526384442072809, "grad_norm": 0.7365520341161451, "learning_rate": 3.1157709753926475e-05, "loss": 0.1365, "step": 14780 }, { "epoch": 1.7527570259694059, "grad_norm": 0.6235580931159266, "learning_rate": 3.115538331592131e-05, "loss": 0.1367, "step": 14781 }, { "epoch": 1.7528756077315308, "grad_norm": 0.8200355547090945, "learning_rate": 3.1153056821171064e-05, "loss": 0.1879, "step": 14782 }, { "epoch": 1.752994189493656, "grad_norm": 1.1105314428172732, "learning_rate": 3.115073026969717e-05, "loss": 0.2443, "step": 14783 }, { "epoch": 1.7531127712557808, "grad_norm": 1.032984283698428, "learning_rate": 3.1148403661521086e-05, "loss": 0.2422, "step": 14784 }, { "epoch": 1.753231353017906, "grad_norm": 0.9740496894024482, "learning_rate": 3.114607699666426e-05, "loss": 0.2253, "step": 14785 }, { "epoch": 1.7533499347800308, "grad_norm": 1.0272721348404499, "learning_rate": 3.1143750275148144e-05, "loss": 0.2141, "step": 14786 }, { "epoch": 1.753468516542156, "grad_norm": 0.9121106254829645, "learning_rate": 3.114142349699418e-05, "loss": 0.1756, "step": 14787 }, { "epoch": 1.7535870983042807, "grad_norm": 0.8818531638663641, "learning_rate": 3.113909666222382e-05, "loss": 0.2013, "step": 14788 }, { "epoch": 1.753705680066406, "grad_norm": 0.8193679282455799, "learning_rate": 3.1136769770858534e-05, "loss": 0.1804, "step": 14789 }, { "epoch": 1.7538242618285307, "grad_norm": 0.6742735831461614, "learning_rate": 3.1134442822919735e-05, "loss": 0.1236, "step": 14790 }, { "epoch": 1.7539428435906559, "grad_norm": 0.7067647421730557, "learning_rate": 3.113211581842891e-05, "loss": 0.1548, "step": 14791 }, { "epoch": 1.7540614253527806, "grad_norm": 0.6760108282135261, "learning_rate": 3.112978875740749e-05, "loss": 0.1412, "step": 14792 }, { "epoch": 1.7541800071149058, "grad_norm": 1.0067637672678211, "learning_rate": 3.112746163987694e-05, "loss": 0.1796, "step": 14793 }, { "epoch": 1.7542985888770306, "grad_norm": 1.0472344816540369, "learning_rate": 3.11251344658587e-05, "loss": 0.2555, "step": 14794 }, { "epoch": 1.7544171706391558, "grad_norm": 1.2206057895173847, "learning_rate": 3.112280723537424e-05, "loss": 0.1968, "step": 14795 }, { "epoch": 1.7545357524012806, "grad_norm": 0.7860388739489768, "learning_rate": 3.1120479948445e-05, "loss": 0.2181, "step": 14796 }, { "epoch": 1.7546543341634058, "grad_norm": 1.2562880033526425, "learning_rate": 3.1118152605092445e-05, "loss": 0.2479, "step": 14797 }, { "epoch": 1.7547729159255305, "grad_norm": 0.714937064824541, "learning_rate": 3.111582520533802e-05, "loss": 0.1731, "step": 14798 }, { "epoch": 1.7548914976876557, "grad_norm": 0.9142893659595972, "learning_rate": 3.111349774920318e-05, "loss": 0.1947, "step": 14799 }, { "epoch": 1.7550100794497805, "grad_norm": 0.9846530101392512, "learning_rate": 3.11111702367094e-05, "loss": 0.1851, "step": 14800 }, { "epoch": 1.7551286612119057, "grad_norm": 1.2490529333031106, "learning_rate": 3.110884266787812e-05, "loss": 0.2408, "step": 14801 }, { "epoch": 1.7552472429740305, "grad_norm": 0.8151878648435398, "learning_rate": 3.1106515042730805e-05, "loss": 0.1827, "step": 14802 }, { "epoch": 1.7553658247361557, "grad_norm": 0.9334809806449261, "learning_rate": 3.1104187361288904e-05, "loss": 0.2064, "step": 14803 }, { "epoch": 1.7554844064982804, "grad_norm": 0.8293816320351532, "learning_rate": 3.110185962357389e-05, "loss": 0.1723, "step": 14804 }, { "epoch": 1.7556029882604056, "grad_norm": 1.2568819103594926, "learning_rate": 3.1099531829607203e-05, "loss": 0.2721, "step": 14805 }, { "epoch": 1.7557215700225304, "grad_norm": 1.1244477694223052, "learning_rate": 3.109720397941032e-05, "loss": 0.2121, "step": 14806 }, { "epoch": 1.7558401517846556, "grad_norm": 0.7259247614922956, "learning_rate": 3.109487607300468e-05, "loss": 0.1249, "step": 14807 }, { "epoch": 1.7559587335467806, "grad_norm": 1.0451422002873032, "learning_rate": 3.109254811041177e-05, "loss": 0.1781, "step": 14808 }, { "epoch": 1.7560773153089055, "grad_norm": 0.8314172859284606, "learning_rate": 3.1090220091653035e-05, "loss": 0.1733, "step": 14809 }, { "epoch": 1.7561958970710305, "grad_norm": 1.1016417046454445, "learning_rate": 3.108789201674994e-05, "loss": 0.2009, "step": 14810 }, { "epoch": 1.7563144788331555, "grad_norm": 0.9438207625630018, "learning_rate": 3.108556388572394e-05, "loss": 0.2224, "step": 14811 }, { "epoch": 1.7564330605952805, "grad_norm": 1.0515292978023176, "learning_rate": 3.1083235698596505e-05, "loss": 0.2187, "step": 14812 }, { "epoch": 1.7565516423574055, "grad_norm": 0.930839341522019, "learning_rate": 3.1080907455389105e-05, "loss": 0.2361, "step": 14813 }, { "epoch": 1.7566702241195304, "grad_norm": 1.2809523209110942, "learning_rate": 3.107857915612318e-05, "loss": 0.2432, "step": 14814 }, { "epoch": 1.7567888058816554, "grad_norm": 0.7698859945847877, "learning_rate": 3.107625080082022e-05, "loss": 0.1254, "step": 14815 }, { "epoch": 1.7569073876437804, "grad_norm": 0.8398453376141921, "learning_rate": 3.107392238950168e-05, "loss": 0.1705, "step": 14816 }, { "epoch": 1.7570259694059054, "grad_norm": 1.3656750425212543, "learning_rate": 3.107159392218902e-05, "loss": 0.2861, "step": 14817 }, { "epoch": 1.7571445511680304, "grad_norm": 1.0553287706207823, "learning_rate": 3.106926539890371e-05, "loss": 0.2001, "step": 14818 }, { "epoch": 1.7572631329301553, "grad_norm": 0.7387132463740662, "learning_rate": 3.1066936819667214e-05, "loss": 0.1679, "step": 14819 }, { "epoch": 1.7573817146922803, "grad_norm": 1.0785606281740425, "learning_rate": 3.1064608184501e-05, "loss": 0.2516, "step": 14820 }, { "epoch": 1.7575002964544053, "grad_norm": 0.6860230116532628, "learning_rate": 3.1062279493426546e-05, "loss": 0.1499, "step": 14821 }, { "epoch": 1.7576188782165303, "grad_norm": 1.2550899718069486, "learning_rate": 3.1059950746465295e-05, "loss": 0.2505, "step": 14822 }, { "epoch": 1.7577374599786553, "grad_norm": 0.6844196692592889, "learning_rate": 3.105762194363874e-05, "loss": 0.1729, "step": 14823 }, { "epoch": 1.7578560417407803, "grad_norm": 0.7690059080033667, "learning_rate": 3.105529308496833e-05, "loss": 0.1652, "step": 14824 }, { "epoch": 1.7579746235029052, "grad_norm": 0.9627255035898441, "learning_rate": 3.105296417047555e-05, "loss": 0.2157, "step": 14825 }, { "epoch": 1.7580932052650302, "grad_norm": 1.1026628267394825, "learning_rate": 3.105063520018186e-05, "loss": 0.2269, "step": 14826 }, { "epoch": 1.7582117870271552, "grad_norm": 0.9586249589782364, "learning_rate": 3.104830617410873e-05, "loss": 0.1996, "step": 14827 }, { "epoch": 1.7583303687892802, "grad_norm": 1.3464576578263285, "learning_rate": 3.104597709227764e-05, "loss": 0.2908, "step": 14828 }, { "epoch": 1.7584489505514052, "grad_norm": 0.758243279189508, "learning_rate": 3.104364795471005e-05, "loss": 0.1541, "step": 14829 }, { "epoch": 1.7585675323135301, "grad_norm": 0.9140795583679873, "learning_rate": 3.104131876142744e-05, "loss": 0.2163, "step": 14830 }, { "epoch": 1.7586861140756551, "grad_norm": 0.752817430527757, "learning_rate": 3.1038989512451276e-05, "loss": 0.1419, "step": 14831 }, { "epoch": 1.75880469583778, "grad_norm": 0.6897713157901155, "learning_rate": 3.1036660207803045e-05, "loss": 0.144, "step": 14832 }, { "epoch": 1.758923277599905, "grad_norm": 1.020275813633126, "learning_rate": 3.10343308475042e-05, "loss": 0.2451, "step": 14833 }, { "epoch": 1.7590418593620303, "grad_norm": 1.1055411503175445, "learning_rate": 3.103200143157623e-05, "loss": 0.2883, "step": 14834 }, { "epoch": 1.759160441124155, "grad_norm": 0.8050548160315656, "learning_rate": 3.102967196004059e-05, "loss": 0.1498, "step": 14835 }, { "epoch": 1.7592790228862802, "grad_norm": 1.2688358731443792, "learning_rate": 3.1027342432918785e-05, "loss": 0.2424, "step": 14836 }, { "epoch": 1.759397604648405, "grad_norm": 0.7735939519645442, "learning_rate": 3.102501285023227e-05, "loss": 0.177, "step": 14837 }, { "epoch": 1.7595161864105302, "grad_norm": 0.7722088482400123, "learning_rate": 3.102268321200252e-05, "loss": 0.2002, "step": 14838 }, { "epoch": 1.759634768172655, "grad_norm": 0.7144675888642609, "learning_rate": 3.1020353518251014e-05, "loss": 0.1572, "step": 14839 }, { "epoch": 1.7597533499347802, "grad_norm": 0.9478802615539146, "learning_rate": 3.101802376899924e-05, "loss": 0.1799, "step": 14840 }, { "epoch": 1.759871931696905, "grad_norm": 0.8745511797730666, "learning_rate": 3.101569396426866e-05, "loss": 0.1675, "step": 14841 }, { "epoch": 1.7599905134590301, "grad_norm": 1.1451252451607612, "learning_rate": 3.1013364104080764e-05, "loss": 0.2262, "step": 14842 }, { "epoch": 1.7601090952211549, "grad_norm": 0.8799421430990599, "learning_rate": 3.101103418845702e-05, "loss": 0.1882, "step": 14843 }, { "epoch": 1.76022767698328, "grad_norm": 0.9975476227910289, "learning_rate": 3.1008704217418915e-05, "loss": 0.1673, "step": 14844 }, { "epoch": 1.7603462587454048, "grad_norm": 0.9194817215568818, "learning_rate": 3.100637419098793e-05, "loss": 0.1644, "step": 14845 }, { "epoch": 1.76046484050753, "grad_norm": 1.0648347752798892, "learning_rate": 3.100404410918553e-05, "loss": 0.2222, "step": 14846 }, { "epoch": 1.7605834222696548, "grad_norm": 0.9026856360847394, "learning_rate": 3.100171397203321e-05, "loss": 0.1811, "step": 14847 }, { "epoch": 1.76070200403178, "grad_norm": 0.736796544618207, "learning_rate": 3.099938377955245e-05, "loss": 0.157, "step": 14848 }, { "epoch": 1.7608205857939048, "grad_norm": 0.9164514304698249, "learning_rate": 3.099705353176473e-05, "loss": 0.1738, "step": 14849 }, { "epoch": 1.76093916755603, "grad_norm": 0.8438788198776679, "learning_rate": 3.099472322869152e-05, "loss": 0.1793, "step": 14850 }, { "epoch": 1.7610577493181547, "grad_norm": 0.818062052804721, "learning_rate": 3.099239287035432e-05, "loss": 0.1471, "step": 14851 }, { "epoch": 1.76117633108028, "grad_norm": 1.0953319062713938, "learning_rate": 3.099006245677461e-05, "loss": 0.2215, "step": 14852 }, { "epoch": 1.7612949128424047, "grad_norm": 0.6900863229846177, "learning_rate": 3.098773198797387e-05, "loss": 0.1516, "step": 14853 }, { "epoch": 1.76141349460453, "grad_norm": 1.3250150747414815, "learning_rate": 3.098540146397358e-05, "loss": 0.2375, "step": 14854 }, { "epoch": 1.7615320763666547, "grad_norm": 0.8426236083124936, "learning_rate": 3.0983070884795224e-05, "loss": 0.162, "step": 14855 }, { "epoch": 1.7616506581287799, "grad_norm": 1.644844554889216, "learning_rate": 3.098074025046029e-05, "loss": 0.3373, "step": 14856 }, { "epoch": 1.7617692398909046, "grad_norm": 1.0056981877051856, "learning_rate": 3.097840956099027e-05, "loss": 0.1874, "step": 14857 }, { "epoch": 1.7618878216530298, "grad_norm": 1.8666892052221433, "learning_rate": 3.097607881640665e-05, "loss": 0.4126, "step": 14858 }, { "epoch": 1.7620064034151548, "grad_norm": 0.8536813796677761, "learning_rate": 3.097374801673091e-05, "loss": 0.1517, "step": 14859 }, { "epoch": 1.7621249851772798, "grad_norm": 0.8759794903410523, "learning_rate": 3.0971417161984534e-05, "loss": 0.1344, "step": 14860 }, { "epoch": 1.7622435669394048, "grad_norm": 0.9307098463954941, "learning_rate": 3.096908625218902e-05, "loss": 0.2269, "step": 14861 }, { "epoch": 1.7623621487015297, "grad_norm": 0.6872345344468768, "learning_rate": 3.096675528736585e-05, "loss": 0.1812, "step": 14862 }, { "epoch": 1.7624807304636547, "grad_norm": 0.9745679827003817, "learning_rate": 3.09644242675365e-05, "loss": 0.1809, "step": 14863 }, { "epoch": 1.7625993122257797, "grad_norm": 1.1339729070058702, "learning_rate": 3.096209319272249e-05, "loss": 0.2187, "step": 14864 }, { "epoch": 1.7627178939879047, "grad_norm": 1.1232845296982497, "learning_rate": 3.095976206294528e-05, "loss": 0.2575, "step": 14865 }, { "epoch": 1.7628364757500297, "grad_norm": 0.7777146269260895, "learning_rate": 3.095743087822637e-05, "loss": 0.1693, "step": 14866 }, { "epoch": 1.7629550575121546, "grad_norm": 0.9068219057166435, "learning_rate": 3.095509963858726e-05, "loss": 0.2083, "step": 14867 }, { "epoch": 1.7630736392742796, "grad_norm": 0.7762841243825908, "learning_rate": 3.095276834404944e-05, "loss": 0.1443, "step": 14868 }, { "epoch": 1.7631922210364046, "grad_norm": 1.1436813341941612, "learning_rate": 3.095043699463439e-05, "loss": 0.2448, "step": 14869 }, { "epoch": 1.7633108027985296, "grad_norm": 0.8748980642054183, "learning_rate": 3.0948105590363604e-05, "loss": 0.1577, "step": 14870 }, { "epoch": 1.7634293845606546, "grad_norm": 0.7238568695665142, "learning_rate": 3.0945774131258584e-05, "loss": 0.145, "step": 14871 }, { "epoch": 1.7635479663227795, "grad_norm": 1.0429400334298955, "learning_rate": 3.0943442617340815e-05, "loss": 0.1963, "step": 14872 }, { "epoch": 1.7636665480849045, "grad_norm": 1.0819674123791656, "learning_rate": 3.094111104863179e-05, "loss": 0.2487, "step": 14873 }, { "epoch": 1.7637851298470295, "grad_norm": 1.027277875988551, "learning_rate": 3.093877942515301e-05, "loss": 0.1963, "step": 14874 }, { "epoch": 1.7639037116091545, "grad_norm": 0.8512638129905388, "learning_rate": 3.0936447746925965e-05, "loss": 0.1716, "step": 14875 }, { "epoch": 1.7640222933712795, "grad_norm": 0.9607419498915598, "learning_rate": 3.093411601397215e-05, "loss": 0.2385, "step": 14876 }, { "epoch": 1.7641408751334045, "grad_norm": 0.6807954200171579, "learning_rate": 3.0931784226313065e-05, "loss": 0.1272, "step": 14877 }, { "epoch": 1.7642594568955294, "grad_norm": 0.9326963816414402, "learning_rate": 3.0929452383970204e-05, "loss": 0.2625, "step": 14878 }, { "epoch": 1.7643780386576544, "grad_norm": 0.7705740664861088, "learning_rate": 3.092712048696507e-05, "loss": 0.157, "step": 14879 }, { "epoch": 1.7644966204197794, "grad_norm": 0.8713794370736185, "learning_rate": 3.092478853531914e-05, "loss": 0.1859, "step": 14880 }, { "epoch": 1.7646152021819044, "grad_norm": 0.7536909456644649, "learning_rate": 3.092245652905393e-05, "loss": 0.2062, "step": 14881 }, { "epoch": 1.7647337839440294, "grad_norm": 0.9061926521508114, "learning_rate": 3.092012446819094e-05, "loss": 0.1515, "step": 14882 }, { "epoch": 1.7648523657061546, "grad_norm": 0.9931740595933588, "learning_rate": 3.091779235275166e-05, "loss": 0.1845, "step": 14883 }, { "epoch": 1.7649709474682793, "grad_norm": 1.0183512887573098, "learning_rate": 3.091546018275758e-05, "loss": 0.1629, "step": 14884 }, { "epoch": 1.7650895292304045, "grad_norm": 1.3535742509181703, "learning_rate": 3.091312795823022e-05, "loss": 0.3306, "step": 14885 }, { "epoch": 1.7652081109925293, "grad_norm": 1.0705997614139597, "learning_rate": 3.091079567919107e-05, "loss": 0.25, "step": 14886 }, { "epoch": 1.7653266927546545, "grad_norm": 1.4562354521449878, "learning_rate": 3.0908463345661626e-05, "loss": 0.3469, "step": 14887 }, { "epoch": 1.7654452745167792, "grad_norm": 1.1274859500662144, "learning_rate": 3.0906130957663406e-05, "loss": 0.184, "step": 14888 }, { "epoch": 1.7655638562789044, "grad_norm": 0.9178547106632408, "learning_rate": 3.0903798515217895e-05, "loss": 0.1945, "step": 14889 }, { "epoch": 1.7656824380410292, "grad_norm": 0.6894766091951534, "learning_rate": 3.090146601834661e-05, "loss": 0.1535, "step": 14890 }, { "epoch": 1.7658010198031544, "grad_norm": 0.7460550147882922, "learning_rate": 3.089913346707104e-05, "loss": 0.2011, "step": 14891 }, { "epoch": 1.7659196015652792, "grad_norm": 1.2387376068597697, "learning_rate": 3.089680086141269e-05, "loss": 0.2803, "step": 14892 }, { "epoch": 1.7660381833274044, "grad_norm": 1.3093663064585344, "learning_rate": 3.089446820139307e-05, "loss": 0.2886, "step": 14893 }, { "epoch": 1.7661567650895291, "grad_norm": 0.8905581770664908, "learning_rate": 3.089213548703368e-05, "loss": 0.182, "step": 14894 }, { "epoch": 1.7662753468516543, "grad_norm": 0.8540266705495438, "learning_rate": 3.088980271835603e-05, "loss": 0.1836, "step": 14895 }, { "epoch": 1.766393928613779, "grad_norm": 0.7212904008625894, "learning_rate": 3.0887469895381616e-05, "loss": 0.1648, "step": 14896 }, { "epoch": 1.7665125103759043, "grad_norm": 0.7934357530574591, "learning_rate": 3.088513701813195e-05, "loss": 0.1855, "step": 14897 }, { "epoch": 1.766631092138029, "grad_norm": 0.7026571016118718, "learning_rate": 3.088280408662854e-05, "loss": 0.1334, "step": 14898 }, { "epoch": 1.7667496739001542, "grad_norm": 1.099803772713782, "learning_rate": 3.088047110089289e-05, "loss": 0.2431, "step": 14899 }, { "epoch": 1.766868255662279, "grad_norm": 0.872946891277008, "learning_rate": 3.08781380609465e-05, "loss": 0.1782, "step": 14900 }, { "epoch": 1.7669868374244042, "grad_norm": 0.7424835877095736, "learning_rate": 3.08758049668109e-05, "loss": 0.17, "step": 14901 }, { "epoch": 1.767105419186529, "grad_norm": 0.7889994749197983, "learning_rate": 3.087347181850757e-05, "loss": 0.1852, "step": 14902 }, { "epoch": 1.7672240009486542, "grad_norm": 0.9506042980693984, "learning_rate": 3.087113861605804e-05, "loss": 0.2179, "step": 14903 }, { "epoch": 1.767342582710779, "grad_norm": 0.7053111609151248, "learning_rate": 3.086880535948381e-05, "loss": 0.1705, "step": 14904 }, { "epoch": 1.7674611644729041, "grad_norm": 1.0261610365139568, "learning_rate": 3.0866472048806394e-05, "loss": 0.209, "step": 14905 }, { "epoch": 1.767579746235029, "grad_norm": 0.8434221740293864, "learning_rate": 3.086413868404729e-05, "loss": 0.1926, "step": 14906 }, { "epoch": 1.767698327997154, "grad_norm": 1.3377230680880616, "learning_rate": 3.0861805265228035e-05, "loss": 0.2924, "step": 14907 }, { "epoch": 1.767816909759279, "grad_norm": 1.722104145074528, "learning_rate": 3.085947179237011e-05, "loss": 0.3928, "step": 14908 }, { "epoch": 1.767935491521404, "grad_norm": 0.8086444676962713, "learning_rate": 3.085713826549505e-05, "loss": 0.2293, "step": 14909 }, { "epoch": 1.768054073283529, "grad_norm": 1.586877750638903, "learning_rate": 3.085480468462435e-05, "loss": 0.3532, "step": 14910 }, { "epoch": 1.768172655045654, "grad_norm": 1.1244217399819385, "learning_rate": 3.0852471049779534e-05, "loss": 0.2448, "step": 14911 }, { "epoch": 1.768291236807779, "grad_norm": 1.0249064296568375, "learning_rate": 3.0850137360982104e-05, "loss": 0.2498, "step": 14912 }, { "epoch": 1.768409818569904, "grad_norm": 0.8252280907053989, "learning_rate": 3.084780361825359e-05, "loss": 0.1725, "step": 14913 }, { "epoch": 1.768528400332029, "grad_norm": 0.8174667627816152, "learning_rate": 3.08454698216155e-05, "loss": 0.17, "step": 14914 }, { "epoch": 1.768646982094154, "grad_norm": 0.743417016603677, "learning_rate": 3.084313597108934e-05, "loss": 0.1907, "step": 14915 }, { "epoch": 1.768765563856279, "grad_norm": 1.4697328983173994, "learning_rate": 3.084080206669663e-05, "loss": 0.2824, "step": 14916 }, { "epoch": 1.768884145618404, "grad_norm": 0.7494188534559302, "learning_rate": 3.083846810845889e-05, "loss": 0.142, "step": 14917 }, { "epoch": 1.7690027273805289, "grad_norm": 1.146947017402319, "learning_rate": 3.083613409639764e-05, "loss": 0.2644, "step": 14918 }, { "epoch": 1.7691213091426539, "grad_norm": 0.8244525448884426, "learning_rate": 3.083380003053438e-05, "loss": 0.1455, "step": 14919 }, { "epoch": 1.7692398909047788, "grad_norm": 0.8250536612431655, "learning_rate": 3.083146591089064e-05, "loss": 0.1432, "step": 14920 }, { "epoch": 1.7693584726669038, "grad_norm": 1.5362425789321994, "learning_rate": 3.082913173748794e-05, "loss": 0.3025, "step": 14921 }, { "epoch": 1.7694770544290288, "grad_norm": 0.6047014533119218, "learning_rate": 3.08267975103478e-05, "loss": 0.1183, "step": 14922 }, { "epoch": 1.7695956361911538, "grad_norm": 0.9864138487203894, "learning_rate": 3.082446322949172e-05, "loss": 0.2414, "step": 14923 }, { "epoch": 1.7697142179532788, "grad_norm": 0.7380453223072034, "learning_rate": 3.082212889494124e-05, "loss": 0.1589, "step": 14924 }, { "epoch": 1.7698327997154037, "grad_norm": 1.133064609588666, "learning_rate": 3.081979450671787e-05, "loss": 0.2509, "step": 14925 }, { "epoch": 1.7699513814775287, "grad_norm": 0.9825869699262936, "learning_rate": 3.0817460064843115e-05, "loss": 0.2453, "step": 14926 }, { "epoch": 1.7700699632396537, "grad_norm": 0.8381221991911467, "learning_rate": 3.0815125569338526e-05, "loss": 0.1471, "step": 14927 }, { "epoch": 1.7701885450017787, "grad_norm": 0.8460066512651663, "learning_rate": 3.081279102022561e-05, "loss": 0.1849, "step": 14928 }, { "epoch": 1.7703071267639037, "grad_norm": 0.6333603294719242, "learning_rate": 3.081045641752589e-05, "loss": 0.1176, "step": 14929 }, { "epoch": 1.7704257085260287, "grad_norm": 0.9039309878310735, "learning_rate": 3.0808121761260885e-05, "loss": 0.1611, "step": 14930 }, { "epoch": 1.7705442902881536, "grad_norm": 0.648047987368884, "learning_rate": 3.0805787051452115e-05, "loss": 0.1185, "step": 14931 }, { "epoch": 1.7706628720502786, "grad_norm": 1.4091414157375028, "learning_rate": 3.0803452288121116e-05, "loss": 0.2722, "step": 14932 }, { "epoch": 1.7707814538124036, "grad_norm": 0.7659554995026467, "learning_rate": 3.08011174712894e-05, "loss": 0.1684, "step": 14933 }, { "epoch": 1.7709000355745288, "grad_norm": 0.773453295120672, "learning_rate": 3.07987826009785e-05, "loss": 0.1639, "step": 14934 }, { "epoch": 1.7710186173366536, "grad_norm": 1.1035646769564353, "learning_rate": 3.0796447677209927e-05, "loss": 0.2582, "step": 14935 }, { "epoch": 1.7711371990987788, "grad_norm": 0.9472112866666685, "learning_rate": 3.079411270000521e-05, "loss": 0.1885, "step": 14936 }, { "epoch": 1.7712557808609035, "grad_norm": 1.0260743711977982, "learning_rate": 3.0791777669385895e-05, "loss": 0.2233, "step": 14937 }, { "epoch": 1.7713743626230287, "grad_norm": 0.8121638761776354, "learning_rate": 3.078944258537349e-05, "loss": 0.1674, "step": 14938 }, { "epoch": 1.7714929443851535, "grad_norm": 1.235920609229254, "learning_rate": 3.078710744798951e-05, "loss": 0.2424, "step": 14939 }, { "epoch": 1.7716115261472787, "grad_norm": 0.5843569087846404, "learning_rate": 3.078477225725551e-05, "loss": 0.1221, "step": 14940 }, { "epoch": 1.7717301079094034, "grad_norm": 0.7104824937842203, "learning_rate": 3.0782437013192985e-05, "loss": 0.1165, "step": 14941 }, { "epoch": 1.7718486896715286, "grad_norm": 0.7050943187254827, "learning_rate": 3.078010171582351e-05, "loss": 0.1623, "step": 14942 }, { "epoch": 1.7719672714336534, "grad_norm": 0.769093795182177, "learning_rate": 3.077776636516856e-05, "loss": 0.1526, "step": 14943 }, { "epoch": 1.7720858531957786, "grad_norm": 1.5062514343798399, "learning_rate": 3.07754309612497e-05, "loss": 0.3496, "step": 14944 }, { "epoch": 1.7722044349579034, "grad_norm": 0.6705266041276721, "learning_rate": 3.077309550408845e-05, "loss": 0.1375, "step": 14945 }, { "epoch": 1.7723230167200286, "grad_norm": 0.75812204408352, "learning_rate": 3.077075999370633e-05, "loss": 0.1544, "step": 14946 }, { "epoch": 1.7724415984821533, "grad_norm": 0.529950134762947, "learning_rate": 3.076842443012489e-05, "loss": 0.0821, "step": 14947 }, { "epoch": 1.7725601802442785, "grad_norm": 0.6327788617322649, "learning_rate": 3.076608881336565e-05, "loss": 0.1239, "step": 14948 }, { "epoch": 1.7726787620064033, "grad_norm": 0.7738752739076232, "learning_rate": 3.076375314345013e-05, "loss": 0.1719, "step": 14949 }, { "epoch": 1.7727973437685285, "grad_norm": 0.8541319348220874, "learning_rate": 3.076141742039989e-05, "loss": 0.1447, "step": 14950 }, { "epoch": 1.7729159255306532, "grad_norm": 1.1103026571327383, "learning_rate": 3.075908164423643e-05, "loss": 0.2498, "step": 14951 }, { "epoch": 1.7730345072927784, "grad_norm": 1.0602905206385898, "learning_rate": 3.0756745814981315e-05, "loss": 0.2753, "step": 14952 }, { "epoch": 1.7731530890549032, "grad_norm": 0.9444171922395216, "learning_rate": 3.0754409932656055e-05, "loss": 0.1999, "step": 14953 }, { "epoch": 1.7732716708170284, "grad_norm": 0.7149022722468383, "learning_rate": 3.075207399728219e-05, "loss": 0.1363, "step": 14954 }, { "epoch": 1.7733902525791532, "grad_norm": 0.7854202989572453, "learning_rate": 3.0749738008881254e-05, "loss": 0.1254, "step": 14955 }, { "epoch": 1.7735088343412784, "grad_norm": 0.939213903478177, "learning_rate": 3.074740196747479e-05, "loss": 0.1941, "step": 14956 }, { "epoch": 1.7736274161034031, "grad_norm": 0.7966194573334988, "learning_rate": 3.0745065873084336e-05, "loss": 0.1822, "step": 14957 }, { "epoch": 1.7737459978655283, "grad_norm": 0.9311032569758605, "learning_rate": 3.0742729725731404e-05, "loss": 0.1686, "step": 14958 }, { "epoch": 1.7738645796276533, "grad_norm": 0.7449887078203834, "learning_rate": 3.074039352543756e-05, "loss": 0.1467, "step": 14959 }, { "epoch": 1.7739831613897783, "grad_norm": 1.1204953385657457, "learning_rate": 3.073805727222432e-05, "loss": 0.2418, "step": 14960 }, { "epoch": 1.7741017431519033, "grad_norm": 0.8153322633095746, "learning_rate": 3.073572096611323e-05, "loss": 0.2334, "step": 14961 }, { "epoch": 1.7742203249140283, "grad_norm": 0.926695429131681, "learning_rate": 3.073338460712582e-05, "loss": 0.1579, "step": 14962 }, { "epoch": 1.7743389066761532, "grad_norm": 0.793108352436663, "learning_rate": 3.073104819528365e-05, "loss": 0.1728, "step": 14963 }, { "epoch": 1.7744574884382782, "grad_norm": 1.0332402582618052, "learning_rate": 3.072871173060823e-05, "loss": 0.1847, "step": 14964 }, { "epoch": 1.7745760702004032, "grad_norm": 0.7789862081011926, "learning_rate": 3.0726375213121116e-05, "loss": 0.1526, "step": 14965 }, { "epoch": 1.7746946519625282, "grad_norm": 0.7982639673737543, "learning_rate": 3.072403864284384e-05, "loss": 0.205, "step": 14966 }, { "epoch": 1.7748132337246532, "grad_norm": 0.9861523152445598, "learning_rate": 3.072170201979795e-05, "loss": 0.2242, "step": 14967 }, { "epoch": 1.7749318154867781, "grad_norm": 0.7284610270076973, "learning_rate": 3.071936534400499e-05, "loss": 0.1317, "step": 14968 }, { "epoch": 1.7750503972489031, "grad_norm": 0.9728520346767124, "learning_rate": 3.071702861548649e-05, "loss": 0.1993, "step": 14969 }, { "epoch": 1.775168979011028, "grad_norm": 0.8971094188345041, "learning_rate": 3.0714691834263995e-05, "loss": 0.1976, "step": 14970 }, { "epoch": 1.775287560773153, "grad_norm": 0.8033992267568183, "learning_rate": 3.071235500035906e-05, "loss": 0.1952, "step": 14971 }, { "epoch": 1.775406142535278, "grad_norm": 0.9105917140746532, "learning_rate": 3.07100181137932e-05, "loss": 0.1681, "step": 14972 }, { "epoch": 1.775524724297403, "grad_norm": 0.8952590319991965, "learning_rate": 3.070768117458799e-05, "loss": 0.1764, "step": 14973 }, { "epoch": 1.775643306059528, "grad_norm": 0.9202958717461408, "learning_rate": 3.0705344182764956e-05, "loss": 0.2099, "step": 14974 }, { "epoch": 1.775761887821653, "grad_norm": 0.9213130553559689, "learning_rate": 3.070300713834564e-05, "loss": 0.2051, "step": 14975 }, { "epoch": 1.775880469583778, "grad_norm": 1.0180509796176664, "learning_rate": 3.0700670041351586e-05, "loss": 0.2582, "step": 14976 }, { "epoch": 1.775999051345903, "grad_norm": 0.551568828264305, "learning_rate": 3.0698332891804356e-05, "loss": 0.1147, "step": 14977 }, { "epoch": 1.776117633108028, "grad_norm": 0.8904363811469934, "learning_rate": 3.069599568972548e-05, "loss": 0.2116, "step": 14978 }, { "epoch": 1.776236214870153, "grad_norm": 0.8851490267346912, "learning_rate": 3.0693658435136504e-05, "loss": 0.1751, "step": 14979 }, { "epoch": 1.776354796632278, "grad_norm": 1.1680443817644814, "learning_rate": 3.0691321128058994e-05, "loss": 0.2197, "step": 14980 }, { "epoch": 1.776473378394403, "grad_norm": 0.9532568099802776, "learning_rate": 3.068898376851447e-05, "loss": 0.2169, "step": 14981 }, { "epoch": 1.7765919601565279, "grad_norm": 0.8198402535109108, "learning_rate": 3.068664635652449e-05, "loss": 0.1888, "step": 14982 }, { "epoch": 1.776710541918653, "grad_norm": 0.9429668379177729, "learning_rate": 3.068430889211061e-05, "loss": 0.188, "step": 14983 }, { "epoch": 1.7768291236807778, "grad_norm": 0.7535248557971345, "learning_rate": 3.068197137529437e-05, "loss": 0.1439, "step": 14984 }, { "epoch": 1.776947705442903, "grad_norm": 1.0537731571207458, "learning_rate": 3.067963380609732e-05, "loss": 0.1749, "step": 14985 }, { "epoch": 1.7770662872050278, "grad_norm": 0.9216070360814999, "learning_rate": 3.0677296184541016e-05, "loss": 0.187, "step": 14986 }, { "epoch": 1.777184868967153, "grad_norm": 0.7119286720533268, "learning_rate": 3.0674958510647004e-05, "loss": 0.1601, "step": 14987 }, { "epoch": 1.7773034507292778, "grad_norm": 0.9395134727814879, "learning_rate": 3.067262078443683e-05, "loss": 0.1757, "step": 14988 }, { "epoch": 1.777422032491403, "grad_norm": 0.7366965566647228, "learning_rate": 3.067028300593205e-05, "loss": 0.1326, "step": 14989 }, { "epoch": 1.7775406142535277, "grad_norm": 1.0205203795746545, "learning_rate": 3.066794517515422e-05, "loss": 0.1959, "step": 14990 }, { "epoch": 1.777659196015653, "grad_norm": 0.6816006946145708, "learning_rate": 3.066560729212488e-05, "loss": 0.1276, "step": 14991 }, { "epoch": 1.7777777777777777, "grad_norm": 0.6798234913047136, "learning_rate": 3.066326935686558e-05, "loss": 0.1377, "step": 14992 }, { "epoch": 1.7778963595399029, "grad_norm": 1.2113379719927337, "learning_rate": 3.066093136939789e-05, "loss": 0.2466, "step": 14993 }, { "epoch": 1.7780149413020276, "grad_norm": 0.7218613853612004, "learning_rate": 3.065859332974336e-05, "loss": 0.1771, "step": 14994 }, { "epoch": 1.7781335230641528, "grad_norm": 0.8149034277011729, "learning_rate": 3.065625523792353e-05, "loss": 0.1707, "step": 14995 }, { "epoch": 1.7782521048262776, "grad_norm": 1.584067850863626, "learning_rate": 3.065391709395996e-05, "loss": 0.2827, "step": 14996 }, { "epoch": 1.7783706865884028, "grad_norm": 0.9126565251704829, "learning_rate": 3.065157889787422e-05, "loss": 0.1604, "step": 14997 }, { "epoch": 1.7784892683505276, "grad_norm": 1.0352062493238612, "learning_rate": 3.0649240649687844e-05, "loss": 0.2165, "step": 14998 }, { "epoch": 1.7786078501126528, "grad_norm": 0.8600052620672848, "learning_rate": 3.0646902349422394e-05, "loss": 0.1965, "step": 14999 }, { "epoch": 1.7787264318747775, "grad_norm": 0.7377764338695952, "learning_rate": 3.0644563997099444e-05, "loss": 0.1608, "step": 15000 }, { "epoch": 1.7788450136369027, "grad_norm": 1.176408847205642, "learning_rate": 3.0642225592740515e-05, "loss": 0.2198, "step": 15001 }, { "epoch": 1.7789635953990275, "grad_norm": 1.0422527805153636, "learning_rate": 3.0639887136367204e-05, "loss": 0.1901, "step": 15002 }, { "epoch": 1.7790821771611527, "grad_norm": 1.3167774756997066, "learning_rate": 3.063754862800103e-05, "loss": 0.3092, "step": 15003 }, { "epoch": 1.7792007589232774, "grad_norm": 0.8650113684213617, "learning_rate": 3.063521006766358e-05, "loss": 0.1668, "step": 15004 }, { "epoch": 1.7793193406854027, "grad_norm": 1.045663671482237, "learning_rate": 3.063287145537641e-05, "loss": 0.1797, "step": 15005 }, { "epoch": 1.7794379224475274, "grad_norm": 0.8360143987195683, "learning_rate": 3.0630532791161074e-05, "loss": 0.1275, "step": 15006 }, { "epoch": 1.7795565042096526, "grad_norm": 0.7902629330071124, "learning_rate": 3.062819407503912e-05, "loss": 0.1997, "step": 15007 }, { "epoch": 1.7796750859717776, "grad_norm": 0.954664613654714, "learning_rate": 3.062585530703213e-05, "loss": 0.2607, "step": 15008 }, { "epoch": 1.7797936677339026, "grad_norm": 0.9263305888728587, "learning_rate": 3.062351648716164e-05, "loss": 0.1859, "step": 15009 }, { "epoch": 1.7799122494960276, "grad_norm": 0.817246767281114, "learning_rate": 3.0621177615449235e-05, "loss": 0.187, "step": 15010 }, { "epoch": 1.7800308312581525, "grad_norm": 1.2079901303750278, "learning_rate": 3.061883869191646e-05, "loss": 0.2487, "step": 15011 }, { "epoch": 1.7801494130202775, "grad_norm": 0.9699270278591883, "learning_rate": 3.061649971658488e-05, "loss": 0.1738, "step": 15012 }, { "epoch": 1.7802679947824025, "grad_norm": 0.7507735242219331, "learning_rate": 3.0614160689476064e-05, "loss": 0.1377, "step": 15013 }, { "epoch": 1.7803865765445275, "grad_norm": 1.1141087734442858, "learning_rate": 3.0611821610611565e-05, "loss": 0.2101, "step": 15014 }, { "epoch": 1.7805051583066525, "grad_norm": 0.6532339637609492, "learning_rate": 3.060948248001296e-05, "loss": 0.1807, "step": 15015 }, { "epoch": 1.7806237400687774, "grad_norm": 0.5643074455594136, "learning_rate": 3.0607143297701796e-05, "loss": 0.1427, "step": 15016 }, { "epoch": 1.7807423218309024, "grad_norm": 0.7555458571314678, "learning_rate": 3.060480406369966e-05, "loss": 0.1542, "step": 15017 }, { "epoch": 1.7808609035930274, "grad_norm": 1.2327339806365896, "learning_rate": 3.060246477802809e-05, "loss": 0.2766, "step": 15018 }, { "epoch": 1.7809794853551524, "grad_norm": 1.0940902733850615, "learning_rate": 3.0600125440708675e-05, "loss": 0.2074, "step": 15019 }, { "epoch": 1.7810980671172774, "grad_norm": 1.2063116015688733, "learning_rate": 3.0597786051762965e-05, "loss": 0.2749, "step": 15020 }, { "epoch": 1.7812166488794023, "grad_norm": 1.062567650995576, "learning_rate": 3.059544661121254e-05, "loss": 0.184, "step": 15021 }, { "epoch": 1.7813352306415273, "grad_norm": 0.9317571605688232, "learning_rate": 3.059310711907896e-05, "loss": 0.2265, "step": 15022 }, { "epoch": 1.7814538124036523, "grad_norm": 0.7919411203336657, "learning_rate": 3.0590767575383784e-05, "loss": 0.1988, "step": 15023 }, { "epoch": 1.7815723941657773, "grad_norm": 1.0198363396393977, "learning_rate": 3.058842798014858e-05, "loss": 0.2363, "step": 15024 }, { "epoch": 1.7816909759279023, "grad_norm": 1.480774275320996, "learning_rate": 3.058608833339493e-05, "loss": 0.2577, "step": 15025 }, { "epoch": 1.7818095576900272, "grad_norm": 0.8630360784642233, "learning_rate": 3.058374863514439e-05, "loss": 0.1718, "step": 15026 }, { "epoch": 1.7819281394521522, "grad_norm": 0.8706196892826681, "learning_rate": 3.058140888541854e-05, "loss": 0.1833, "step": 15027 }, { "epoch": 1.7820467212142772, "grad_norm": 0.9402048825943531, "learning_rate": 3.057906908423895e-05, "loss": 0.202, "step": 15028 }, { "epoch": 1.7821653029764022, "grad_norm": 0.8326133698185489, "learning_rate": 3.0576729231627184e-05, "loss": 0.2166, "step": 15029 }, { "epoch": 1.7822838847385272, "grad_norm": 0.7161868396325797, "learning_rate": 3.0574389327604805e-05, "loss": 0.1385, "step": 15030 }, { "epoch": 1.7824024665006521, "grad_norm": 1.1933798182262316, "learning_rate": 3.05720493721934e-05, "loss": 0.204, "step": 15031 }, { "epoch": 1.7825210482627771, "grad_norm": 1.042768934772802, "learning_rate": 3.056970936541453e-05, "loss": 0.2543, "step": 15032 }, { "epoch": 1.782639630024902, "grad_norm": 0.9144502665515345, "learning_rate": 3.056736930728977e-05, "loss": 0.164, "step": 15033 }, { "epoch": 1.7827582117870273, "grad_norm": 0.8355971731378653, "learning_rate": 3.056502919784069e-05, "loss": 0.195, "step": 15034 }, { "epoch": 1.782876793549152, "grad_norm": 0.7211392836998718, "learning_rate": 3.056268903708886e-05, "loss": 0.1276, "step": 15035 }, { "epoch": 1.7829953753112773, "grad_norm": 0.869305008338028, "learning_rate": 3.056034882505587e-05, "loss": 0.2169, "step": 15036 }, { "epoch": 1.783113957073402, "grad_norm": 1.1494142636927818, "learning_rate": 3.055800856176327e-05, "loss": 0.2094, "step": 15037 }, { "epoch": 1.7832325388355272, "grad_norm": 0.7256864512178909, "learning_rate": 3.055566824723266e-05, "loss": 0.1495, "step": 15038 }, { "epoch": 1.783351120597652, "grad_norm": 0.9934954311585353, "learning_rate": 3.05533278814856e-05, "loss": 0.1864, "step": 15039 }, { "epoch": 1.7834697023597772, "grad_norm": 0.9194564824181981, "learning_rate": 3.055098746454366e-05, "loss": 0.2458, "step": 15040 }, { "epoch": 1.783588284121902, "grad_norm": 0.831704591642692, "learning_rate": 3.054864699642842e-05, "loss": 0.1453, "step": 15041 }, { "epoch": 1.7837068658840272, "grad_norm": 0.9378154091020408, "learning_rate": 3.054630647716146e-05, "loss": 0.2276, "step": 15042 }, { "epoch": 1.783825447646152, "grad_norm": 0.8962476704165832, "learning_rate": 3.0543965906764364e-05, "loss": 0.1842, "step": 15043 }, { "epoch": 1.7839440294082771, "grad_norm": 0.9313003693192361, "learning_rate": 3.0541625285258695e-05, "loss": 0.194, "step": 15044 }, { "epoch": 1.7840626111704019, "grad_norm": 0.7060279050752727, "learning_rate": 3.0539284612666036e-05, "loss": 0.1588, "step": 15045 }, { "epoch": 1.784181192932527, "grad_norm": 0.6942513710876012, "learning_rate": 3.053694388900796e-05, "loss": 0.1285, "step": 15046 }, { "epoch": 1.7842997746946518, "grad_norm": 0.8277999247796552, "learning_rate": 3.0534603114306065e-05, "loss": 0.154, "step": 15047 }, { "epoch": 1.784418356456777, "grad_norm": 0.8362993102099181, "learning_rate": 3.053226228858191e-05, "loss": 0.2005, "step": 15048 }, { "epoch": 1.7845369382189018, "grad_norm": 0.7007602192444733, "learning_rate": 3.0529921411857083e-05, "loss": 0.1432, "step": 15049 }, { "epoch": 1.784655519981027, "grad_norm": 0.7728600934580757, "learning_rate": 3.052758048415316e-05, "loss": 0.1928, "step": 15050 }, { "epoch": 1.7847741017431518, "grad_norm": 1.0406956162897327, "learning_rate": 3.052523950549172e-05, "loss": 0.1924, "step": 15051 }, { "epoch": 1.784892683505277, "grad_norm": 0.6120556733696315, "learning_rate": 3.052289847589435e-05, "loss": 0.1499, "step": 15052 }, { "epoch": 1.7850112652674017, "grad_norm": 0.906324946952052, "learning_rate": 3.052055739538263e-05, "loss": 0.1948, "step": 15053 }, { "epoch": 1.785129847029527, "grad_norm": 1.2520699830206519, "learning_rate": 3.0518216263978136e-05, "loss": 0.2368, "step": 15054 }, { "epoch": 1.7852484287916517, "grad_norm": 1.0189197408386639, "learning_rate": 3.0515875081702455e-05, "loss": 0.2049, "step": 15055 }, { "epoch": 1.785367010553777, "grad_norm": 0.9821141041547082, "learning_rate": 3.051353384857717e-05, "loss": 0.2048, "step": 15056 }, { "epoch": 1.7854855923159016, "grad_norm": 0.6675484179948086, "learning_rate": 3.051119256462387e-05, "loss": 0.1505, "step": 15057 }, { "epoch": 1.7856041740780269, "grad_norm": 1.0225506906417692, "learning_rate": 3.0508851229864126e-05, "loss": 0.2176, "step": 15058 }, { "epoch": 1.7857227558401518, "grad_norm": 0.8316844997245436, "learning_rate": 3.0506509844319536e-05, "loss": 0.184, "step": 15059 }, { "epoch": 1.7858413376022768, "grad_norm": 1.1294858162788888, "learning_rate": 3.0504168408011675e-05, "loss": 0.2417, "step": 15060 }, { "epoch": 1.7859599193644018, "grad_norm": 0.8891842119211248, "learning_rate": 3.050182692096213e-05, "loss": 0.1762, "step": 15061 }, { "epoch": 1.7860785011265268, "grad_norm": 1.1480686747320858, "learning_rate": 3.0499485383192488e-05, "loss": 0.2262, "step": 15062 }, { "epoch": 1.7861970828886518, "grad_norm": 0.7085558198366173, "learning_rate": 3.0497143794724337e-05, "loss": 0.1709, "step": 15063 }, { "epoch": 1.7863156646507767, "grad_norm": 1.1634498230188466, "learning_rate": 3.0494802155579255e-05, "loss": 0.1955, "step": 15064 }, { "epoch": 1.7864342464129017, "grad_norm": 0.8670576851963812, "learning_rate": 3.0492460465778837e-05, "loss": 0.1618, "step": 15065 }, { "epoch": 1.7865528281750267, "grad_norm": 0.8514071422251481, "learning_rate": 3.0490118725344674e-05, "loss": 0.1645, "step": 15066 }, { "epoch": 1.7866714099371517, "grad_norm": 1.5799228254459239, "learning_rate": 3.048777693429834e-05, "loss": 0.3154, "step": 15067 }, { "epoch": 1.7867899916992767, "grad_norm": 0.7002940213229736, "learning_rate": 3.048543509266144e-05, "loss": 0.1402, "step": 15068 }, { "epoch": 1.7869085734614016, "grad_norm": 0.808569163294969, "learning_rate": 3.0483093200455555e-05, "loss": 0.1812, "step": 15069 }, { "epoch": 1.7870271552235266, "grad_norm": 0.9085508765663662, "learning_rate": 3.0480751257702267e-05, "loss": 0.2018, "step": 15070 }, { "epoch": 1.7871457369856516, "grad_norm": 1.1590469291362477, "learning_rate": 3.0478409264423185e-05, "loss": 0.2492, "step": 15071 }, { "epoch": 1.7872643187477766, "grad_norm": 0.8165589159503883, "learning_rate": 3.047606722063988e-05, "loss": 0.1915, "step": 15072 }, { "epoch": 1.7873829005099016, "grad_norm": 0.7786564104557779, "learning_rate": 3.0473725126373952e-05, "loss": 0.1444, "step": 15073 }, { "epoch": 1.7875014822720265, "grad_norm": 1.2685882888561644, "learning_rate": 3.047138298164699e-05, "loss": 0.2116, "step": 15074 }, { "epoch": 1.7876200640341515, "grad_norm": 0.9161617640014321, "learning_rate": 3.0469040786480587e-05, "loss": 0.2539, "step": 15075 }, { "epoch": 1.7877386457962765, "grad_norm": 0.7121051470008238, "learning_rate": 3.0466698540896333e-05, "loss": 0.1494, "step": 15076 }, { "epoch": 1.7878572275584015, "grad_norm": 1.3318210647411703, "learning_rate": 3.0464356244915825e-05, "loss": 0.219, "step": 15077 }, { "epoch": 1.7879758093205265, "grad_norm": 0.9342947771225024, "learning_rate": 3.0462013898560648e-05, "loss": 0.2032, "step": 15078 }, { "epoch": 1.7880943910826514, "grad_norm": 0.96882676450013, "learning_rate": 3.045967150185241e-05, "loss": 0.1979, "step": 15079 }, { "epoch": 1.7882129728447764, "grad_norm": 0.7143986377637299, "learning_rate": 3.0457329054812688e-05, "loss": 0.149, "step": 15080 }, { "epoch": 1.7883315546069014, "grad_norm": 1.0163838122433735, "learning_rate": 3.045498655746309e-05, "loss": 0.2226, "step": 15081 }, { "epoch": 1.7884501363690264, "grad_norm": 0.7641621925583265, "learning_rate": 3.04526440098252e-05, "loss": 0.163, "step": 15082 }, { "epoch": 1.7885687181311516, "grad_norm": 0.9102147617446654, "learning_rate": 3.0450301411920618e-05, "loss": 0.1707, "step": 15083 }, { "epoch": 1.7886872998932764, "grad_norm": 1.130981273940528, "learning_rate": 3.0447958763770946e-05, "loss": 0.2366, "step": 15084 }, { "epoch": 1.7888058816554016, "grad_norm": 0.9359421468752543, "learning_rate": 3.0445616065397763e-05, "loss": 0.2055, "step": 15085 }, { "epoch": 1.7889244634175263, "grad_norm": 0.873492877673776, "learning_rate": 3.0443273316822696e-05, "loss": 0.2034, "step": 15086 }, { "epoch": 1.7890430451796515, "grad_norm": 0.7094584358565091, "learning_rate": 3.0440930518067313e-05, "loss": 0.1612, "step": 15087 }, { "epoch": 1.7891616269417763, "grad_norm": 0.7015105064174699, "learning_rate": 3.0438587669153228e-05, "loss": 0.156, "step": 15088 }, { "epoch": 1.7892802087039015, "grad_norm": 1.3203278050531417, "learning_rate": 3.043624477010203e-05, "loss": 0.2429, "step": 15089 }, { "epoch": 1.7893987904660262, "grad_norm": 1.002352749516082, "learning_rate": 3.0433901820935323e-05, "loss": 0.2344, "step": 15090 }, { "epoch": 1.7895173722281514, "grad_norm": 1.0472292542025867, "learning_rate": 3.04315588216747e-05, "loss": 0.204, "step": 15091 }, { "epoch": 1.7896359539902762, "grad_norm": 0.8671128480233045, "learning_rate": 3.0429215772341773e-05, "loss": 0.1446, "step": 15092 }, { "epoch": 1.7897545357524014, "grad_norm": 0.949239721749239, "learning_rate": 3.0426872672958127e-05, "loss": 0.2235, "step": 15093 }, { "epoch": 1.7898731175145262, "grad_norm": 0.7749236962079176, "learning_rate": 3.0424529523545375e-05, "loss": 0.1497, "step": 15094 }, { "epoch": 1.7899916992766514, "grad_norm": 1.3278212978568515, "learning_rate": 3.0422186324125106e-05, "loss": 0.2652, "step": 15095 }, { "epoch": 1.7901102810387761, "grad_norm": 0.787656950762092, "learning_rate": 3.041984307471894e-05, "loss": 0.1946, "step": 15096 }, { "epoch": 1.7902288628009013, "grad_norm": 0.8533510174075591, "learning_rate": 3.0417499775348456e-05, "loss": 0.2073, "step": 15097 }, { "epoch": 1.790347444563026, "grad_norm": 0.8411985974456255, "learning_rate": 3.0415156426035275e-05, "loss": 0.1779, "step": 15098 }, { "epoch": 1.7904660263251513, "grad_norm": 0.9947739802413572, "learning_rate": 3.041281302680099e-05, "loss": 0.2616, "step": 15099 }, { "epoch": 1.790584608087276, "grad_norm": 1.0104347502952047, "learning_rate": 3.04104695776672e-05, "loss": 0.198, "step": 15100 }, { "epoch": 1.7907031898494012, "grad_norm": 0.8749769007912434, "learning_rate": 3.040812607865553e-05, "loss": 0.1507, "step": 15101 }, { "epoch": 1.790821771611526, "grad_norm": 1.0426841810320806, "learning_rate": 3.040578252978756e-05, "loss": 0.189, "step": 15102 }, { "epoch": 1.7909403533736512, "grad_norm": 0.8119972545994057, "learning_rate": 3.0403438931084906e-05, "loss": 0.2008, "step": 15103 }, { "epoch": 1.791058935135776, "grad_norm": 0.9879411518619514, "learning_rate": 3.0401095282569164e-05, "loss": 0.1612, "step": 15104 }, { "epoch": 1.7911775168979012, "grad_norm": 1.175734808141919, "learning_rate": 3.039875158426195e-05, "loss": 0.3046, "step": 15105 }, { "epoch": 1.791296098660026, "grad_norm": 0.8854419016125191, "learning_rate": 3.039640783618487e-05, "loss": 0.1798, "step": 15106 }, { "epoch": 1.7914146804221511, "grad_norm": 1.4170225891725052, "learning_rate": 3.0394064038359526e-05, "loss": 0.2838, "step": 15107 }, { "epoch": 1.791533262184276, "grad_norm": 1.0318708646502612, "learning_rate": 3.0391720190807528e-05, "loss": 0.1907, "step": 15108 }, { "epoch": 1.791651843946401, "grad_norm": 0.7220823211156863, "learning_rate": 3.038937629355048e-05, "loss": 0.19, "step": 15109 }, { "epoch": 1.791770425708526, "grad_norm": 0.9113796520628461, "learning_rate": 3.0387032346609995e-05, "loss": 0.1505, "step": 15110 }, { "epoch": 1.791889007470651, "grad_norm": 0.8636816079809962, "learning_rate": 3.0384688350007673e-05, "loss": 0.1737, "step": 15111 }, { "epoch": 1.792007589232776, "grad_norm": 0.684387995312786, "learning_rate": 3.038234430376513e-05, "loss": 0.1551, "step": 15112 }, { "epoch": 1.792126170994901, "grad_norm": 0.995834711040808, "learning_rate": 3.038000020790397e-05, "loss": 0.1999, "step": 15113 }, { "epoch": 1.792244752757026, "grad_norm": 0.9854973981995784, "learning_rate": 3.03776560624458e-05, "loss": 0.2123, "step": 15114 }, { "epoch": 1.792363334519151, "grad_norm": 1.2093926812717746, "learning_rate": 3.037531186741225e-05, "loss": 0.2837, "step": 15115 }, { "epoch": 1.792481916281276, "grad_norm": 0.7347787174386884, "learning_rate": 3.037296762282491e-05, "loss": 0.1628, "step": 15116 }, { "epoch": 1.792600498043401, "grad_norm": 0.7990073752812888, "learning_rate": 3.0370623328705396e-05, "loss": 0.1594, "step": 15117 }, { "epoch": 1.792719079805526, "grad_norm": 0.7580084617810435, "learning_rate": 3.036827898507532e-05, "loss": 0.1729, "step": 15118 }, { "epoch": 1.792837661567651, "grad_norm": 0.8370990284197574, "learning_rate": 3.0365934591956297e-05, "loss": 0.1524, "step": 15119 }, { "epoch": 1.7929562433297759, "grad_norm": 0.8575889348462968, "learning_rate": 3.0363590149369935e-05, "loss": 0.1648, "step": 15120 }, { "epoch": 1.7930748250919009, "grad_norm": 0.7589098387048121, "learning_rate": 3.036124565733785e-05, "loss": 0.1481, "step": 15121 }, { "epoch": 1.7931934068540258, "grad_norm": 0.7493453031170967, "learning_rate": 3.035890111588166e-05, "loss": 0.1805, "step": 15122 }, { "epoch": 1.7933119886161508, "grad_norm": 1.1922993449938477, "learning_rate": 3.0356556525022967e-05, "loss": 0.2409, "step": 15123 }, { "epoch": 1.7934305703782758, "grad_norm": 0.9332618609896659, "learning_rate": 3.035421188478339e-05, "loss": 0.1698, "step": 15124 }, { "epoch": 1.7935491521404008, "grad_norm": 0.9296148605729271, "learning_rate": 3.0351867195184546e-05, "loss": 0.1838, "step": 15125 }, { "epoch": 1.7936677339025258, "grad_norm": 0.9865476139977759, "learning_rate": 3.0349522456248054e-05, "loss": 0.1866, "step": 15126 }, { "epoch": 1.7937863156646507, "grad_norm": 0.6661975462218911, "learning_rate": 3.034717766799552e-05, "loss": 0.1232, "step": 15127 }, { "epoch": 1.7939048974267757, "grad_norm": 0.6364870741996531, "learning_rate": 3.0344832830448566e-05, "loss": 0.1378, "step": 15128 }, { "epoch": 1.7940234791889007, "grad_norm": 0.8690302360288122, "learning_rate": 3.0342487943628812e-05, "loss": 0.1837, "step": 15129 }, { "epoch": 1.7941420609510257, "grad_norm": 1.0647984479267858, "learning_rate": 3.0340143007557864e-05, "loss": 0.2676, "step": 15130 }, { "epoch": 1.7942606427131507, "grad_norm": 0.7935735954744033, "learning_rate": 3.0337798022257353e-05, "loss": 0.1464, "step": 15131 }, { "epoch": 1.7943792244752759, "grad_norm": 0.9625630148857761, "learning_rate": 3.0335452987748887e-05, "loss": 0.2092, "step": 15132 }, { "epoch": 1.7944978062374006, "grad_norm": 0.8680981898868116, "learning_rate": 3.0333107904054082e-05, "loss": 0.1884, "step": 15133 }, { "epoch": 1.7946163879995258, "grad_norm": 1.022396140066591, "learning_rate": 3.0330762771194564e-05, "loss": 0.1762, "step": 15134 }, { "epoch": 1.7947349697616506, "grad_norm": 1.0445331361915915, "learning_rate": 3.0328417589191953e-05, "loss": 0.1821, "step": 15135 }, { "epoch": 1.7948535515237758, "grad_norm": 0.9287804388885226, "learning_rate": 3.0326072358067865e-05, "loss": 0.1658, "step": 15136 }, { "epoch": 1.7949721332859006, "grad_norm": 0.7848682682898168, "learning_rate": 3.0323727077843927e-05, "loss": 0.1365, "step": 15137 }, { "epoch": 1.7950907150480258, "grad_norm": 0.9035300127518623, "learning_rate": 3.0321381748541744e-05, "loss": 0.1803, "step": 15138 }, { "epoch": 1.7952092968101505, "grad_norm": 0.9237393098342, "learning_rate": 3.0319036370182957e-05, "loss": 0.2235, "step": 15139 }, { "epoch": 1.7953278785722757, "grad_norm": 0.7577108488858775, "learning_rate": 3.0316690942789168e-05, "loss": 0.1602, "step": 15140 }, { "epoch": 1.7954464603344005, "grad_norm": 0.8902109742225938, "learning_rate": 3.0314345466382014e-05, "loss": 0.1374, "step": 15141 }, { "epoch": 1.7955650420965257, "grad_norm": 1.2491200537282414, "learning_rate": 3.031199994098311e-05, "loss": 0.2703, "step": 15142 }, { "epoch": 1.7956836238586504, "grad_norm": 0.8512950645413768, "learning_rate": 3.0309654366614077e-05, "loss": 0.2045, "step": 15143 }, { "epoch": 1.7958022056207756, "grad_norm": 0.8153526486194095, "learning_rate": 3.0307308743296543e-05, "loss": 0.1865, "step": 15144 }, { "epoch": 1.7959207873829004, "grad_norm": 0.7205293282425662, "learning_rate": 3.030496307105213e-05, "loss": 0.1219, "step": 15145 }, { "epoch": 1.7960393691450256, "grad_norm": 0.8432247174457126, "learning_rate": 3.0302617349902467e-05, "loss": 0.1537, "step": 15146 }, { "epoch": 1.7961579509071504, "grad_norm": 0.9184750924442107, "learning_rate": 3.0300271579869173e-05, "loss": 0.1787, "step": 15147 }, { "epoch": 1.7962765326692756, "grad_norm": 0.7055807895837582, "learning_rate": 3.0297925760973876e-05, "loss": 0.1603, "step": 15148 }, { "epoch": 1.7963951144314003, "grad_norm": 0.926546078610508, "learning_rate": 3.0295579893238196e-05, "loss": 0.2097, "step": 15149 }, { "epoch": 1.7965136961935255, "grad_norm": 0.8381360355131298, "learning_rate": 3.029323397668377e-05, "loss": 0.2092, "step": 15150 }, { "epoch": 1.7966322779556503, "grad_norm": 0.6224614451050496, "learning_rate": 3.0290888011332212e-05, "loss": 0.1095, "step": 15151 }, { "epoch": 1.7967508597177755, "grad_norm": 1.0446531147203062, "learning_rate": 3.0288541997205162e-05, "loss": 0.2509, "step": 15152 }, { "epoch": 1.7968694414799002, "grad_norm": 1.1698832100488008, "learning_rate": 3.028619593432423e-05, "loss": 0.2497, "step": 15153 }, { "epoch": 1.7969880232420254, "grad_norm": 0.923408046294736, "learning_rate": 3.0283849822711057e-05, "loss": 0.1524, "step": 15154 }, { "epoch": 1.7971066050041502, "grad_norm": 0.6647565274894796, "learning_rate": 3.0281503662387266e-05, "loss": 0.1357, "step": 15155 }, { "epoch": 1.7972251867662754, "grad_norm": 1.1012529538844202, "learning_rate": 3.027915745337449e-05, "loss": 0.2267, "step": 15156 }, { "epoch": 1.7973437685284004, "grad_norm": 1.1055065388469718, "learning_rate": 3.027681119569436e-05, "loss": 0.2895, "step": 15157 }, { "epoch": 1.7974623502905254, "grad_norm": 1.659422312752869, "learning_rate": 3.02744648893685e-05, "loss": 0.3695, "step": 15158 }, { "epoch": 1.7975809320526503, "grad_norm": 0.9575122080837081, "learning_rate": 3.0272118534418542e-05, "loss": 0.1872, "step": 15159 }, { "epoch": 1.7976995138147753, "grad_norm": 0.8921310913126703, "learning_rate": 3.0269772130866116e-05, "loss": 0.1713, "step": 15160 }, { "epoch": 1.7978180955769003, "grad_norm": 0.891996189661989, "learning_rate": 3.0267425678732853e-05, "loss": 0.2029, "step": 15161 }, { "epoch": 1.7979366773390253, "grad_norm": 0.9277615167572174, "learning_rate": 3.0265079178040378e-05, "loss": 0.202, "step": 15162 }, { "epoch": 1.7980552591011503, "grad_norm": 0.7906371024841173, "learning_rate": 3.0262732628810336e-05, "loss": 0.175, "step": 15163 }, { "epoch": 1.7981738408632753, "grad_norm": 1.2059077480745255, "learning_rate": 3.0260386031064348e-05, "loss": 0.3199, "step": 15164 }, { "epoch": 1.7982924226254002, "grad_norm": 1.196806725936194, "learning_rate": 3.025803938482406e-05, "loss": 0.1791, "step": 15165 }, { "epoch": 1.7984110043875252, "grad_norm": 0.8649698282977638, "learning_rate": 3.025569269011109e-05, "loss": 0.1758, "step": 15166 }, { "epoch": 1.7985295861496502, "grad_norm": 0.8237529866245534, "learning_rate": 3.025334594694708e-05, "loss": 0.1727, "step": 15167 }, { "epoch": 1.7986481679117752, "grad_norm": 0.8799517695554747, "learning_rate": 3.0250999155353665e-05, "loss": 0.1801, "step": 15168 }, { "epoch": 1.7987667496739002, "grad_norm": 0.8653762478060637, "learning_rate": 3.0248652315352476e-05, "loss": 0.1531, "step": 15169 }, { "epoch": 1.7988853314360251, "grad_norm": 0.9687217116318256, "learning_rate": 3.024630542696515e-05, "loss": 0.1865, "step": 15170 }, { "epoch": 1.7990039131981501, "grad_norm": 0.9614879824581637, "learning_rate": 3.0243958490213314e-05, "loss": 0.1723, "step": 15171 }, { "epoch": 1.799122494960275, "grad_norm": 0.8152498587171324, "learning_rate": 3.0241611505118617e-05, "loss": 0.146, "step": 15172 }, { "epoch": 1.7992410767224, "grad_norm": 0.7503507808764435, "learning_rate": 3.023926447170269e-05, "loss": 0.1786, "step": 15173 }, { "epoch": 1.799359658484525, "grad_norm": 0.9909166457345013, "learning_rate": 3.0236917389987164e-05, "loss": 0.1819, "step": 15174 }, { "epoch": 1.79947824024665, "grad_norm": 0.6490971410775556, "learning_rate": 3.023457025999368e-05, "loss": 0.1364, "step": 15175 }, { "epoch": 1.799596822008775, "grad_norm": 0.6271221231773376, "learning_rate": 3.0232223081743895e-05, "loss": 0.1285, "step": 15176 }, { "epoch": 1.7997154037709, "grad_norm": 1.1269145907481626, "learning_rate": 3.0229875855259414e-05, "loss": 0.318, "step": 15177 }, { "epoch": 1.799833985533025, "grad_norm": 0.7080285625080588, "learning_rate": 3.02275285805619e-05, "loss": 0.1437, "step": 15178 }, { "epoch": 1.79995256729515, "grad_norm": 1.195582649079599, "learning_rate": 3.0225181257672974e-05, "loss": 0.1973, "step": 15179 }, { "epoch": 1.800071149057275, "grad_norm": 0.7056648506827501, "learning_rate": 3.0222833886614287e-05, "loss": 0.1537, "step": 15180 }, { "epoch": 1.8001897308194, "grad_norm": 0.7634380181371664, "learning_rate": 3.0220486467407476e-05, "loss": 0.1679, "step": 15181 }, { "epoch": 1.800308312581525, "grad_norm": 0.8305258626733053, "learning_rate": 3.0218139000074184e-05, "loss": 0.2006, "step": 15182 }, { "epoch": 1.80042689434365, "grad_norm": 0.9614430946074176, "learning_rate": 3.0215791484636045e-05, "loss": 0.2417, "step": 15183 }, { "epoch": 1.8005454761057749, "grad_norm": 0.8289136051339754, "learning_rate": 3.021344392111471e-05, "loss": 0.2345, "step": 15184 }, { "epoch": 1.8006640578679, "grad_norm": 0.9665032960955334, "learning_rate": 3.021109630953181e-05, "loss": 0.1767, "step": 15185 }, { "epoch": 1.8007826396300248, "grad_norm": 1.0381549283258653, "learning_rate": 3.0208748649908992e-05, "loss": 0.2341, "step": 15186 }, { "epoch": 1.80090122139215, "grad_norm": 1.0135276992509468, "learning_rate": 3.0206400942267903e-05, "loss": 0.1753, "step": 15187 }, { "epoch": 1.8010198031542748, "grad_norm": 0.9851074408076888, "learning_rate": 3.0204053186630184e-05, "loss": 0.2157, "step": 15188 }, { "epoch": 1.8011383849164, "grad_norm": 0.7069709081805873, "learning_rate": 3.0201705383017477e-05, "loss": 0.1486, "step": 15189 }, { "epoch": 1.8012569666785248, "grad_norm": 1.058078552829204, "learning_rate": 3.0199357531451415e-05, "loss": 0.216, "step": 15190 }, { "epoch": 1.80137554844065, "grad_norm": 0.7255260183921318, "learning_rate": 3.019700963195366e-05, "loss": 0.1623, "step": 15191 }, { "epoch": 1.8014941302027747, "grad_norm": 1.5467491054195392, "learning_rate": 3.019466168454585e-05, "loss": 0.3077, "step": 15192 }, { "epoch": 1.8016127119649, "grad_norm": 0.5831057638334622, "learning_rate": 3.0192313689249623e-05, "loss": 0.131, "step": 15193 }, { "epoch": 1.8017312937270247, "grad_norm": 1.1086214986819922, "learning_rate": 3.0189965646086634e-05, "loss": 0.2681, "step": 15194 }, { "epoch": 1.8018498754891499, "grad_norm": 0.6323700057636477, "learning_rate": 3.0187617555078527e-05, "loss": 0.1433, "step": 15195 }, { "epoch": 1.8019684572512746, "grad_norm": 1.3728689354747305, "learning_rate": 3.0185269416246948e-05, "loss": 0.2884, "step": 15196 }, { "epoch": 1.8020870390133998, "grad_norm": 0.7284811163355518, "learning_rate": 3.018292122961355e-05, "loss": 0.1647, "step": 15197 }, { "epoch": 1.8022056207755246, "grad_norm": 0.6014879788591126, "learning_rate": 3.0180572995199962e-05, "loss": 0.1184, "step": 15198 }, { "epoch": 1.8023242025376498, "grad_norm": 1.009939388979859, "learning_rate": 3.0178224713027847e-05, "loss": 0.175, "step": 15199 }, { "epoch": 1.8024427842997746, "grad_norm": 1.1736012838801961, "learning_rate": 3.0175876383118855e-05, "loss": 0.2469, "step": 15200 }, { "epoch": 1.8025613660618998, "grad_norm": 1.0809549657714579, "learning_rate": 3.0173528005494623e-05, "loss": 0.3018, "step": 15201 }, { "epoch": 1.8026799478240245, "grad_norm": 0.8707304119369763, "learning_rate": 3.0171179580176816e-05, "loss": 0.2143, "step": 15202 }, { "epoch": 1.8027985295861497, "grad_norm": 0.7763017402391296, "learning_rate": 3.016883110718707e-05, "loss": 0.2098, "step": 15203 }, { "epoch": 1.8029171113482745, "grad_norm": 1.0676342060227528, "learning_rate": 3.0166482586547033e-05, "loss": 0.2159, "step": 15204 }, { "epoch": 1.8030356931103997, "grad_norm": 1.3177373353827886, "learning_rate": 3.0164134018278366e-05, "loss": 0.2086, "step": 15205 }, { "epoch": 1.8031542748725244, "grad_norm": 0.8231907594151363, "learning_rate": 3.0161785402402726e-05, "loss": 0.1829, "step": 15206 }, { "epoch": 1.8032728566346496, "grad_norm": 0.6775343531058204, "learning_rate": 3.0159436738941744e-05, "loss": 0.1595, "step": 15207 }, { "epoch": 1.8033914383967746, "grad_norm": 1.1345164424781191, "learning_rate": 3.0157088027917085e-05, "loss": 0.1845, "step": 15208 }, { "epoch": 1.8035100201588996, "grad_norm": 0.8007594495605799, "learning_rate": 3.0154739269350397e-05, "loss": 0.1715, "step": 15209 }, { "epoch": 1.8036286019210246, "grad_norm": 1.2253192155462074, "learning_rate": 3.015239046326334e-05, "loss": 0.3129, "step": 15210 }, { "epoch": 1.8037471836831496, "grad_norm": 0.44685152774035564, "learning_rate": 3.0150041609677554e-05, "loss": 0.1181, "step": 15211 }, { "epoch": 1.8038657654452745, "grad_norm": 0.7157588618011987, "learning_rate": 3.01476927086147e-05, "loss": 0.1851, "step": 15212 }, { "epoch": 1.8039843472073995, "grad_norm": 0.8767484621703151, "learning_rate": 3.0145343760096435e-05, "loss": 0.1753, "step": 15213 }, { "epoch": 1.8041029289695245, "grad_norm": 0.6807568994789905, "learning_rate": 3.0142994764144407e-05, "loss": 0.1452, "step": 15214 }, { "epoch": 1.8042215107316495, "grad_norm": 0.7805003746792329, "learning_rate": 3.014064572078028e-05, "loss": 0.1433, "step": 15215 }, { "epoch": 1.8043400924937745, "grad_norm": 0.8650929419677594, "learning_rate": 3.0138296630025696e-05, "loss": 0.1771, "step": 15216 }, { "epoch": 1.8044586742558995, "grad_norm": 0.7228113999420612, "learning_rate": 3.013594749190232e-05, "loss": 0.1505, "step": 15217 }, { "epoch": 1.8045772560180244, "grad_norm": 0.7615932269820929, "learning_rate": 3.013359830643181e-05, "loss": 0.1465, "step": 15218 }, { "epoch": 1.8046958377801494, "grad_norm": 1.3587083966242381, "learning_rate": 3.013124907363582e-05, "loss": 0.2333, "step": 15219 }, { "epoch": 1.8048144195422744, "grad_norm": 0.5269649522379545, "learning_rate": 3.0128899793536003e-05, "loss": 0.1409, "step": 15220 }, { "epoch": 1.8049330013043994, "grad_norm": 0.6832051578571429, "learning_rate": 3.012655046615402e-05, "loss": 0.1356, "step": 15221 }, { "epoch": 1.8050515830665244, "grad_norm": 1.1215541056903264, "learning_rate": 3.0124201091511527e-05, "loss": 0.2105, "step": 15222 }, { "epoch": 1.8051701648286493, "grad_norm": 1.0986712059210137, "learning_rate": 3.012185166963018e-05, "loss": 0.2195, "step": 15223 }, { "epoch": 1.8052887465907743, "grad_norm": 0.7034890540046537, "learning_rate": 3.0119502200531646e-05, "loss": 0.147, "step": 15224 }, { "epoch": 1.8054073283528993, "grad_norm": 1.347226414579973, "learning_rate": 3.011715268423758e-05, "loss": 0.2905, "step": 15225 }, { "epoch": 1.8055259101150243, "grad_norm": 0.7704153796071466, "learning_rate": 3.0114803120769642e-05, "loss": 0.1809, "step": 15226 }, { "epoch": 1.8056444918771493, "grad_norm": 0.7338520567410853, "learning_rate": 3.0112453510149484e-05, "loss": 0.1574, "step": 15227 }, { "epoch": 1.8057630736392742, "grad_norm": 0.907874191340384, "learning_rate": 3.0110103852398786e-05, "loss": 0.1843, "step": 15228 }, { "epoch": 1.8058816554013992, "grad_norm": 0.9088841774887669, "learning_rate": 3.0107754147539185e-05, "loss": 0.2221, "step": 15229 }, { "epoch": 1.8060002371635242, "grad_norm": 0.7609009955089608, "learning_rate": 3.010540439559236e-05, "loss": 0.177, "step": 15230 }, { "epoch": 1.8061188189256492, "grad_norm": 1.4646084576475629, "learning_rate": 3.0103054596579973e-05, "loss": 0.3375, "step": 15231 }, { "epoch": 1.8062374006877744, "grad_norm": 0.9045036234033605, "learning_rate": 3.010070475052368e-05, "loss": 0.1942, "step": 15232 }, { "epoch": 1.8063559824498991, "grad_norm": 0.7640047218126231, "learning_rate": 3.009835485744513e-05, "loss": 0.152, "step": 15233 }, { "epoch": 1.8064745642120243, "grad_norm": 0.9173896470133076, "learning_rate": 3.0096004917366012e-05, "loss": 0.1898, "step": 15234 }, { "epoch": 1.806593145974149, "grad_norm": 1.043075417077305, "learning_rate": 3.009365493030797e-05, "loss": 0.16, "step": 15235 }, { "epoch": 1.8067117277362743, "grad_norm": 0.7275174754493908, "learning_rate": 3.0091304896292682e-05, "loss": 0.2044, "step": 15236 }, { "epoch": 1.806830309498399, "grad_norm": 0.909334831950319, "learning_rate": 3.0088954815341803e-05, "loss": 0.1923, "step": 15237 }, { "epoch": 1.8069488912605243, "grad_norm": 0.6629246171655451, "learning_rate": 3.0086604687477004e-05, "loss": 0.147, "step": 15238 }, { "epoch": 1.807067473022649, "grad_norm": 1.1190756020745454, "learning_rate": 3.0084254512719946e-05, "loss": 0.2283, "step": 15239 }, { "epoch": 1.8071860547847742, "grad_norm": 1.114967319532472, "learning_rate": 3.00819042910923e-05, "loss": 0.2024, "step": 15240 }, { "epoch": 1.807304636546899, "grad_norm": 1.6740702817604287, "learning_rate": 3.0079554022615725e-05, "loss": 0.3497, "step": 15241 }, { "epoch": 1.8074232183090242, "grad_norm": 0.6966267142243856, "learning_rate": 3.0077203707311887e-05, "loss": 0.1313, "step": 15242 }, { "epoch": 1.807541800071149, "grad_norm": 0.6850710284267119, "learning_rate": 3.0074853345202457e-05, "loss": 0.1373, "step": 15243 }, { "epoch": 1.8076603818332742, "grad_norm": 0.6888011907686001, "learning_rate": 3.0072502936309104e-05, "loss": 0.1354, "step": 15244 }, { "epoch": 1.807778963595399, "grad_norm": 0.9073493498749794, "learning_rate": 3.0070152480653502e-05, "loss": 0.1837, "step": 15245 }, { "epoch": 1.8078975453575241, "grad_norm": 0.8809591885366445, "learning_rate": 3.0067801978257304e-05, "loss": 0.167, "step": 15246 }, { "epoch": 1.8080161271196489, "grad_norm": 1.0447136164065292, "learning_rate": 3.006545142914219e-05, "loss": 0.1936, "step": 15247 }, { "epoch": 1.808134708881774, "grad_norm": 0.9471722626731461, "learning_rate": 3.006310083332982e-05, "loss": 0.1992, "step": 15248 }, { "epoch": 1.8082532906438988, "grad_norm": 0.871852098025646, "learning_rate": 3.0060750190841875e-05, "loss": 0.1733, "step": 15249 }, { "epoch": 1.808371872406024, "grad_norm": 0.7433856102331952, "learning_rate": 3.0058399501700014e-05, "loss": 0.1886, "step": 15250 }, { "epoch": 1.8084904541681488, "grad_norm": 0.8056981082973372, "learning_rate": 3.005604876592592e-05, "loss": 0.1617, "step": 15251 }, { "epoch": 1.808609035930274, "grad_norm": 0.8563069023566553, "learning_rate": 3.005369798354125e-05, "loss": 0.1679, "step": 15252 }, { "epoch": 1.8087276176923988, "grad_norm": 0.675958477332458, "learning_rate": 3.0051347154567684e-05, "loss": 0.1134, "step": 15253 }, { "epoch": 1.808846199454524, "grad_norm": 0.9784727745964439, "learning_rate": 3.0048996279026885e-05, "loss": 0.2005, "step": 15254 }, { "epoch": 1.8089647812166487, "grad_norm": 0.763599462231589, "learning_rate": 3.004664535694054e-05, "loss": 0.1435, "step": 15255 }, { "epoch": 1.809083362978774, "grad_norm": 1.0231670583924364, "learning_rate": 3.004429438833031e-05, "loss": 0.1974, "step": 15256 }, { "epoch": 1.809201944740899, "grad_norm": 1.1335851175381495, "learning_rate": 3.0041943373217874e-05, "loss": 0.2226, "step": 15257 }, { "epoch": 1.8093205265030239, "grad_norm": 0.9297806030470269, "learning_rate": 3.0039592311624903e-05, "loss": 0.154, "step": 15258 }, { "epoch": 1.8094391082651489, "grad_norm": 1.325758308319614, "learning_rate": 3.003724120357307e-05, "loss": 0.2998, "step": 15259 }, { "epoch": 1.8095576900272738, "grad_norm": 1.0361172507235572, "learning_rate": 3.0034890049084054e-05, "loss": 0.2395, "step": 15260 }, { "epoch": 1.8096762717893988, "grad_norm": 0.8143665501022673, "learning_rate": 3.0032538848179513e-05, "loss": 0.1723, "step": 15261 }, { "epoch": 1.8097948535515238, "grad_norm": 1.5104711784066196, "learning_rate": 3.0030187600881145e-05, "loss": 0.2959, "step": 15262 }, { "epoch": 1.8099134353136488, "grad_norm": 0.7184342013057864, "learning_rate": 3.002783630721061e-05, "loss": 0.1632, "step": 15263 }, { "epoch": 1.8100320170757738, "grad_norm": 0.9669895671358301, "learning_rate": 3.0025484967189597e-05, "loss": 0.1812, "step": 15264 }, { "epoch": 1.8101505988378987, "grad_norm": 0.9439078425332885, "learning_rate": 3.0023133580839773e-05, "loss": 0.2075, "step": 15265 }, { "epoch": 1.8102691806000237, "grad_norm": 0.7995536605362623, "learning_rate": 3.002078214818282e-05, "loss": 0.1618, "step": 15266 }, { "epoch": 1.8103877623621487, "grad_norm": 0.895161977697565, "learning_rate": 3.0018430669240406e-05, "loss": 0.1823, "step": 15267 }, { "epoch": 1.8105063441242737, "grad_norm": 0.773872719254621, "learning_rate": 3.0016079144034227e-05, "loss": 0.1946, "step": 15268 }, { "epoch": 1.8106249258863987, "grad_norm": 0.8960827594249422, "learning_rate": 3.001372757258594e-05, "loss": 0.2036, "step": 15269 }, { "epoch": 1.8107435076485237, "grad_norm": 0.68428085502591, "learning_rate": 3.0011375954917227e-05, "loss": 0.1773, "step": 15270 }, { "epoch": 1.8108620894106486, "grad_norm": 0.8103033257681806, "learning_rate": 3.000902429104978e-05, "loss": 0.1728, "step": 15271 }, { "epoch": 1.8109806711727736, "grad_norm": 0.807279193208687, "learning_rate": 3.0006672581005274e-05, "loss": 0.1736, "step": 15272 }, { "epoch": 1.8110992529348986, "grad_norm": 0.6801779311640868, "learning_rate": 3.000432082480538e-05, "loss": 0.1524, "step": 15273 }, { "epoch": 1.8112178346970236, "grad_norm": 1.0171118677664284, "learning_rate": 3.0001969022471788e-05, "loss": 0.1955, "step": 15274 }, { "epoch": 1.8113364164591486, "grad_norm": 1.0212226607741433, "learning_rate": 2.9999617174026178e-05, "loss": 0.1894, "step": 15275 }, { "epoch": 1.8114549982212735, "grad_norm": 0.7080880806476614, "learning_rate": 2.9997265279490226e-05, "loss": 0.1538, "step": 15276 }, { "epoch": 1.8115735799833985, "grad_norm": 1.0457971179203098, "learning_rate": 2.9994913338885623e-05, "loss": 0.2108, "step": 15277 }, { "epoch": 1.8116921617455235, "grad_norm": 1.4724577619725654, "learning_rate": 2.9992561352234035e-05, "loss": 0.3365, "step": 15278 }, { "epoch": 1.8118107435076485, "grad_norm": 0.9014810510951258, "learning_rate": 2.9990209319557156e-05, "loss": 0.1571, "step": 15279 }, { "epoch": 1.8119293252697735, "grad_norm": 0.7977892961064299, "learning_rate": 2.9987857240876665e-05, "loss": 0.1601, "step": 15280 }, { "epoch": 1.8120479070318984, "grad_norm": 0.6278846627918053, "learning_rate": 2.9985505116214252e-05, "loss": 0.1319, "step": 15281 }, { "epoch": 1.8121664887940234, "grad_norm": 0.8030058293635072, "learning_rate": 2.998315294559159e-05, "loss": 0.1915, "step": 15282 }, { "epoch": 1.8122850705561486, "grad_norm": 0.8733656039367683, "learning_rate": 2.9980800729030374e-05, "loss": 0.2132, "step": 15283 }, { "epoch": 1.8124036523182734, "grad_norm": 0.6327343413469493, "learning_rate": 2.997844846655227e-05, "loss": 0.1633, "step": 15284 }, { "epoch": 1.8125222340803986, "grad_norm": 0.8320824111930306, "learning_rate": 2.997609615817899e-05, "loss": 0.1744, "step": 15285 }, { "epoch": 1.8126408158425233, "grad_norm": 0.8340511143449201, "learning_rate": 2.9973743803932203e-05, "loss": 0.1963, "step": 15286 }, { "epoch": 1.8127593976046485, "grad_norm": 0.5920607831492545, "learning_rate": 2.9971391403833594e-05, "loss": 0.1255, "step": 15287 }, { "epoch": 1.8128779793667733, "grad_norm": 1.221464799820948, "learning_rate": 2.9969038957904862e-05, "loss": 0.2349, "step": 15288 }, { "epoch": 1.8129965611288985, "grad_norm": 0.9977423505166422, "learning_rate": 2.996668646616767e-05, "loss": 0.2046, "step": 15289 }, { "epoch": 1.8131151428910233, "grad_norm": 0.8799603782398817, "learning_rate": 2.996433392864373e-05, "loss": 0.2242, "step": 15290 }, { "epoch": 1.8132337246531485, "grad_norm": 0.7008405922026111, "learning_rate": 2.9961981345354713e-05, "loss": 0.1273, "step": 15291 }, { "epoch": 1.8133523064152732, "grad_norm": 0.7205679662758091, "learning_rate": 2.9959628716322313e-05, "loss": 0.1388, "step": 15292 }, { "epoch": 1.8134708881773984, "grad_norm": 0.847835832058915, "learning_rate": 2.9957276041568216e-05, "loss": 0.2185, "step": 15293 }, { "epoch": 1.8135894699395232, "grad_norm": 0.7688026160026602, "learning_rate": 2.995492332111412e-05, "loss": 0.1399, "step": 15294 }, { "epoch": 1.8137080517016484, "grad_norm": 0.6920954988987542, "learning_rate": 2.9952570554981703e-05, "loss": 0.127, "step": 15295 }, { "epoch": 1.8138266334637732, "grad_norm": 1.4581016470038146, "learning_rate": 2.9950217743192664e-05, "loss": 0.3385, "step": 15296 }, { "epoch": 1.8139452152258984, "grad_norm": 1.2058478634898417, "learning_rate": 2.9947864885768685e-05, "loss": 0.2031, "step": 15297 }, { "epoch": 1.8140637969880231, "grad_norm": 0.9085337558727927, "learning_rate": 2.9945511982731457e-05, "loss": 0.2007, "step": 15298 }, { "epoch": 1.8141823787501483, "grad_norm": 1.0978107649282072, "learning_rate": 2.9943159034102674e-05, "loss": 0.2131, "step": 15299 }, { "epoch": 1.814300960512273, "grad_norm": 1.0042024865730301, "learning_rate": 2.9940806039904023e-05, "loss": 0.1805, "step": 15300 }, { "epoch": 1.8144195422743983, "grad_norm": 0.9854329195411209, "learning_rate": 2.993845300015721e-05, "loss": 0.1599, "step": 15301 }, { "epoch": 1.814538124036523, "grad_norm": 0.8826217392959612, "learning_rate": 2.99360999148839e-05, "loss": 0.1916, "step": 15302 }, { "epoch": 1.8146567057986482, "grad_norm": 1.0175564574012719, "learning_rate": 2.993374678410581e-05, "loss": 0.1997, "step": 15303 }, { "epoch": 1.814775287560773, "grad_norm": 0.7939593342986472, "learning_rate": 2.9931393607844623e-05, "loss": 0.1635, "step": 15304 }, { "epoch": 1.8148938693228982, "grad_norm": 0.9637371710870302, "learning_rate": 2.9929040386122036e-05, "loss": 0.2258, "step": 15305 }, { "epoch": 1.815012451085023, "grad_norm": 1.2994510930677314, "learning_rate": 2.992668711895974e-05, "loss": 0.2524, "step": 15306 }, { "epoch": 1.8151310328471482, "grad_norm": 0.873924200708601, "learning_rate": 2.9924333806379433e-05, "loss": 0.1638, "step": 15307 }, { "epoch": 1.8152496146092731, "grad_norm": 0.8610701663439282, "learning_rate": 2.9921980448402808e-05, "loss": 0.174, "step": 15308 }, { "epoch": 1.8153681963713981, "grad_norm": 0.8927793464123126, "learning_rate": 2.9919627045051556e-05, "loss": 0.2224, "step": 15309 }, { "epoch": 1.815486778133523, "grad_norm": 0.7261982214172353, "learning_rate": 2.991727359634738e-05, "loss": 0.1432, "step": 15310 }, { "epoch": 1.815605359895648, "grad_norm": 1.0124862913008617, "learning_rate": 2.9914920102311966e-05, "loss": 0.2074, "step": 15311 }, { "epoch": 1.815723941657773, "grad_norm": 0.7830927192172253, "learning_rate": 2.9912566562967014e-05, "loss": 0.1335, "step": 15312 }, { "epoch": 1.815842523419898, "grad_norm": 0.790795564121555, "learning_rate": 2.9910212978334228e-05, "loss": 0.196, "step": 15313 }, { "epoch": 1.815961105182023, "grad_norm": 0.8555548766887383, "learning_rate": 2.9907859348435297e-05, "loss": 0.142, "step": 15314 }, { "epoch": 1.816079686944148, "grad_norm": 0.8732728532257412, "learning_rate": 2.9905505673291918e-05, "loss": 0.2006, "step": 15315 }, { "epoch": 1.816198268706273, "grad_norm": 0.8584631181511472, "learning_rate": 2.9903151952925802e-05, "loss": 0.143, "step": 15316 }, { "epoch": 1.816316850468398, "grad_norm": 0.6221715423461472, "learning_rate": 2.9900798187358624e-05, "loss": 0.1491, "step": 15317 }, { "epoch": 1.816435432230523, "grad_norm": 0.5700332378664487, "learning_rate": 2.989844437661211e-05, "loss": 0.1004, "step": 15318 }, { "epoch": 1.816554013992648, "grad_norm": 0.8404444240931636, "learning_rate": 2.9896090520707937e-05, "loss": 0.1246, "step": 15319 }, { "epoch": 1.816672595754773, "grad_norm": 0.7925946751066655, "learning_rate": 2.9893736619667817e-05, "loss": 0.1502, "step": 15320 }, { "epoch": 1.816791177516898, "grad_norm": 0.7908484804618506, "learning_rate": 2.9891382673513448e-05, "loss": 0.1416, "step": 15321 }, { "epoch": 1.8169097592790229, "grad_norm": 1.175105213501486, "learning_rate": 2.9889028682266524e-05, "loss": 0.2769, "step": 15322 }, { "epoch": 1.8170283410411479, "grad_norm": 0.7574366576969697, "learning_rate": 2.9886674645948747e-05, "loss": 0.1647, "step": 15323 }, { "epoch": 1.8171469228032728, "grad_norm": 0.9539844189983758, "learning_rate": 2.9884320564581835e-05, "loss": 0.1971, "step": 15324 }, { "epoch": 1.8172655045653978, "grad_norm": 0.6507432565438186, "learning_rate": 2.9881966438187464e-05, "loss": 0.1636, "step": 15325 }, { "epoch": 1.8173840863275228, "grad_norm": 0.8978130799492937, "learning_rate": 2.987961226678736e-05, "loss": 0.1719, "step": 15326 }, { "epoch": 1.8175026680896478, "grad_norm": 0.7478027979128965, "learning_rate": 2.9877258050403212e-05, "loss": 0.1524, "step": 15327 }, { "epoch": 1.8176212498517728, "grad_norm": 0.6838669586010478, "learning_rate": 2.987490378905672e-05, "loss": 0.1771, "step": 15328 }, { "epoch": 1.8177398316138977, "grad_norm": 0.9781849500248863, "learning_rate": 2.9872549482769603e-05, "loss": 0.228, "step": 15329 }, { "epoch": 1.8178584133760227, "grad_norm": 1.0809597274270244, "learning_rate": 2.9870195131563543e-05, "loss": 0.1681, "step": 15330 }, { "epoch": 1.8179769951381477, "grad_norm": 0.922626577611113, "learning_rate": 2.9867840735460268e-05, "loss": 0.1583, "step": 15331 }, { "epoch": 1.818095576900273, "grad_norm": 1.2450145250132303, "learning_rate": 2.986548629448146e-05, "loss": 0.2921, "step": 15332 }, { "epoch": 1.8182141586623977, "grad_norm": 0.792291964885216, "learning_rate": 2.9863131808648836e-05, "loss": 0.1943, "step": 15333 }, { "epoch": 1.8183327404245229, "grad_norm": 1.3286886472692427, "learning_rate": 2.98607772779841e-05, "loss": 0.2372, "step": 15334 }, { "epoch": 1.8184513221866476, "grad_norm": 0.7995073577085069, "learning_rate": 2.985842270250897e-05, "loss": 0.1448, "step": 15335 }, { "epoch": 1.8185699039487728, "grad_norm": 0.9507616649199002, "learning_rate": 2.985606808224513e-05, "loss": 0.2083, "step": 15336 }, { "epoch": 1.8186884857108976, "grad_norm": 1.0793503203095618, "learning_rate": 2.9853713417214302e-05, "loss": 0.1791, "step": 15337 }, { "epoch": 1.8188070674730228, "grad_norm": 1.1863959696293285, "learning_rate": 2.9851358707438182e-05, "loss": 0.2076, "step": 15338 }, { "epoch": 1.8189256492351475, "grad_norm": 1.2078118999498182, "learning_rate": 2.9849003952938493e-05, "loss": 0.2511, "step": 15339 }, { "epoch": 1.8190442309972727, "grad_norm": 1.099621275013443, "learning_rate": 2.9846649153736928e-05, "loss": 0.2736, "step": 15340 }, { "epoch": 1.8191628127593975, "grad_norm": 0.9767867432903887, "learning_rate": 2.9844294309855198e-05, "loss": 0.1962, "step": 15341 }, { "epoch": 1.8192813945215227, "grad_norm": 0.9469528405618699, "learning_rate": 2.9841939421315018e-05, "loss": 0.2216, "step": 15342 }, { "epoch": 1.8193999762836475, "grad_norm": 0.996312693064748, "learning_rate": 2.983958448813809e-05, "loss": 0.1728, "step": 15343 }, { "epoch": 1.8195185580457727, "grad_norm": 1.0476534758728546, "learning_rate": 2.983722951034614e-05, "loss": 0.2146, "step": 15344 }, { "epoch": 1.8196371398078974, "grad_norm": 1.152401842740534, "learning_rate": 2.983487448796085e-05, "loss": 0.2646, "step": 15345 }, { "epoch": 1.8197557215700226, "grad_norm": 1.1457099663554928, "learning_rate": 2.9832519421003962e-05, "loss": 0.2418, "step": 15346 }, { "epoch": 1.8198743033321474, "grad_norm": 0.9267420333995193, "learning_rate": 2.983016430949716e-05, "loss": 0.2161, "step": 15347 }, { "epoch": 1.8199928850942726, "grad_norm": 0.9771873361154638, "learning_rate": 2.982780915346217e-05, "loss": 0.2135, "step": 15348 }, { "epoch": 1.8201114668563974, "grad_norm": 1.0812969769250833, "learning_rate": 2.98254539529207e-05, "loss": 0.2441, "step": 15349 }, { "epoch": 1.8202300486185226, "grad_norm": 0.9291653234283628, "learning_rate": 2.9823098707894464e-05, "loss": 0.1879, "step": 15350 }, { "epoch": 1.8203486303806473, "grad_norm": 0.859331319835636, "learning_rate": 2.9820743418405167e-05, "loss": 0.1765, "step": 15351 }, { "epoch": 1.8204672121427725, "grad_norm": 1.0588709013525603, "learning_rate": 2.981838808447453e-05, "loss": 0.2166, "step": 15352 }, { "epoch": 1.8205857939048973, "grad_norm": 1.3089852941335953, "learning_rate": 2.9816032706124264e-05, "loss": 0.3057, "step": 15353 }, { "epoch": 1.8207043756670225, "grad_norm": 1.0230032065837968, "learning_rate": 2.9813677283376085e-05, "loss": 0.2446, "step": 15354 }, { "epoch": 1.8208229574291472, "grad_norm": 1.0036418454899414, "learning_rate": 2.9811321816251697e-05, "loss": 0.2351, "step": 15355 }, { "epoch": 1.8209415391912724, "grad_norm": 0.7038436788421636, "learning_rate": 2.9808966304772825e-05, "loss": 0.1338, "step": 15356 }, { "epoch": 1.8210601209533974, "grad_norm": 1.0770720631206359, "learning_rate": 2.9806610748961183e-05, "loss": 0.2132, "step": 15357 }, { "epoch": 1.8211787027155224, "grad_norm": 0.7061355869679413, "learning_rate": 2.9804255148838477e-05, "loss": 0.1634, "step": 15358 }, { "epoch": 1.8212972844776474, "grad_norm": 0.9026271738817263, "learning_rate": 2.9801899504426444e-05, "loss": 0.1593, "step": 15359 }, { "epoch": 1.8214158662397724, "grad_norm": 0.9925600749233368, "learning_rate": 2.9799543815746778e-05, "loss": 0.1838, "step": 15360 }, { "epoch": 1.8215344480018973, "grad_norm": 0.9236421000218497, "learning_rate": 2.9797188082821205e-05, "loss": 0.1995, "step": 15361 }, { "epoch": 1.8216530297640223, "grad_norm": 0.8052098522377797, "learning_rate": 2.9794832305671438e-05, "loss": 0.1996, "step": 15362 }, { "epoch": 1.8217716115261473, "grad_norm": 0.921372956876502, "learning_rate": 2.9792476484319193e-05, "loss": 0.2015, "step": 15363 }, { "epoch": 1.8218901932882723, "grad_norm": 0.8483875686515787, "learning_rate": 2.9790120618786194e-05, "loss": 0.1435, "step": 15364 }, { "epoch": 1.8220087750503973, "grad_norm": 0.6689870042009372, "learning_rate": 2.978776470909416e-05, "loss": 0.1421, "step": 15365 }, { "epoch": 1.8221273568125222, "grad_norm": 0.7620896863115509, "learning_rate": 2.978540875526481e-05, "loss": 0.2035, "step": 15366 }, { "epoch": 1.8222459385746472, "grad_norm": 0.7871718360389401, "learning_rate": 2.9783052757319857e-05, "loss": 0.1729, "step": 15367 }, { "epoch": 1.8223645203367722, "grad_norm": 0.9348674533291015, "learning_rate": 2.978069671528102e-05, "loss": 0.1845, "step": 15368 }, { "epoch": 1.8224831020988972, "grad_norm": 1.1764721518207681, "learning_rate": 2.9778340629170027e-05, "loss": 0.2088, "step": 15369 }, { "epoch": 1.8226016838610222, "grad_norm": 0.7500456399926081, "learning_rate": 2.9775984499008587e-05, "loss": 0.1525, "step": 15370 }, { "epoch": 1.8227202656231472, "grad_norm": 1.0043056354417137, "learning_rate": 2.9773628324818425e-05, "loss": 0.2123, "step": 15371 }, { "epoch": 1.8228388473852721, "grad_norm": 0.9965126162440716, "learning_rate": 2.9771272106621266e-05, "loss": 0.2186, "step": 15372 }, { "epoch": 1.8229574291473971, "grad_norm": 0.9466537864894152, "learning_rate": 2.9768915844438834e-05, "loss": 0.2412, "step": 15373 }, { "epoch": 1.823076010909522, "grad_norm": 0.9685451430255317, "learning_rate": 2.976655953829284e-05, "loss": 0.1883, "step": 15374 }, { "epoch": 1.823194592671647, "grad_norm": 0.976212434472658, "learning_rate": 2.9764203188205016e-05, "loss": 0.1611, "step": 15375 }, { "epoch": 1.823313174433772, "grad_norm": 0.7993033054934024, "learning_rate": 2.9761846794197085e-05, "loss": 0.1639, "step": 15376 }, { "epoch": 1.823431756195897, "grad_norm": 0.644423317884611, "learning_rate": 2.975949035629076e-05, "loss": 0.1584, "step": 15377 }, { "epoch": 1.823550337958022, "grad_norm": 0.6361957467653376, "learning_rate": 2.975713387450777e-05, "loss": 0.1663, "step": 15378 }, { "epoch": 1.823668919720147, "grad_norm": 0.8083390700507445, "learning_rate": 2.9754777348869837e-05, "loss": 0.1758, "step": 15379 }, { "epoch": 1.823787501482272, "grad_norm": 1.0009482580330868, "learning_rate": 2.97524207793987e-05, "loss": 0.1971, "step": 15380 }, { "epoch": 1.823906083244397, "grad_norm": 0.7749559525252164, "learning_rate": 2.975006416611606e-05, "loss": 0.1654, "step": 15381 }, { "epoch": 1.824024665006522, "grad_norm": 1.1788922314814314, "learning_rate": 2.9747707509043653e-05, "loss": 0.2284, "step": 15382 }, { "epoch": 1.8241432467686471, "grad_norm": 0.6472482042543826, "learning_rate": 2.9745350808203203e-05, "loss": 0.1458, "step": 15383 }, { "epoch": 1.824261828530772, "grad_norm": 0.7378467345717279, "learning_rate": 2.9742994063616447e-05, "loss": 0.1184, "step": 15384 }, { "epoch": 1.824380410292897, "grad_norm": 0.8306176972984514, "learning_rate": 2.9740637275305094e-05, "loss": 0.1609, "step": 15385 }, { "epoch": 1.8244989920550219, "grad_norm": 0.9863432290638695, "learning_rate": 2.973828044329088e-05, "loss": 0.1549, "step": 15386 }, { "epoch": 1.824617573817147, "grad_norm": 0.676357064815752, "learning_rate": 2.9735923567595535e-05, "loss": 0.1681, "step": 15387 }, { "epoch": 1.8247361555792718, "grad_norm": 1.2228425595127967, "learning_rate": 2.973356664824078e-05, "loss": 0.3159, "step": 15388 }, { "epoch": 1.824854737341397, "grad_norm": 1.103012663765399, "learning_rate": 2.9731209685248345e-05, "loss": 0.2055, "step": 15389 }, { "epoch": 1.8249733191035218, "grad_norm": 0.8298451632690624, "learning_rate": 2.9728852678639956e-05, "loss": 0.1671, "step": 15390 }, { "epoch": 1.825091900865647, "grad_norm": 0.49605046826054927, "learning_rate": 2.9726495628437352e-05, "loss": 0.0993, "step": 15391 }, { "epoch": 1.8252104826277717, "grad_norm": 0.7322244791007606, "learning_rate": 2.9724138534662243e-05, "loss": 0.1571, "step": 15392 }, { "epoch": 1.825329064389897, "grad_norm": 0.8681517052027562, "learning_rate": 2.9721781397336378e-05, "loss": 0.2187, "step": 15393 }, { "epoch": 1.8254476461520217, "grad_norm": 0.6724897213208547, "learning_rate": 2.9719424216481473e-05, "loss": 0.1381, "step": 15394 }, { "epoch": 1.825566227914147, "grad_norm": 0.8831776764121378, "learning_rate": 2.971706699211927e-05, "loss": 0.1749, "step": 15395 }, { "epoch": 1.8256848096762717, "grad_norm": 0.9895974137765967, "learning_rate": 2.971470972427149e-05, "loss": 0.1499, "step": 15396 }, { "epoch": 1.8258033914383969, "grad_norm": 0.869542004534008, "learning_rate": 2.971235241295987e-05, "loss": 0.1793, "step": 15397 }, { "epoch": 1.8259219732005216, "grad_norm": 0.5760851819033119, "learning_rate": 2.970999505820614e-05, "loss": 0.141, "step": 15398 }, { "epoch": 1.8260405549626468, "grad_norm": 0.8605444798430084, "learning_rate": 2.9707637660032028e-05, "loss": 0.1847, "step": 15399 }, { "epoch": 1.8261591367247716, "grad_norm": 0.9596186165912983, "learning_rate": 2.9705280218459274e-05, "loss": 0.192, "step": 15400 }, { "epoch": 1.8262777184868968, "grad_norm": 0.7495967277511423, "learning_rate": 2.9702922733509604e-05, "loss": 0.1691, "step": 15401 }, { "epoch": 1.8263963002490216, "grad_norm": 1.1038455169199468, "learning_rate": 2.970056520520475e-05, "loss": 0.1809, "step": 15402 }, { "epoch": 1.8265148820111468, "grad_norm": 0.9378227198402072, "learning_rate": 2.969820763356645e-05, "loss": 0.2196, "step": 15403 }, { "epoch": 1.8266334637732715, "grad_norm": 1.2861399332835486, "learning_rate": 2.9695850018616444e-05, "loss": 0.2389, "step": 15404 }, { "epoch": 1.8267520455353967, "grad_norm": 1.3444652415728569, "learning_rate": 2.969349236037645e-05, "loss": 0.2065, "step": 15405 }, { "epoch": 1.8268706272975215, "grad_norm": 1.0241699260694561, "learning_rate": 2.969113465886822e-05, "loss": 0.194, "step": 15406 }, { "epoch": 1.8269892090596467, "grad_norm": 0.8412949540116269, "learning_rate": 2.9688776914113475e-05, "loss": 0.2137, "step": 15407 }, { "epoch": 1.8271077908217717, "grad_norm": 1.0514082042408757, "learning_rate": 2.9686419126133963e-05, "loss": 0.2093, "step": 15408 }, { "epoch": 1.8272263725838966, "grad_norm": 0.7454526604520645, "learning_rate": 2.968406129495141e-05, "loss": 0.1343, "step": 15409 }, { "epoch": 1.8273449543460216, "grad_norm": 0.8808717866449423, "learning_rate": 2.9681703420587558e-05, "loss": 0.1497, "step": 15410 }, { "epoch": 1.8274635361081466, "grad_norm": 1.0360812975152616, "learning_rate": 2.967934550306414e-05, "loss": 0.2041, "step": 15411 }, { "epoch": 1.8275821178702716, "grad_norm": 1.009231966050227, "learning_rate": 2.967698754240289e-05, "loss": 0.2066, "step": 15412 }, { "epoch": 1.8277006996323966, "grad_norm": 0.7924510974596026, "learning_rate": 2.9674629538625552e-05, "loss": 0.1625, "step": 15413 }, { "epoch": 1.8278192813945215, "grad_norm": 1.1483330575940787, "learning_rate": 2.9672271491753862e-05, "loss": 0.2367, "step": 15414 }, { "epoch": 1.8279378631566465, "grad_norm": 0.9054021801137345, "learning_rate": 2.966991340180956e-05, "loss": 0.1423, "step": 15415 }, { "epoch": 1.8280564449187715, "grad_norm": 1.0487710652400293, "learning_rate": 2.9667555268814384e-05, "loss": 0.2099, "step": 15416 }, { "epoch": 1.8281750266808965, "grad_norm": 0.7264887400782071, "learning_rate": 2.9665197092790074e-05, "loss": 0.1622, "step": 15417 }, { "epoch": 1.8282936084430215, "grad_norm": 0.9587722773048629, "learning_rate": 2.9662838873758365e-05, "loss": 0.237, "step": 15418 }, { "epoch": 1.8284121902051464, "grad_norm": 0.8290794377845487, "learning_rate": 2.9660480611741005e-05, "loss": 0.1273, "step": 15419 }, { "epoch": 1.8285307719672714, "grad_norm": 0.9029289325573021, "learning_rate": 2.965812230675972e-05, "loss": 0.1825, "step": 15420 }, { "epoch": 1.8286493537293964, "grad_norm": 0.9126311854963453, "learning_rate": 2.9655763958836262e-05, "loss": 0.166, "step": 15421 }, { "epoch": 1.8287679354915214, "grad_norm": 0.7776262613214269, "learning_rate": 2.9653405567992366e-05, "loss": 0.1064, "step": 15422 }, { "epoch": 1.8288865172536464, "grad_norm": 0.8288257053584829, "learning_rate": 2.965104713424979e-05, "loss": 0.1915, "step": 15423 }, { "epoch": 1.8290050990157714, "grad_norm": 0.871096126916273, "learning_rate": 2.964868865763025e-05, "loss": 0.1743, "step": 15424 }, { "epoch": 1.8291236807778963, "grad_norm": 0.7337421050263578, "learning_rate": 2.964633013815551e-05, "loss": 0.1283, "step": 15425 }, { "epoch": 1.8292422625400213, "grad_norm": 0.729404296390423, "learning_rate": 2.9643971575847296e-05, "loss": 0.1614, "step": 15426 }, { "epoch": 1.8293608443021463, "grad_norm": 0.7908881932901992, "learning_rate": 2.9641612970727368e-05, "loss": 0.1573, "step": 15427 }, { "epoch": 1.8294794260642713, "grad_norm": 0.80973418750176, "learning_rate": 2.963925432281745e-05, "loss": 0.1555, "step": 15428 }, { "epoch": 1.8295980078263963, "grad_norm": 0.8351829055069806, "learning_rate": 2.96368956321393e-05, "loss": 0.1708, "step": 15429 }, { "epoch": 1.8297165895885212, "grad_norm": 0.9051833998081217, "learning_rate": 2.9634536898714667e-05, "loss": 0.1881, "step": 15430 }, { "epoch": 1.8298351713506462, "grad_norm": 1.3100036022578894, "learning_rate": 2.9632178122565275e-05, "loss": 0.2785, "step": 15431 }, { "epoch": 1.8299537531127714, "grad_norm": 0.6186873760369141, "learning_rate": 2.962981930371288e-05, "loss": 0.1428, "step": 15432 }, { "epoch": 1.8300723348748962, "grad_norm": 0.7272614353718987, "learning_rate": 2.9627460442179233e-05, "loss": 0.1452, "step": 15433 }, { "epoch": 1.8301909166370214, "grad_norm": 1.0258556749257077, "learning_rate": 2.9625101537986083e-05, "loss": 0.1805, "step": 15434 }, { "epoch": 1.8303094983991461, "grad_norm": 0.9667898621556686, "learning_rate": 2.962274259115516e-05, "loss": 0.1819, "step": 15435 }, { "epoch": 1.8304280801612713, "grad_norm": 0.7271642203731177, "learning_rate": 2.9620383601708223e-05, "loss": 0.1364, "step": 15436 }, { "epoch": 1.830546661923396, "grad_norm": 0.9296663800919625, "learning_rate": 2.9618024569667014e-05, "loss": 0.1839, "step": 15437 }, { "epoch": 1.8306652436855213, "grad_norm": 0.8926007251429378, "learning_rate": 2.961566549505328e-05, "loss": 0.186, "step": 15438 }, { "epoch": 1.830783825447646, "grad_norm": 0.6924331508317774, "learning_rate": 2.9613306377888773e-05, "loss": 0.1574, "step": 15439 }, { "epoch": 1.8309024072097713, "grad_norm": 0.9997789716572151, "learning_rate": 2.961094721819524e-05, "loss": 0.1878, "step": 15440 }, { "epoch": 1.831020988971896, "grad_norm": 1.0130366320506008, "learning_rate": 2.9608588015994422e-05, "loss": 0.2387, "step": 15441 }, { "epoch": 1.8311395707340212, "grad_norm": 0.832014787651683, "learning_rate": 2.960622877130807e-05, "loss": 0.1572, "step": 15442 }, { "epoch": 1.831258152496146, "grad_norm": 0.687106216687091, "learning_rate": 2.9603869484157942e-05, "loss": 0.1374, "step": 15443 }, { "epoch": 1.8313767342582712, "grad_norm": 0.5982241800490347, "learning_rate": 2.960151015456578e-05, "loss": 0.1268, "step": 15444 }, { "epoch": 1.831495316020396, "grad_norm": 0.8131698123129604, "learning_rate": 2.9599150782553346e-05, "loss": 0.1361, "step": 15445 }, { "epoch": 1.8316138977825211, "grad_norm": 0.9452819807511228, "learning_rate": 2.9596791368142372e-05, "loss": 0.2196, "step": 15446 }, { "epoch": 1.831732479544646, "grad_norm": 0.6721914521285541, "learning_rate": 2.959443191135463e-05, "loss": 0.1234, "step": 15447 }, { "epoch": 1.8318510613067711, "grad_norm": 0.8309236120153571, "learning_rate": 2.959207241221185e-05, "loss": 0.1637, "step": 15448 }, { "epoch": 1.8319696430688959, "grad_norm": 0.9556203360061042, "learning_rate": 2.9589712870735804e-05, "loss": 0.1369, "step": 15449 }, { "epoch": 1.832088224831021, "grad_norm": 0.6941384659877499, "learning_rate": 2.9587353286948222e-05, "loss": 0.1231, "step": 15450 }, { "epoch": 1.8322068065931458, "grad_norm": 0.6300492024424782, "learning_rate": 2.9584993660870873e-05, "loss": 0.1032, "step": 15451 }, { "epoch": 1.832325388355271, "grad_norm": 0.8123198997757365, "learning_rate": 2.9582633992525503e-05, "loss": 0.1382, "step": 15452 }, { "epoch": 1.8324439701173958, "grad_norm": 1.1008156604058594, "learning_rate": 2.9580274281933874e-05, "loss": 0.1857, "step": 15453 }, { "epoch": 1.832562551879521, "grad_norm": 0.706450983837067, "learning_rate": 2.9577914529117723e-05, "loss": 0.1143, "step": 15454 }, { "epoch": 1.8326811336416458, "grad_norm": 0.787339343938937, "learning_rate": 2.957555473409882e-05, "loss": 0.1338, "step": 15455 }, { "epoch": 1.832799715403771, "grad_norm": 0.689838339152342, "learning_rate": 2.9573194896898916e-05, "loss": 0.1503, "step": 15456 }, { "epoch": 1.832918297165896, "grad_norm": 0.8316656353624343, "learning_rate": 2.9570835017539756e-05, "loss": 0.1697, "step": 15457 }, { "epoch": 1.833036878928021, "grad_norm": 0.8896455023816116, "learning_rate": 2.9568475096043102e-05, "loss": 0.1559, "step": 15458 }, { "epoch": 1.833155460690146, "grad_norm": 0.6900176125576237, "learning_rate": 2.9566115132430717e-05, "loss": 0.1584, "step": 15459 }, { "epoch": 1.8332740424522709, "grad_norm": 0.9899384929721701, "learning_rate": 2.9563755126724345e-05, "loss": 0.2518, "step": 15460 }, { "epoch": 1.8333926242143959, "grad_norm": 1.000555084827324, "learning_rate": 2.956139507894575e-05, "loss": 0.2252, "step": 15461 }, { "epoch": 1.8335112059765208, "grad_norm": 0.6994404393411617, "learning_rate": 2.955903498911668e-05, "loss": 0.16, "step": 15462 }, { "epoch": 1.8336297877386458, "grad_norm": 1.0091171766989921, "learning_rate": 2.9556674857258898e-05, "loss": 0.2171, "step": 15463 }, { "epoch": 1.8337483695007708, "grad_norm": 0.8129425636466915, "learning_rate": 2.955431468339417e-05, "loss": 0.1497, "step": 15464 }, { "epoch": 1.8338669512628958, "grad_norm": 0.8011607802714994, "learning_rate": 2.9551954467544242e-05, "loss": 0.1302, "step": 15465 }, { "epoch": 1.8339855330250208, "grad_norm": 1.4572126385665942, "learning_rate": 2.9549594209730873e-05, "loss": 0.3423, "step": 15466 }, { "epoch": 1.8341041147871457, "grad_norm": 0.7780451977717283, "learning_rate": 2.9547233909975824e-05, "loss": 0.1729, "step": 15467 }, { "epoch": 1.8342226965492707, "grad_norm": 0.8336083946032258, "learning_rate": 2.9544873568300864e-05, "loss": 0.1739, "step": 15468 }, { "epoch": 1.8343412783113957, "grad_norm": 0.7265278259147545, "learning_rate": 2.954251318472773e-05, "loss": 0.1716, "step": 15469 }, { "epoch": 1.8344598600735207, "grad_norm": 0.8752985539861614, "learning_rate": 2.95401527592782e-05, "loss": 0.174, "step": 15470 }, { "epoch": 1.8345784418356457, "grad_norm": 0.878311609204327, "learning_rate": 2.953779229197403e-05, "loss": 0.151, "step": 15471 }, { "epoch": 1.8346970235977706, "grad_norm": 0.785063578793951, "learning_rate": 2.9535431782836974e-05, "loss": 0.1409, "step": 15472 }, { "epoch": 1.8348156053598956, "grad_norm": 0.6331385197087557, "learning_rate": 2.953307123188881e-05, "loss": 0.12, "step": 15473 }, { "epoch": 1.8349341871220206, "grad_norm": 0.8597036336228239, "learning_rate": 2.9530710639151278e-05, "loss": 0.1727, "step": 15474 }, { "epoch": 1.8350527688841456, "grad_norm": 1.073054088069077, "learning_rate": 2.952835000464616e-05, "loss": 0.1857, "step": 15475 }, { "epoch": 1.8351713506462706, "grad_norm": 0.7755037909941674, "learning_rate": 2.9525989328395197e-05, "loss": 0.1701, "step": 15476 }, { "epoch": 1.8352899324083956, "grad_norm": 0.6702205079578166, "learning_rate": 2.952362861042017e-05, "loss": 0.186, "step": 15477 }, { "epoch": 1.8354085141705205, "grad_norm": 0.9094125965903891, "learning_rate": 2.9521267850742834e-05, "loss": 0.1797, "step": 15478 }, { "epoch": 1.8355270959326455, "grad_norm": 0.7106060224626918, "learning_rate": 2.9518907049384954e-05, "loss": 0.1545, "step": 15479 }, { "epoch": 1.8356456776947705, "grad_norm": 0.8622245866271262, "learning_rate": 2.9516546206368285e-05, "loss": 0.1572, "step": 15480 }, { "epoch": 1.8357642594568955, "grad_norm": 0.6596835544119195, "learning_rate": 2.9514185321714597e-05, "loss": 0.1706, "step": 15481 }, { "epoch": 1.8358828412190205, "grad_norm": 0.7560687931572966, "learning_rate": 2.9511824395445663e-05, "loss": 0.141, "step": 15482 }, { "epoch": 1.8360014229811457, "grad_norm": 0.8890556943735853, "learning_rate": 2.9509463427583246e-05, "loss": 0.1313, "step": 15483 }, { "epoch": 1.8361200047432704, "grad_norm": 1.119155717449911, "learning_rate": 2.9507102418149096e-05, "loss": 0.2614, "step": 15484 }, { "epoch": 1.8362385865053956, "grad_norm": 0.6456642786562725, "learning_rate": 2.950474136716499e-05, "loss": 0.16, "step": 15485 }, { "epoch": 1.8363571682675204, "grad_norm": 0.8557236347113389, "learning_rate": 2.9502380274652696e-05, "loss": 0.1846, "step": 15486 }, { "epoch": 1.8364757500296456, "grad_norm": 0.8007346154416894, "learning_rate": 2.9500019140633972e-05, "loss": 0.1382, "step": 15487 }, { "epoch": 1.8365943317917703, "grad_norm": 0.7492700198155701, "learning_rate": 2.9497657965130597e-05, "loss": 0.124, "step": 15488 }, { "epoch": 1.8367129135538955, "grad_norm": 0.8095612631701082, "learning_rate": 2.9495296748164332e-05, "loss": 0.1623, "step": 15489 }, { "epoch": 1.8368314953160203, "grad_norm": 0.9093624521336063, "learning_rate": 2.949293548975694e-05, "loss": 0.1888, "step": 15490 }, { "epoch": 1.8369500770781455, "grad_norm": 1.1065149132694878, "learning_rate": 2.949057418993019e-05, "loss": 0.2164, "step": 15491 }, { "epoch": 1.8370686588402703, "grad_norm": 0.8759685985459098, "learning_rate": 2.9488212848705854e-05, "loss": 0.18, "step": 15492 }, { "epoch": 1.8371872406023955, "grad_norm": 1.1971151638825674, "learning_rate": 2.9485851466105698e-05, "loss": 0.2482, "step": 15493 }, { "epoch": 1.8373058223645202, "grad_norm": 0.8391011156757014, "learning_rate": 2.9483490042151496e-05, "loss": 0.1037, "step": 15494 }, { "epoch": 1.8374244041266454, "grad_norm": 2.0083428293393215, "learning_rate": 2.9481128576865007e-05, "loss": 0.3277, "step": 15495 }, { "epoch": 1.8375429858887702, "grad_norm": 0.9209035404187014, "learning_rate": 2.9478767070268016e-05, "loss": 0.1841, "step": 15496 }, { "epoch": 1.8376615676508954, "grad_norm": 0.9996791848066113, "learning_rate": 2.947640552238228e-05, "loss": 0.2278, "step": 15497 }, { "epoch": 1.8377801494130201, "grad_norm": 0.6732401862127024, "learning_rate": 2.947404393322958e-05, "loss": 0.137, "step": 15498 }, { "epoch": 1.8378987311751454, "grad_norm": 1.1084664256288934, "learning_rate": 2.9471682302831672e-05, "loss": 0.2691, "step": 15499 }, { "epoch": 1.83801731293727, "grad_norm": 0.6482816576645161, "learning_rate": 2.946932063121034e-05, "loss": 0.1328, "step": 15500 }, { "epoch": 1.8381358946993953, "grad_norm": 1.1227081780109371, "learning_rate": 2.9466958918387354e-05, "loss": 0.3145, "step": 15501 }, { "epoch": 1.83825447646152, "grad_norm": 0.8758403831599904, "learning_rate": 2.9464597164384485e-05, "loss": 0.1942, "step": 15502 }, { "epoch": 1.8383730582236453, "grad_norm": 1.0425075898135845, "learning_rate": 2.9462235369223508e-05, "loss": 0.1917, "step": 15503 }, { "epoch": 1.83849163998577, "grad_norm": 0.630705654923278, "learning_rate": 2.9459873532926186e-05, "loss": 0.1221, "step": 15504 }, { "epoch": 1.8386102217478952, "grad_norm": 0.9083342730679429, "learning_rate": 2.9457511655514302e-05, "loss": 0.1499, "step": 15505 }, { "epoch": 1.83872880351002, "grad_norm": 0.664302784165351, "learning_rate": 2.9455149737009628e-05, "loss": 0.1401, "step": 15506 }, { "epoch": 1.8388473852721452, "grad_norm": 0.7678042926863818, "learning_rate": 2.9452787777433942e-05, "loss": 0.1809, "step": 15507 }, { "epoch": 1.8389659670342702, "grad_norm": 1.3778987524572128, "learning_rate": 2.9450425776809003e-05, "loss": 0.1862, "step": 15508 }, { "epoch": 1.8390845487963952, "grad_norm": 0.6208772653689606, "learning_rate": 2.9448063735156604e-05, "loss": 0.1447, "step": 15509 }, { "epoch": 1.8392031305585201, "grad_norm": 0.7850130714689275, "learning_rate": 2.9445701652498503e-05, "loss": 0.1949, "step": 15510 }, { "epoch": 1.8393217123206451, "grad_norm": 0.8433228397821364, "learning_rate": 2.944333952885649e-05, "loss": 0.1658, "step": 15511 }, { "epoch": 1.83944029408277, "grad_norm": 0.6878163139496245, "learning_rate": 2.944097736425233e-05, "loss": 0.1354, "step": 15512 }, { "epoch": 1.839558875844895, "grad_norm": 0.704040443752837, "learning_rate": 2.9438615158707812e-05, "loss": 0.1534, "step": 15513 }, { "epoch": 1.83967745760702, "grad_norm": 0.6676015212422032, "learning_rate": 2.9436252912244706e-05, "loss": 0.1264, "step": 15514 }, { "epoch": 1.839796039369145, "grad_norm": 0.883115234643828, "learning_rate": 2.943389062488478e-05, "loss": 0.1725, "step": 15515 }, { "epoch": 1.83991462113127, "grad_norm": 1.0172208142199568, "learning_rate": 2.9431528296649828e-05, "loss": 0.193, "step": 15516 }, { "epoch": 1.840033202893395, "grad_norm": 0.8328197516015264, "learning_rate": 2.9429165927561613e-05, "loss": 0.2467, "step": 15517 }, { "epoch": 1.84015178465552, "grad_norm": 0.8117451887952097, "learning_rate": 2.9426803517641928e-05, "loss": 0.142, "step": 15518 }, { "epoch": 1.840270366417645, "grad_norm": 0.8397542825645542, "learning_rate": 2.9424441066912534e-05, "loss": 0.2458, "step": 15519 }, { "epoch": 1.84038894817977, "grad_norm": 1.0078051282401075, "learning_rate": 2.9422078575395225e-05, "loss": 0.203, "step": 15520 }, { "epoch": 1.840507529941895, "grad_norm": 0.5420193010594105, "learning_rate": 2.9419716043111767e-05, "loss": 0.1014, "step": 15521 }, { "epoch": 1.84062611170402, "grad_norm": 1.0907618659174765, "learning_rate": 2.9417353470083952e-05, "loss": 0.222, "step": 15522 }, { "epoch": 1.8407446934661449, "grad_norm": 1.0563309552253528, "learning_rate": 2.9414990856333553e-05, "loss": 0.2206, "step": 15523 }, { "epoch": 1.8408632752282699, "grad_norm": 0.8458022975930581, "learning_rate": 2.9412628201882358e-05, "loss": 0.2163, "step": 15524 }, { "epoch": 1.8409818569903948, "grad_norm": 0.6600922478012725, "learning_rate": 2.9410265506752134e-05, "loss": 0.1147, "step": 15525 }, { "epoch": 1.8411004387525198, "grad_norm": 1.0199171113854102, "learning_rate": 2.9407902770964678e-05, "loss": 0.1825, "step": 15526 }, { "epoch": 1.8412190205146448, "grad_norm": 1.103219083830544, "learning_rate": 2.9405539994541758e-05, "loss": 0.2176, "step": 15527 }, { "epoch": 1.8413376022767698, "grad_norm": 1.2827932091432572, "learning_rate": 2.940317717750516e-05, "loss": 0.2402, "step": 15528 }, { "epoch": 1.8414561840388948, "grad_norm": 0.8721853492738934, "learning_rate": 2.9400814319876673e-05, "loss": 0.165, "step": 15529 }, { "epoch": 1.8415747658010198, "grad_norm": 1.122134696750533, "learning_rate": 2.9398451421678076e-05, "loss": 0.297, "step": 15530 }, { "epoch": 1.8416933475631447, "grad_norm": 0.9829570932765097, "learning_rate": 2.9396088482931144e-05, "loss": 0.2062, "step": 15531 }, { "epoch": 1.84181192932527, "grad_norm": 0.8042683453300616, "learning_rate": 2.9393725503657664e-05, "loss": 0.1666, "step": 15532 }, { "epoch": 1.8419305110873947, "grad_norm": 0.9202558249894406, "learning_rate": 2.9391362483879437e-05, "loss": 0.1548, "step": 15533 }, { "epoch": 1.84204909284952, "grad_norm": 0.8454260548450594, "learning_rate": 2.9388999423618223e-05, "loss": 0.2052, "step": 15534 }, { "epoch": 1.8421676746116447, "grad_norm": 0.8925478968597303, "learning_rate": 2.9386636322895823e-05, "loss": 0.2294, "step": 15535 }, { "epoch": 1.8422862563737699, "grad_norm": 0.9002653005814084, "learning_rate": 2.938427318173401e-05, "loss": 0.1629, "step": 15536 }, { "epoch": 1.8424048381358946, "grad_norm": 0.6915961148266494, "learning_rate": 2.9381910000154578e-05, "loss": 0.144, "step": 15537 }, { "epoch": 1.8425234198980198, "grad_norm": 0.6818877006349963, "learning_rate": 2.9379546778179307e-05, "loss": 0.1228, "step": 15538 }, { "epoch": 1.8426420016601446, "grad_norm": 1.473406741035067, "learning_rate": 2.9377183515829987e-05, "loss": 0.3391, "step": 15539 }, { "epoch": 1.8427605834222698, "grad_norm": 0.6705623357485878, "learning_rate": 2.93748202131284e-05, "loss": 0.1404, "step": 15540 }, { "epoch": 1.8428791651843945, "grad_norm": 0.9203295163860467, "learning_rate": 2.9372456870096333e-05, "loss": 0.1858, "step": 15541 }, { "epoch": 1.8429977469465197, "grad_norm": 0.9613777185043043, "learning_rate": 2.937009348675558e-05, "loss": 0.2122, "step": 15542 }, { "epoch": 1.8431163287086445, "grad_norm": 0.6184935914394286, "learning_rate": 2.9367730063127918e-05, "loss": 0.1102, "step": 15543 }, { "epoch": 1.8432349104707697, "grad_norm": 0.7539112068200723, "learning_rate": 2.936536659923515e-05, "loss": 0.1457, "step": 15544 }, { "epoch": 1.8433534922328945, "grad_norm": 1.183561803533386, "learning_rate": 2.936300309509905e-05, "loss": 0.2121, "step": 15545 }, { "epoch": 1.8434720739950197, "grad_norm": 0.9456086609690579, "learning_rate": 2.9360639550741413e-05, "loss": 0.1912, "step": 15546 }, { "epoch": 1.8435906557571444, "grad_norm": 0.7899376814372949, "learning_rate": 2.935827596618403e-05, "loss": 0.1598, "step": 15547 }, { "epoch": 1.8437092375192696, "grad_norm": 1.0370541990541948, "learning_rate": 2.9355912341448682e-05, "loss": 0.2376, "step": 15548 }, { "epoch": 1.8438278192813944, "grad_norm": 0.8290271023304456, "learning_rate": 2.9353548676557162e-05, "loss": 0.1998, "step": 15549 }, { "epoch": 1.8439464010435196, "grad_norm": 1.4183854732448595, "learning_rate": 2.9351184971531263e-05, "loss": 0.3062, "step": 15550 }, { "epoch": 1.8440649828056443, "grad_norm": 1.0709302491114647, "learning_rate": 2.9348821226392774e-05, "loss": 0.1995, "step": 15551 }, { "epoch": 1.8441835645677696, "grad_norm": 0.836206134439578, "learning_rate": 2.9346457441163494e-05, "loss": 0.2091, "step": 15552 }, { "epoch": 1.8443021463298943, "grad_norm": 0.8987637128691102, "learning_rate": 2.9344093615865198e-05, "loss": 0.2017, "step": 15553 }, { "epoch": 1.8444207280920195, "grad_norm": 0.7705290022867171, "learning_rate": 2.93417297505197e-05, "loss": 0.1475, "step": 15554 }, { "epoch": 1.8445393098541443, "grad_norm": 0.8308075049951436, "learning_rate": 2.933936584514876e-05, "loss": 0.1606, "step": 15555 }, { "epoch": 1.8446578916162695, "grad_norm": 0.7248935586388102, "learning_rate": 2.9337001899774192e-05, "loss": 0.1857, "step": 15556 }, { "epoch": 1.8447764733783945, "grad_norm": 0.8031174152080248, "learning_rate": 2.9334637914417796e-05, "loss": 0.1593, "step": 15557 }, { "epoch": 1.8448950551405194, "grad_norm": 0.5641743331417466, "learning_rate": 2.9332273889101343e-05, "loss": 0.1277, "step": 15558 }, { "epoch": 1.8450136369026444, "grad_norm": 0.5708075724398769, "learning_rate": 2.932990982384664e-05, "loss": 0.1466, "step": 15559 }, { "epoch": 1.8451322186647694, "grad_norm": 0.9241076569195626, "learning_rate": 2.9327545718675475e-05, "loss": 0.1991, "step": 15560 }, { "epoch": 1.8452508004268944, "grad_norm": 1.0026829470406493, "learning_rate": 2.932518157360965e-05, "loss": 0.1558, "step": 15561 }, { "epoch": 1.8453693821890194, "grad_norm": 0.8586297986464995, "learning_rate": 2.9322817388670947e-05, "loss": 0.1321, "step": 15562 }, { "epoch": 1.8454879639511443, "grad_norm": 0.8255254271120334, "learning_rate": 2.932045316388118e-05, "loss": 0.1903, "step": 15563 }, { "epoch": 1.8456065457132693, "grad_norm": 1.2823175562279303, "learning_rate": 2.9318088899262124e-05, "loss": 0.2436, "step": 15564 }, { "epoch": 1.8457251274753943, "grad_norm": 0.874445783958458, "learning_rate": 2.9315724594835593e-05, "loss": 0.1928, "step": 15565 }, { "epoch": 1.8458437092375193, "grad_norm": 0.8545078057303036, "learning_rate": 2.9313360250623368e-05, "loss": 0.2112, "step": 15566 }, { "epoch": 1.8459622909996443, "grad_norm": 1.0224309740962911, "learning_rate": 2.931099586664725e-05, "loss": 0.1961, "step": 15567 }, { "epoch": 1.8460808727617692, "grad_norm": 1.0297019893046673, "learning_rate": 2.9308631442929036e-05, "loss": 0.2588, "step": 15568 }, { "epoch": 1.8461994545238942, "grad_norm": 1.7061857389486814, "learning_rate": 2.930626697949053e-05, "loss": 0.2817, "step": 15569 }, { "epoch": 1.8463180362860192, "grad_norm": 0.8999758072998301, "learning_rate": 2.9303902476353516e-05, "loss": 0.1853, "step": 15570 }, { "epoch": 1.8464366180481442, "grad_norm": 0.781073619641064, "learning_rate": 2.93015379335398e-05, "loss": 0.1344, "step": 15571 }, { "epoch": 1.8465551998102692, "grad_norm": 0.7498439022285138, "learning_rate": 2.929917335107118e-05, "loss": 0.1365, "step": 15572 }, { "epoch": 1.8466737815723941, "grad_norm": 0.9505779648776173, "learning_rate": 2.929680872896945e-05, "loss": 0.2285, "step": 15573 }, { "epoch": 1.8467923633345191, "grad_norm": 0.8382397437900913, "learning_rate": 2.929444406725642e-05, "loss": 0.1491, "step": 15574 }, { "epoch": 1.846910945096644, "grad_norm": 0.8415434501040656, "learning_rate": 2.929207936595388e-05, "loss": 0.1704, "step": 15575 }, { "epoch": 1.847029526858769, "grad_norm": 0.9083594701187442, "learning_rate": 2.928971462508363e-05, "loss": 0.1749, "step": 15576 }, { "epoch": 1.847148108620894, "grad_norm": 1.1491848036634968, "learning_rate": 2.9287349844667473e-05, "loss": 0.2666, "step": 15577 }, { "epoch": 1.847266690383019, "grad_norm": 0.6493716535398631, "learning_rate": 2.9284985024727206e-05, "loss": 0.1155, "step": 15578 }, { "epoch": 1.847385272145144, "grad_norm": 0.9759288863422841, "learning_rate": 2.928262016528463e-05, "loss": 0.2059, "step": 15579 }, { "epoch": 1.847503853907269, "grad_norm": 0.7124178859063552, "learning_rate": 2.928025526636155e-05, "loss": 0.1488, "step": 15580 }, { "epoch": 1.847622435669394, "grad_norm": 1.940151202141396, "learning_rate": 2.9277890327979762e-05, "loss": 0.4144, "step": 15581 }, { "epoch": 1.847741017431519, "grad_norm": 1.4792745672100487, "learning_rate": 2.9275525350161082e-05, "loss": 0.2656, "step": 15582 }, { "epoch": 1.8478595991936442, "grad_norm": 0.7330227774173688, "learning_rate": 2.9273160332927286e-05, "loss": 0.1487, "step": 15583 }, { "epoch": 1.847978180955769, "grad_norm": 1.333876655140558, "learning_rate": 2.9270795276300207e-05, "loss": 0.2877, "step": 15584 }, { "epoch": 1.8480967627178941, "grad_norm": 0.6386843792573899, "learning_rate": 2.926843018030162e-05, "loss": 0.1453, "step": 15585 }, { "epoch": 1.848215344480019, "grad_norm": 0.5259683041838589, "learning_rate": 2.9266065044953345e-05, "loss": 0.1009, "step": 15586 }, { "epoch": 1.848333926242144, "grad_norm": 0.9004323502534072, "learning_rate": 2.9263699870277184e-05, "loss": 0.224, "step": 15587 }, { "epoch": 1.8484525080042689, "grad_norm": 1.4306150656678245, "learning_rate": 2.9261334656294938e-05, "loss": 0.2986, "step": 15588 }, { "epoch": 1.848571089766394, "grad_norm": 0.7368854490568835, "learning_rate": 2.9258969403028414e-05, "loss": 0.185, "step": 15589 }, { "epoch": 1.8486896715285188, "grad_norm": 1.025749766666629, "learning_rate": 2.9256604110499413e-05, "loss": 0.19, "step": 15590 }, { "epoch": 1.848808253290644, "grad_norm": 0.7292812815899912, "learning_rate": 2.9254238778729738e-05, "loss": 0.1619, "step": 15591 }, { "epoch": 1.8489268350527688, "grad_norm": 0.7071279221656349, "learning_rate": 2.92518734077412e-05, "loss": 0.1417, "step": 15592 }, { "epoch": 1.849045416814894, "grad_norm": 0.7197525768949439, "learning_rate": 2.924950799755561e-05, "loss": 0.1615, "step": 15593 }, { "epoch": 1.8491639985770187, "grad_norm": 0.8378307988576199, "learning_rate": 2.9247142548194755e-05, "loss": 0.1844, "step": 15594 }, { "epoch": 1.849282580339144, "grad_norm": 1.3233558238150476, "learning_rate": 2.9244777059680466e-05, "loss": 0.2613, "step": 15595 }, { "epoch": 1.8494011621012687, "grad_norm": 0.9012908047189936, "learning_rate": 2.924241153203453e-05, "loss": 0.2078, "step": 15596 }, { "epoch": 1.849519743863394, "grad_norm": 0.5147452961093335, "learning_rate": 2.924004596527877e-05, "loss": 0.129, "step": 15597 }, { "epoch": 1.8496383256255187, "grad_norm": 0.8858035348823747, "learning_rate": 2.9237680359434984e-05, "loss": 0.2661, "step": 15598 }, { "epoch": 1.8497569073876439, "grad_norm": 0.6828880680424807, "learning_rate": 2.9235314714524976e-05, "loss": 0.166, "step": 15599 }, { "epoch": 1.8498754891497686, "grad_norm": 1.1935208257339225, "learning_rate": 2.9232949030570554e-05, "loss": 0.2568, "step": 15600 }, { "epoch": 1.8499940709118938, "grad_norm": 0.8898511995929292, "learning_rate": 2.923058330759354e-05, "loss": 0.1856, "step": 15601 }, { "epoch": 1.8501126526740186, "grad_norm": 0.8124758221647069, "learning_rate": 2.9228217545615743e-05, "loss": 0.1444, "step": 15602 }, { "epoch": 1.8502312344361438, "grad_norm": 0.8316478480568147, "learning_rate": 2.9225851744658956e-05, "loss": 0.2147, "step": 15603 }, { "epoch": 1.8503498161982685, "grad_norm": 0.8019987553462476, "learning_rate": 2.9223485904745006e-05, "loss": 0.1753, "step": 15604 }, { "epoch": 1.8504683979603938, "grad_norm": 0.7157762226264879, "learning_rate": 2.9221120025895686e-05, "loss": 0.1503, "step": 15605 }, { "epoch": 1.8505869797225185, "grad_norm": 0.8714854020859208, "learning_rate": 2.9218754108132822e-05, "loss": 0.1978, "step": 15606 }, { "epoch": 1.8507055614846437, "grad_norm": 1.103012406649438, "learning_rate": 2.9216388151478213e-05, "loss": 0.2639, "step": 15607 }, { "epoch": 1.8508241432467687, "grad_norm": 0.8915620592165444, "learning_rate": 2.9214022155953686e-05, "loss": 0.191, "step": 15608 }, { "epoch": 1.8509427250088937, "grad_norm": 1.5023907277175206, "learning_rate": 2.9211656121581032e-05, "loss": 0.265, "step": 15609 }, { "epoch": 1.8510613067710187, "grad_norm": 0.7089361084194097, "learning_rate": 2.9209290048382076e-05, "loss": 0.1453, "step": 15610 }, { "epoch": 1.8511798885331436, "grad_norm": 0.870043087568511, "learning_rate": 2.9206923936378626e-05, "loss": 0.1755, "step": 15611 }, { "epoch": 1.8512984702952686, "grad_norm": 0.5965664842488005, "learning_rate": 2.92045577855925e-05, "loss": 0.1208, "step": 15612 }, { "epoch": 1.8514170520573936, "grad_norm": 0.9279671591344625, "learning_rate": 2.9202191596045503e-05, "loss": 0.18, "step": 15613 }, { "epoch": 1.8515356338195186, "grad_norm": 1.3482982801317842, "learning_rate": 2.9199825367759452e-05, "loss": 0.3088, "step": 15614 }, { "epoch": 1.8516542155816436, "grad_norm": 1.0507404848474609, "learning_rate": 2.919745910075617e-05, "loss": 0.2782, "step": 15615 }, { "epoch": 1.8517727973437685, "grad_norm": 0.9869963160485663, "learning_rate": 2.9195092795057456e-05, "loss": 0.2413, "step": 15616 }, { "epoch": 1.8518913791058935, "grad_norm": 1.016546673477003, "learning_rate": 2.9192726450685137e-05, "loss": 0.2429, "step": 15617 }, { "epoch": 1.8520099608680185, "grad_norm": 1.4008981084399015, "learning_rate": 2.919036006766101e-05, "loss": 0.3219, "step": 15618 }, { "epoch": 1.8521285426301435, "grad_norm": 0.6283334707237043, "learning_rate": 2.9187993646006913e-05, "loss": 0.1806, "step": 15619 }, { "epoch": 1.8522471243922685, "grad_norm": 0.9422028146771613, "learning_rate": 2.918562718574464e-05, "loss": 0.1768, "step": 15620 }, { "epoch": 1.8523657061543934, "grad_norm": 1.07751274275259, "learning_rate": 2.9183260686896025e-05, "loss": 0.2334, "step": 15621 }, { "epoch": 1.8524842879165184, "grad_norm": 0.7046371530438592, "learning_rate": 2.9180894149482867e-05, "loss": 0.1419, "step": 15622 }, { "epoch": 1.8526028696786434, "grad_norm": 0.7243516592360328, "learning_rate": 2.9178527573527004e-05, "loss": 0.1689, "step": 15623 }, { "epoch": 1.8527214514407684, "grad_norm": 0.871906904811501, "learning_rate": 2.9176160959050236e-05, "loss": 0.2386, "step": 15624 }, { "epoch": 1.8528400332028934, "grad_norm": 1.3728896169990192, "learning_rate": 2.9173794306074385e-05, "loss": 0.3783, "step": 15625 }, { "epoch": 1.8529586149650183, "grad_norm": 0.7782717284178692, "learning_rate": 2.9171427614621266e-05, "loss": 0.1511, "step": 15626 }, { "epoch": 1.8530771967271433, "grad_norm": 0.9904610181942246, "learning_rate": 2.9169060884712705e-05, "loss": 0.2055, "step": 15627 }, { "epoch": 1.8531957784892683, "grad_norm": 0.6846036994155176, "learning_rate": 2.916669411637051e-05, "loss": 0.1487, "step": 15628 }, { "epoch": 1.8533143602513933, "grad_norm": 0.8130760677623585, "learning_rate": 2.91643273096165e-05, "loss": 0.1452, "step": 15629 }, { "epoch": 1.8534329420135183, "grad_norm": 0.766813565558471, "learning_rate": 2.916196046447251e-05, "loss": 0.1518, "step": 15630 }, { "epoch": 1.8535515237756433, "grad_norm": 0.7273536122778759, "learning_rate": 2.9159593580960337e-05, "loss": 0.1637, "step": 15631 }, { "epoch": 1.8536701055377685, "grad_norm": 0.6366873424029477, "learning_rate": 2.9157226659101822e-05, "loss": 0.1361, "step": 15632 }, { "epoch": 1.8537886872998932, "grad_norm": 0.8260746777707846, "learning_rate": 2.9154859698918773e-05, "loss": 0.202, "step": 15633 }, { "epoch": 1.8539072690620184, "grad_norm": 0.8328156127031066, "learning_rate": 2.9152492700433015e-05, "loss": 0.177, "step": 15634 }, { "epoch": 1.8540258508241432, "grad_norm": 0.8609523421584799, "learning_rate": 2.9150125663666363e-05, "loss": 0.1577, "step": 15635 }, { "epoch": 1.8541444325862684, "grad_norm": 0.789895179635797, "learning_rate": 2.9147758588640638e-05, "loss": 0.1648, "step": 15636 }, { "epoch": 1.8542630143483931, "grad_norm": 0.9408002739541972, "learning_rate": 2.914539147537767e-05, "loss": 0.1616, "step": 15637 }, { "epoch": 1.8543815961105183, "grad_norm": 0.7373062096743352, "learning_rate": 2.914302432389928e-05, "loss": 0.1554, "step": 15638 }, { "epoch": 1.854500177872643, "grad_norm": 0.982429227946715, "learning_rate": 2.9140657134227278e-05, "loss": 0.1523, "step": 15639 }, { "epoch": 1.8546187596347683, "grad_norm": 0.8881578475008414, "learning_rate": 2.9138289906383497e-05, "loss": 0.2302, "step": 15640 }, { "epoch": 1.854737341396893, "grad_norm": 0.8005171672667237, "learning_rate": 2.9135922640389756e-05, "loss": 0.1867, "step": 15641 }, { "epoch": 1.8548559231590183, "grad_norm": 0.6910683775076653, "learning_rate": 2.913355533626788e-05, "loss": 0.162, "step": 15642 }, { "epoch": 1.854974504921143, "grad_norm": 0.8798792327105698, "learning_rate": 2.91311879940397e-05, "loss": 0.1914, "step": 15643 }, { "epoch": 1.8550930866832682, "grad_norm": 1.1819467065681852, "learning_rate": 2.9128820613727025e-05, "loss": 0.2476, "step": 15644 }, { "epoch": 1.855211668445393, "grad_norm": 0.8273042250409575, "learning_rate": 2.9126453195351694e-05, "loss": 0.1672, "step": 15645 }, { "epoch": 1.8553302502075182, "grad_norm": 1.0607297314171167, "learning_rate": 2.912408573893552e-05, "loss": 0.2329, "step": 15646 }, { "epoch": 1.855448831969643, "grad_norm": 1.7986048301015167, "learning_rate": 2.9121718244500335e-05, "loss": 0.3663, "step": 15647 }, { "epoch": 1.8555674137317681, "grad_norm": 0.8043641982440064, "learning_rate": 2.9119350712067955e-05, "loss": 0.1533, "step": 15648 }, { "epoch": 1.855685995493893, "grad_norm": 0.9367515341028413, "learning_rate": 2.9116983141660226e-05, "loss": 0.1623, "step": 15649 }, { "epoch": 1.855804577256018, "grad_norm": 0.644915371918487, "learning_rate": 2.911461553329895e-05, "loss": 0.1624, "step": 15650 }, { "epoch": 1.8559231590181429, "grad_norm": 0.8575947820497104, "learning_rate": 2.9112247887005968e-05, "loss": 0.1819, "step": 15651 }, { "epoch": 1.856041740780268, "grad_norm": 0.9230339500987473, "learning_rate": 2.9109880202803095e-05, "loss": 0.1956, "step": 15652 }, { "epoch": 1.8561603225423928, "grad_norm": 0.8614010196402185, "learning_rate": 2.9107512480712185e-05, "loss": 0.2006, "step": 15653 }, { "epoch": 1.856278904304518, "grad_norm": 0.9003924831140376, "learning_rate": 2.910514472075503e-05, "loss": 0.1394, "step": 15654 }, { "epoch": 1.8563974860666428, "grad_norm": 1.1020308361792919, "learning_rate": 2.910277692295349e-05, "loss": 0.1606, "step": 15655 }, { "epoch": 1.856516067828768, "grad_norm": 0.8904009821810605, "learning_rate": 2.9100409087329363e-05, "loss": 0.1629, "step": 15656 }, { "epoch": 1.856634649590893, "grad_norm": 0.820372429711094, "learning_rate": 2.9098041213904497e-05, "loss": 0.171, "step": 15657 }, { "epoch": 1.856753231353018, "grad_norm": 0.9399088111972209, "learning_rate": 2.909567330270072e-05, "loss": 0.168, "step": 15658 }, { "epoch": 1.856871813115143, "grad_norm": 0.9272690330266089, "learning_rate": 2.9093305353739853e-05, "loss": 0.2163, "step": 15659 }, { "epoch": 1.856990394877268, "grad_norm": 0.9729458100674276, "learning_rate": 2.9090937367043734e-05, "loss": 0.1511, "step": 15660 }, { "epoch": 1.857108976639393, "grad_norm": 0.9285184514541763, "learning_rate": 2.9088569342634185e-05, "loss": 0.226, "step": 15661 }, { "epoch": 1.8572275584015179, "grad_norm": 0.6976503687526869, "learning_rate": 2.908620128053305e-05, "loss": 0.1508, "step": 15662 }, { "epoch": 1.8573461401636429, "grad_norm": 1.0018706443896965, "learning_rate": 2.9083833180762138e-05, "loss": 0.1921, "step": 15663 }, { "epoch": 1.8574647219257678, "grad_norm": 0.9253091323930899, "learning_rate": 2.9081465043343303e-05, "loss": 0.2026, "step": 15664 }, { "epoch": 1.8575833036878928, "grad_norm": 0.8241201571412, "learning_rate": 2.9079096868298356e-05, "loss": 0.1614, "step": 15665 }, { "epoch": 1.8577018854500178, "grad_norm": 1.001766706979655, "learning_rate": 2.9076728655649145e-05, "loss": 0.2049, "step": 15666 }, { "epoch": 1.8578204672121428, "grad_norm": 0.7216001944845267, "learning_rate": 2.9074360405417488e-05, "loss": 0.1498, "step": 15667 }, { "epoch": 1.8579390489742678, "grad_norm": 1.1248656522948117, "learning_rate": 2.9071992117625236e-05, "loss": 0.2133, "step": 15668 }, { "epoch": 1.8580576307363927, "grad_norm": 0.8359184327256771, "learning_rate": 2.90696237922942e-05, "loss": 0.1611, "step": 15669 }, { "epoch": 1.8581762124985177, "grad_norm": 0.6594707286038638, "learning_rate": 2.906725542944622e-05, "loss": 0.1196, "step": 15670 }, { "epoch": 1.8582947942606427, "grad_norm": 0.8706476299053684, "learning_rate": 2.906488702910314e-05, "loss": 0.156, "step": 15671 }, { "epoch": 1.8584133760227677, "grad_norm": 0.9809505457370777, "learning_rate": 2.906251859128678e-05, "loss": 0.1839, "step": 15672 }, { "epoch": 1.8585319577848927, "grad_norm": 0.908385687254952, "learning_rate": 2.9060150116018987e-05, "loss": 0.2052, "step": 15673 }, { "epoch": 1.8586505395470176, "grad_norm": 0.6403852497813047, "learning_rate": 2.905778160332158e-05, "loss": 0.1558, "step": 15674 }, { "epoch": 1.8587691213091426, "grad_norm": 0.6910058246520515, "learning_rate": 2.905541305321642e-05, "loss": 0.1422, "step": 15675 }, { "epoch": 1.8588877030712676, "grad_norm": 0.868689309602238, "learning_rate": 2.905304446572531e-05, "loss": 0.1967, "step": 15676 }, { "epoch": 1.8590062848333926, "grad_norm": 0.7812059251581103, "learning_rate": 2.9050675840870106e-05, "loss": 0.1892, "step": 15677 }, { "epoch": 1.8591248665955176, "grad_norm": 0.9801019398123298, "learning_rate": 2.9048307178672635e-05, "loss": 0.1834, "step": 15678 }, { "epoch": 1.8592434483576425, "grad_norm": 0.8751198766746738, "learning_rate": 2.9045938479154734e-05, "loss": 0.1898, "step": 15679 }, { "epoch": 1.8593620301197675, "grad_norm": 0.6745627940207227, "learning_rate": 2.9043569742338238e-05, "loss": 0.1197, "step": 15680 }, { "epoch": 1.8594806118818927, "grad_norm": 0.8375523037175205, "learning_rate": 2.9041200968245e-05, "loss": 0.1328, "step": 15681 }, { "epoch": 1.8595991936440175, "grad_norm": 1.3329359252954864, "learning_rate": 2.903883215689684e-05, "loss": 0.3215, "step": 15682 }, { "epoch": 1.8597177754061427, "grad_norm": 1.2509264056929286, "learning_rate": 2.90364633083156e-05, "loss": 0.2856, "step": 15683 }, { "epoch": 1.8598363571682675, "grad_norm": 1.014680619244333, "learning_rate": 2.9034094422523118e-05, "loss": 0.2064, "step": 15684 }, { "epoch": 1.8599549389303927, "grad_norm": 0.7746250640363895, "learning_rate": 2.9031725499541228e-05, "loss": 0.2197, "step": 15685 }, { "epoch": 1.8600735206925174, "grad_norm": 0.9214808425410601, "learning_rate": 2.9029356539391777e-05, "loss": 0.1604, "step": 15686 }, { "epoch": 1.8601921024546426, "grad_norm": 0.6913923478278179, "learning_rate": 2.90269875420966e-05, "loss": 0.1411, "step": 15687 }, { "epoch": 1.8603106842167674, "grad_norm": 0.7744038931820316, "learning_rate": 2.9024618507677538e-05, "loss": 0.2111, "step": 15688 }, { "epoch": 1.8604292659788926, "grad_norm": 1.0254662211981798, "learning_rate": 2.9022249436156423e-05, "loss": 0.2093, "step": 15689 }, { "epoch": 1.8605478477410173, "grad_norm": 0.7516137964496158, "learning_rate": 2.9019880327555103e-05, "loss": 0.1231, "step": 15690 }, { "epoch": 1.8606664295031425, "grad_norm": 0.5374938668771707, "learning_rate": 2.9017511181895417e-05, "loss": 0.1286, "step": 15691 }, { "epoch": 1.8607850112652673, "grad_norm": 1.3718887838689324, "learning_rate": 2.901514199919921e-05, "loss": 0.2562, "step": 15692 }, { "epoch": 1.8609035930273925, "grad_norm": 0.895108497796498, "learning_rate": 2.9012772779488305e-05, "loss": 0.1863, "step": 15693 }, { "epoch": 1.8610221747895173, "grad_norm": 0.8744748401003857, "learning_rate": 2.901040352278457e-05, "loss": 0.1445, "step": 15694 }, { "epoch": 1.8611407565516425, "grad_norm": 0.5645227287826177, "learning_rate": 2.900803422910982e-05, "loss": 0.1169, "step": 15695 }, { "epoch": 1.8612593383137672, "grad_norm": 0.6337871798591447, "learning_rate": 2.9005664898485917e-05, "loss": 0.1115, "step": 15696 }, { "epoch": 1.8613779200758924, "grad_norm": 0.8049843822911771, "learning_rate": 2.9003295530934692e-05, "loss": 0.1927, "step": 15697 }, { "epoch": 1.8614965018380172, "grad_norm": 0.837988393959526, "learning_rate": 2.9000926126477994e-05, "loss": 0.1527, "step": 15698 }, { "epoch": 1.8616150836001424, "grad_norm": 1.7878125229895176, "learning_rate": 2.8998556685137663e-05, "loss": 0.3658, "step": 15699 }, { "epoch": 1.8617336653622671, "grad_norm": 0.8275902150069597, "learning_rate": 2.899618720693554e-05, "loss": 0.1851, "step": 15700 }, { "epoch": 1.8618522471243923, "grad_norm": 0.8515986436068896, "learning_rate": 2.8993817691893466e-05, "loss": 0.1772, "step": 15701 }, { "epoch": 1.861970828886517, "grad_norm": 0.812208020719093, "learning_rate": 2.8991448140033296e-05, "loss": 0.1915, "step": 15702 }, { "epoch": 1.8620894106486423, "grad_norm": 0.7899743116550093, "learning_rate": 2.8989078551376876e-05, "loss": 0.1834, "step": 15703 }, { "epoch": 1.862207992410767, "grad_norm": 0.6632674680948493, "learning_rate": 2.8986708925946033e-05, "loss": 0.1714, "step": 15704 }, { "epoch": 1.8623265741728923, "grad_norm": 0.7421330041548343, "learning_rate": 2.8984339263762634e-05, "loss": 0.1437, "step": 15705 }, { "epoch": 1.8624451559350172, "grad_norm": 0.8749952754176814, "learning_rate": 2.8981969564848498e-05, "loss": 0.1962, "step": 15706 }, { "epoch": 1.8625637376971422, "grad_norm": 0.7052780358153463, "learning_rate": 2.8979599829225503e-05, "loss": 0.1338, "step": 15707 }, { "epoch": 1.8626823194592672, "grad_norm": 0.9547332606122376, "learning_rate": 2.897723005691546e-05, "loss": 0.1595, "step": 15708 }, { "epoch": 1.8628009012213922, "grad_norm": 0.6765995914432513, "learning_rate": 2.8974860247940245e-05, "loss": 0.1529, "step": 15709 }, { "epoch": 1.8629194829835172, "grad_norm": 0.5745832210648041, "learning_rate": 2.8972490402321683e-05, "loss": 0.1342, "step": 15710 }, { "epoch": 1.8630380647456422, "grad_norm": 0.9591877553947494, "learning_rate": 2.897012052008164e-05, "loss": 0.2211, "step": 15711 }, { "epoch": 1.8631566465077671, "grad_norm": 0.8565683979125435, "learning_rate": 2.896775060124195e-05, "loss": 0.171, "step": 15712 }, { "epoch": 1.8632752282698921, "grad_norm": 1.0795536365000349, "learning_rate": 2.896538064582447e-05, "loss": 0.2273, "step": 15713 }, { "epoch": 1.863393810032017, "grad_norm": 1.002563274499882, "learning_rate": 2.8963010653851035e-05, "loss": 0.2383, "step": 15714 }, { "epoch": 1.863512391794142, "grad_norm": 0.8637176975892262, "learning_rate": 2.8960640625343505e-05, "loss": 0.1448, "step": 15715 }, { "epoch": 1.863630973556267, "grad_norm": 0.9866310733151303, "learning_rate": 2.895827056032373e-05, "loss": 0.2423, "step": 15716 }, { "epoch": 1.863749555318392, "grad_norm": 0.8554819326118541, "learning_rate": 2.895590045881355e-05, "loss": 0.2256, "step": 15717 }, { "epoch": 1.863868137080517, "grad_norm": 0.8037109261312879, "learning_rate": 2.8953530320834822e-05, "loss": 0.2161, "step": 15718 }, { "epoch": 1.863986718842642, "grad_norm": 0.6474137052303837, "learning_rate": 2.895116014640939e-05, "loss": 0.1268, "step": 15719 }, { "epoch": 1.864105300604767, "grad_norm": 0.8587270776136704, "learning_rate": 2.8948789935559108e-05, "loss": 0.2308, "step": 15720 }, { "epoch": 1.864223882366892, "grad_norm": 0.8182373802362489, "learning_rate": 2.8946419688305813e-05, "loss": 0.1443, "step": 15721 }, { "epoch": 1.864342464129017, "grad_norm": 1.3265198984237285, "learning_rate": 2.8944049404671387e-05, "loss": 0.297, "step": 15722 }, { "epoch": 1.864461045891142, "grad_norm": 0.9118667041548154, "learning_rate": 2.8941679084677654e-05, "loss": 0.1955, "step": 15723 }, { "epoch": 1.864579627653267, "grad_norm": 1.2622831460584076, "learning_rate": 2.8939308728346475e-05, "loss": 0.2508, "step": 15724 }, { "epoch": 1.8646982094153919, "grad_norm": 0.9324726997806493, "learning_rate": 2.8936938335699694e-05, "loss": 0.2141, "step": 15725 }, { "epoch": 1.8648167911775169, "grad_norm": 1.0928001754902494, "learning_rate": 2.893456790675918e-05, "loss": 0.197, "step": 15726 }, { "epoch": 1.8649353729396418, "grad_norm": 0.8952926398432173, "learning_rate": 2.8932197441546765e-05, "loss": 0.1814, "step": 15727 }, { "epoch": 1.8650539547017668, "grad_norm": 0.8758312594635707, "learning_rate": 2.8929826940084314e-05, "loss": 0.1482, "step": 15728 }, { "epoch": 1.8651725364638918, "grad_norm": 1.2181686553786264, "learning_rate": 2.8927456402393678e-05, "loss": 0.2946, "step": 15729 }, { "epoch": 1.8652911182260168, "grad_norm": 1.4131469436617585, "learning_rate": 2.8925085828496706e-05, "loss": 0.2779, "step": 15730 }, { "epoch": 1.8654096999881418, "grad_norm": 1.0694390902186148, "learning_rate": 2.8922715218415263e-05, "loss": 0.1894, "step": 15731 }, { "epoch": 1.865528281750267, "grad_norm": 0.6836333681877979, "learning_rate": 2.892034457217119e-05, "loss": 0.1541, "step": 15732 }, { "epoch": 1.8656468635123917, "grad_norm": 0.8596774037946419, "learning_rate": 2.891797388978636e-05, "loss": 0.2096, "step": 15733 }, { "epoch": 1.865765445274517, "grad_norm": 0.8250029415054085, "learning_rate": 2.89156031712826e-05, "loss": 0.1802, "step": 15734 }, { "epoch": 1.8658840270366417, "grad_norm": 0.678079837059266, "learning_rate": 2.8913232416681796e-05, "loss": 0.1479, "step": 15735 }, { "epoch": 1.866002608798767, "grad_norm": 0.809114400221667, "learning_rate": 2.8910861626005776e-05, "loss": 0.2033, "step": 15736 }, { "epoch": 1.8661211905608917, "grad_norm": 0.6163417785646943, "learning_rate": 2.8908490799276412e-05, "loss": 0.1571, "step": 15737 }, { "epoch": 1.8662397723230169, "grad_norm": 0.6840862693957209, "learning_rate": 2.8906119936515554e-05, "loss": 0.1481, "step": 15738 }, { "epoch": 1.8663583540851416, "grad_norm": 0.8938975462306731, "learning_rate": 2.890374903774506e-05, "loss": 0.1463, "step": 15739 }, { "epoch": 1.8664769358472668, "grad_norm": 0.9504721647322899, "learning_rate": 2.8901378102986783e-05, "loss": 0.233, "step": 15740 }, { "epoch": 1.8665955176093916, "grad_norm": 0.7146363370393196, "learning_rate": 2.889900713226259e-05, "loss": 0.1504, "step": 15741 }, { "epoch": 1.8667140993715168, "grad_norm": 1.0396518166153694, "learning_rate": 2.8896636125594328e-05, "loss": 0.259, "step": 15742 }, { "epoch": 1.8668326811336415, "grad_norm": 0.9360046472568816, "learning_rate": 2.8894265083003862e-05, "loss": 0.2038, "step": 15743 }, { "epoch": 1.8669512628957667, "grad_norm": 0.7872630127277365, "learning_rate": 2.8891894004513053e-05, "loss": 0.1399, "step": 15744 }, { "epoch": 1.8670698446578915, "grad_norm": 1.0436326735132802, "learning_rate": 2.8889522890143745e-05, "loss": 0.2547, "step": 15745 }, { "epoch": 1.8671884264200167, "grad_norm": 0.8509003437879862, "learning_rate": 2.888715173991781e-05, "loss": 0.1644, "step": 15746 }, { "epoch": 1.8673070081821415, "grad_norm": 0.8684813648614095, "learning_rate": 2.8884780553857104e-05, "loss": 0.1927, "step": 15747 }, { "epoch": 1.8674255899442667, "grad_norm": 0.7796511634699426, "learning_rate": 2.8882409331983486e-05, "loss": 0.1896, "step": 15748 }, { "epoch": 1.8675441717063914, "grad_norm": 0.6767253517150421, "learning_rate": 2.888003807431881e-05, "loss": 0.1375, "step": 15749 }, { "epoch": 1.8676627534685166, "grad_norm": 0.7616364986488229, "learning_rate": 2.887766678088494e-05, "loss": 0.157, "step": 15750 }, { "epoch": 1.8677813352306414, "grad_norm": 0.547953528078479, "learning_rate": 2.887529545170374e-05, "loss": 0.1233, "step": 15751 }, { "epoch": 1.8678999169927666, "grad_norm": 0.7030006715050804, "learning_rate": 2.887292408679707e-05, "loss": 0.1275, "step": 15752 }, { "epoch": 1.8680184987548913, "grad_norm": 1.0234857295299398, "learning_rate": 2.8870552686186784e-05, "loss": 0.2364, "step": 15753 }, { "epoch": 1.8681370805170165, "grad_norm": 0.7723577842499634, "learning_rate": 2.8868181249894754e-05, "loss": 0.1461, "step": 15754 }, { "epoch": 1.8682556622791413, "grad_norm": 1.081796725484585, "learning_rate": 2.8865809777942833e-05, "loss": 0.2771, "step": 15755 }, { "epoch": 1.8683742440412665, "grad_norm": 0.6623159770796955, "learning_rate": 2.8863438270352892e-05, "loss": 0.1409, "step": 15756 }, { "epoch": 1.8684928258033915, "grad_norm": 0.9049185357901101, "learning_rate": 2.8861066727146778e-05, "loss": 0.1942, "step": 15757 }, { "epoch": 1.8686114075655165, "grad_norm": 0.8300228381872246, "learning_rate": 2.885869514834637e-05, "loss": 0.1734, "step": 15758 }, { "epoch": 1.8687299893276414, "grad_norm": 0.5910055447030439, "learning_rate": 2.8856323533973524e-05, "loss": 0.1207, "step": 15759 }, { "epoch": 1.8688485710897664, "grad_norm": 0.8429615327450076, "learning_rate": 2.88539518840501e-05, "loss": 0.1679, "step": 15760 }, { "epoch": 1.8689671528518914, "grad_norm": 0.9171764223833185, "learning_rate": 2.8851580198597973e-05, "loss": 0.1818, "step": 15761 }, { "epoch": 1.8690857346140164, "grad_norm": 0.8901020469916581, "learning_rate": 2.8849208477638996e-05, "loss": 0.1819, "step": 15762 }, { "epoch": 1.8692043163761414, "grad_norm": 0.49486570768482574, "learning_rate": 2.884683672119504e-05, "loss": 0.1031, "step": 15763 }, { "epoch": 1.8693228981382664, "grad_norm": 0.6198386329817547, "learning_rate": 2.884446492928796e-05, "loss": 0.1285, "step": 15764 }, { "epoch": 1.8694414799003913, "grad_norm": 0.9465585582843161, "learning_rate": 2.8842093101939636e-05, "loss": 0.2233, "step": 15765 }, { "epoch": 1.8695600616625163, "grad_norm": 1.0089277903918226, "learning_rate": 2.8839721239171918e-05, "loss": 0.2366, "step": 15766 }, { "epoch": 1.8696786434246413, "grad_norm": 0.9047673179759875, "learning_rate": 2.8837349341006686e-05, "loss": 0.1688, "step": 15767 }, { "epoch": 1.8697972251867663, "grad_norm": 0.7831040885358498, "learning_rate": 2.883497740746579e-05, "loss": 0.1562, "step": 15768 }, { "epoch": 1.8699158069488913, "grad_norm": 1.1423677225678779, "learning_rate": 2.8832605438571108e-05, "loss": 0.2168, "step": 15769 }, { "epoch": 1.8700343887110162, "grad_norm": 1.1470798716965198, "learning_rate": 2.8830233434344507e-05, "loss": 0.2409, "step": 15770 }, { "epoch": 1.8701529704731412, "grad_norm": 0.6515268178095849, "learning_rate": 2.8827861394807843e-05, "loss": 0.1295, "step": 15771 }, { "epoch": 1.8702715522352662, "grad_norm": 0.9883520431756633, "learning_rate": 2.8825489319983e-05, "loss": 0.1725, "step": 15772 }, { "epoch": 1.8703901339973912, "grad_norm": 1.3011487402953013, "learning_rate": 2.8823117209891832e-05, "loss": 0.2724, "step": 15773 }, { "epoch": 1.8705087157595162, "grad_norm": 0.6957918847063239, "learning_rate": 2.8820745064556216e-05, "loss": 0.1197, "step": 15774 }, { "epoch": 1.8706272975216411, "grad_norm": 0.7959840492919338, "learning_rate": 2.8818372883998012e-05, "loss": 0.2011, "step": 15775 }, { "epoch": 1.8707458792837661, "grad_norm": 0.7658993926237069, "learning_rate": 2.8816000668239095e-05, "loss": 0.1614, "step": 15776 }, { "epoch": 1.870864461045891, "grad_norm": 1.0084203696055216, "learning_rate": 2.881362841730133e-05, "loss": 0.2218, "step": 15777 }, { "epoch": 1.870983042808016, "grad_norm": 1.136394541686231, "learning_rate": 2.881125613120659e-05, "loss": 0.243, "step": 15778 }, { "epoch": 1.871101624570141, "grad_norm": 0.8303080198541085, "learning_rate": 2.8808883809976734e-05, "loss": 0.1496, "step": 15779 }, { "epoch": 1.871220206332266, "grad_norm": 1.0694878797020637, "learning_rate": 2.8806511453633644e-05, "loss": 0.1996, "step": 15780 }, { "epoch": 1.8713387880943912, "grad_norm": 0.8846714010112036, "learning_rate": 2.8804139062199185e-05, "loss": 0.263, "step": 15781 }, { "epoch": 1.871457369856516, "grad_norm": 0.8180850329633103, "learning_rate": 2.8801766635695238e-05, "loss": 0.2213, "step": 15782 }, { "epoch": 1.8715759516186412, "grad_norm": 0.7483747987442596, "learning_rate": 2.8799394174143653e-05, "loss": 0.138, "step": 15783 }, { "epoch": 1.871694533380766, "grad_norm": 0.8033560879254849, "learning_rate": 2.8797021677566323e-05, "loss": 0.192, "step": 15784 }, { "epoch": 1.8718131151428912, "grad_norm": 0.9286437354733176, "learning_rate": 2.8794649145985096e-05, "loss": 0.2106, "step": 15785 }, { "epoch": 1.871931696905016, "grad_norm": 0.8309171293093917, "learning_rate": 2.8792276579421866e-05, "loss": 0.2542, "step": 15786 }, { "epoch": 1.8720502786671411, "grad_norm": 0.6702850643355743, "learning_rate": 2.8789903977898503e-05, "loss": 0.1754, "step": 15787 }, { "epoch": 1.872168860429266, "grad_norm": 0.8628348642666399, "learning_rate": 2.8787531341436857e-05, "loss": 0.1468, "step": 15788 }, { "epoch": 1.872287442191391, "grad_norm": 1.3118133866539863, "learning_rate": 2.8785158670058825e-05, "loss": 0.2987, "step": 15789 }, { "epoch": 1.8724060239535159, "grad_norm": 0.8733196993050373, "learning_rate": 2.878278596378627e-05, "loss": 0.1701, "step": 15790 }, { "epoch": 1.872524605715641, "grad_norm": 1.127254408058649, "learning_rate": 2.8780413222641074e-05, "loss": 0.2604, "step": 15791 }, { "epoch": 1.8726431874777658, "grad_norm": 0.7803722910197074, "learning_rate": 2.8778040446645095e-05, "loss": 0.1487, "step": 15792 }, { "epoch": 1.872761769239891, "grad_norm": 0.7392848108693871, "learning_rate": 2.8775667635820226e-05, "loss": 0.1392, "step": 15793 }, { "epoch": 1.8728803510020158, "grad_norm": 0.8377128178776392, "learning_rate": 2.877329479018832e-05, "loss": 0.1317, "step": 15794 }, { "epoch": 1.872998932764141, "grad_norm": 0.6956347449923469, "learning_rate": 2.8770921909771275e-05, "loss": 0.191, "step": 15795 }, { "epoch": 1.8731175145262657, "grad_norm": 0.8955426961979964, "learning_rate": 2.8768548994590944e-05, "loss": 0.1359, "step": 15796 }, { "epoch": 1.873236096288391, "grad_norm": 0.620209676470644, "learning_rate": 2.8766176044669218e-05, "loss": 0.1216, "step": 15797 }, { "epoch": 1.8733546780505157, "grad_norm": 0.7384927971220349, "learning_rate": 2.8763803060027967e-05, "loss": 0.1339, "step": 15798 }, { "epoch": 1.873473259812641, "grad_norm": 1.2793953070138269, "learning_rate": 2.8761430040689065e-05, "loss": 0.2337, "step": 15799 }, { "epoch": 1.8735918415747657, "grad_norm": 1.0879184836800282, "learning_rate": 2.8759056986674382e-05, "loss": 0.2385, "step": 15800 }, { "epoch": 1.8737104233368909, "grad_norm": 1.1391263099350006, "learning_rate": 2.8756683898005817e-05, "loss": 0.2152, "step": 15801 }, { "epoch": 1.8738290050990156, "grad_norm": 0.7595798212423558, "learning_rate": 2.8754310774705228e-05, "loss": 0.1319, "step": 15802 }, { "epoch": 1.8739475868611408, "grad_norm": 0.9215425161435666, "learning_rate": 2.8751937616794494e-05, "loss": 0.1973, "step": 15803 }, { "epoch": 1.8740661686232656, "grad_norm": 0.7113222638492109, "learning_rate": 2.8749564424295505e-05, "loss": 0.12, "step": 15804 }, { "epoch": 1.8741847503853908, "grad_norm": 0.6872569213757911, "learning_rate": 2.874719119723012e-05, "loss": 0.1036, "step": 15805 }, { "epoch": 1.8743033321475158, "grad_norm": 0.7909412229742584, "learning_rate": 2.874481793562024e-05, "loss": 0.1484, "step": 15806 }, { "epoch": 1.8744219139096407, "grad_norm": 1.3991784629185209, "learning_rate": 2.8742444639487722e-05, "loss": 0.3713, "step": 15807 }, { "epoch": 1.8745404956717657, "grad_norm": 0.5811833054537823, "learning_rate": 2.8740071308854454e-05, "loss": 0.1286, "step": 15808 }, { "epoch": 1.8746590774338907, "grad_norm": 0.6316879586848315, "learning_rate": 2.8737697943742313e-05, "loss": 0.1546, "step": 15809 }, { "epoch": 1.8747776591960157, "grad_norm": 1.057315311457953, "learning_rate": 2.8735324544173188e-05, "loss": 0.2356, "step": 15810 }, { "epoch": 1.8748962409581407, "grad_norm": 0.795547436469296, "learning_rate": 2.8732951110168944e-05, "loss": 0.1591, "step": 15811 }, { "epoch": 1.8750148227202657, "grad_norm": 0.9101019986594284, "learning_rate": 2.8730577641751476e-05, "loss": 0.2094, "step": 15812 }, { "epoch": 1.8751334044823906, "grad_norm": 1.431042614289283, "learning_rate": 2.8728204138942648e-05, "loss": 0.3572, "step": 15813 }, { "epoch": 1.8752519862445156, "grad_norm": 0.723854330976813, "learning_rate": 2.8725830601764353e-05, "loss": 0.1613, "step": 15814 }, { "epoch": 1.8753705680066406, "grad_norm": 1.0490044559103697, "learning_rate": 2.8723457030238473e-05, "loss": 0.2424, "step": 15815 }, { "epoch": 1.8754891497687656, "grad_norm": 0.9200026264472219, "learning_rate": 2.872108342438688e-05, "loss": 0.1486, "step": 15816 }, { "epoch": 1.8756077315308906, "grad_norm": 0.8777700234210734, "learning_rate": 2.8718709784231463e-05, "loss": 0.142, "step": 15817 }, { "epoch": 1.8757263132930155, "grad_norm": 1.0240132424454647, "learning_rate": 2.87163361097941e-05, "loss": 0.2031, "step": 15818 }, { "epoch": 1.8758448950551405, "grad_norm": 0.5447338092567117, "learning_rate": 2.8713962401096676e-05, "loss": 0.1402, "step": 15819 }, { "epoch": 1.8759634768172655, "grad_norm": 0.7489182593861344, "learning_rate": 2.871158865816107e-05, "loss": 0.1982, "step": 15820 }, { "epoch": 1.8760820585793905, "grad_norm": 0.7011363253405474, "learning_rate": 2.8709214881009178e-05, "loss": 0.1326, "step": 15821 }, { "epoch": 1.8762006403415155, "grad_norm": 1.0498203882457546, "learning_rate": 2.8706841069662866e-05, "loss": 0.2037, "step": 15822 }, { "epoch": 1.8763192221036404, "grad_norm": 0.8399437081432923, "learning_rate": 2.8704467224144026e-05, "loss": 0.1521, "step": 15823 }, { "epoch": 1.8764378038657654, "grad_norm": 0.9219637173389194, "learning_rate": 2.870209334447454e-05, "loss": 0.1953, "step": 15824 }, { "epoch": 1.8765563856278904, "grad_norm": 0.7250677225635698, "learning_rate": 2.86997194306763e-05, "loss": 0.1235, "step": 15825 }, { "epoch": 1.8766749673900154, "grad_norm": 0.8444827082249448, "learning_rate": 2.8697345482771175e-05, "loss": 0.1553, "step": 15826 }, { "epoch": 1.8767935491521404, "grad_norm": 1.149716102343109, "learning_rate": 2.8694971500781064e-05, "loss": 0.2062, "step": 15827 }, { "epoch": 1.8769121309142653, "grad_norm": 1.0112287717605917, "learning_rate": 2.8692597484727847e-05, "loss": 0.1754, "step": 15828 }, { "epoch": 1.8770307126763903, "grad_norm": 0.9034522041821953, "learning_rate": 2.86902234346334e-05, "loss": 0.2108, "step": 15829 }, { "epoch": 1.8771492944385153, "grad_norm": 0.7191857540201086, "learning_rate": 2.8687849350519626e-05, "loss": 0.147, "step": 15830 }, { "epoch": 1.8772678762006403, "grad_norm": 0.8464233473673629, "learning_rate": 2.86854752324084e-05, "loss": 0.1819, "step": 15831 }, { "epoch": 1.8773864579627655, "grad_norm": 0.9532910702993551, "learning_rate": 2.8683101080321616e-05, "loss": 0.1986, "step": 15832 }, { "epoch": 1.8775050397248902, "grad_norm": 1.2533274050416605, "learning_rate": 2.8680726894281152e-05, "loss": 0.3698, "step": 15833 }, { "epoch": 1.8776236214870154, "grad_norm": 0.8918494308597169, "learning_rate": 2.8678352674308908e-05, "loss": 0.1861, "step": 15834 }, { "epoch": 1.8777422032491402, "grad_norm": 0.7399863162993228, "learning_rate": 2.8675978420426753e-05, "loss": 0.1346, "step": 15835 }, { "epoch": 1.8778607850112654, "grad_norm": 0.7281018595813784, "learning_rate": 2.8673604132656595e-05, "loss": 0.181, "step": 15836 }, { "epoch": 1.8779793667733902, "grad_norm": 0.7512920586800717, "learning_rate": 2.86712298110203e-05, "loss": 0.1375, "step": 15837 }, { "epoch": 1.8780979485355154, "grad_norm": 0.6531525289679379, "learning_rate": 2.8668855455539773e-05, "loss": 0.1252, "step": 15838 }, { "epoch": 1.8782165302976401, "grad_norm": 0.6870066980524422, "learning_rate": 2.86664810662369e-05, "loss": 0.1408, "step": 15839 }, { "epoch": 1.8783351120597653, "grad_norm": 0.8085418275842104, "learning_rate": 2.866410664313357e-05, "loss": 0.1677, "step": 15840 }, { "epoch": 1.87845369382189, "grad_norm": 0.6505484659622596, "learning_rate": 2.866173218625166e-05, "loss": 0.137, "step": 15841 }, { "epoch": 1.8785722755840153, "grad_norm": 0.7438816055418921, "learning_rate": 2.8659357695613076e-05, "loss": 0.1593, "step": 15842 }, { "epoch": 1.87869085734614, "grad_norm": 0.637284376388411, "learning_rate": 2.8656983171239705e-05, "loss": 0.1574, "step": 15843 }, { "epoch": 1.8788094391082653, "grad_norm": 0.6428622878417586, "learning_rate": 2.865460861315343e-05, "loss": 0.1611, "step": 15844 }, { "epoch": 1.87892802087039, "grad_norm": 0.7193178189529267, "learning_rate": 2.8652234021376147e-05, "loss": 0.1335, "step": 15845 }, { "epoch": 1.8790466026325152, "grad_norm": 1.0333904552913857, "learning_rate": 2.864985939592974e-05, "loss": 0.2223, "step": 15846 }, { "epoch": 1.87916518439464, "grad_norm": 0.7839281583015862, "learning_rate": 2.864748473683611e-05, "loss": 0.2335, "step": 15847 }, { "epoch": 1.8792837661567652, "grad_norm": 0.8217527134566849, "learning_rate": 2.864511004411714e-05, "loss": 0.1678, "step": 15848 }, { "epoch": 1.87940234791889, "grad_norm": 0.8490106958153745, "learning_rate": 2.864273531779473e-05, "loss": 0.1593, "step": 15849 }, { "epoch": 1.8795209296810151, "grad_norm": 1.1899800683976633, "learning_rate": 2.864036055789076e-05, "loss": 0.268, "step": 15850 }, { "epoch": 1.87963951144314, "grad_norm": 1.0204586214900662, "learning_rate": 2.8637985764427138e-05, "loss": 0.1938, "step": 15851 }, { "epoch": 1.879758093205265, "grad_norm": 1.0863306914526316, "learning_rate": 2.8635610937425743e-05, "loss": 0.2479, "step": 15852 }, { "epoch": 1.8798766749673899, "grad_norm": 1.005897280542907, "learning_rate": 2.8633236076908475e-05, "loss": 0.2022, "step": 15853 }, { "epoch": 1.879995256729515, "grad_norm": 1.4397962790345318, "learning_rate": 2.863086118289723e-05, "loss": 0.3559, "step": 15854 }, { "epoch": 1.8801138384916398, "grad_norm": 0.9109673127382304, "learning_rate": 2.8628486255413895e-05, "loss": 0.1868, "step": 15855 }, { "epoch": 1.880232420253765, "grad_norm": 0.7250079848461775, "learning_rate": 2.8626111294480362e-05, "loss": 0.1733, "step": 15856 }, { "epoch": 1.88035100201589, "grad_norm": 0.8702375394145215, "learning_rate": 2.8623736300118532e-05, "loss": 0.1633, "step": 15857 }, { "epoch": 1.880469583778015, "grad_norm": 0.8460350651137697, "learning_rate": 2.8621361272350295e-05, "loss": 0.1626, "step": 15858 }, { "epoch": 1.88058816554014, "grad_norm": 0.803103142835801, "learning_rate": 2.8618986211197545e-05, "loss": 0.1575, "step": 15859 }, { "epoch": 1.880706747302265, "grad_norm": 0.6411971892017744, "learning_rate": 2.861661111668219e-05, "loss": 0.1559, "step": 15860 }, { "epoch": 1.88082532906439, "grad_norm": 0.7238511811597965, "learning_rate": 2.861423598882611e-05, "loss": 0.1957, "step": 15861 }, { "epoch": 1.880943910826515, "grad_norm": 0.923464860317249, "learning_rate": 2.861186082765121e-05, "loss": 0.1773, "step": 15862 }, { "epoch": 1.88106249258864, "grad_norm": 0.7339977026754959, "learning_rate": 2.8609485633179378e-05, "loss": 0.1919, "step": 15863 }, { "epoch": 1.8811810743507649, "grad_norm": 1.2446982130460174, "learning_rate": 2.860711040543252e-05, "loss": 0.2924, "step": 15864 }, { "epoch": 1.8812996561128899, "grad_norm": 0.9480167786009551, "learning_rate": 2.860473514443252e-05, "loss": 0.2013, "step": 15865 }, { "epoch": 1.8814182378750148, "grad_norm": 0.8041291829935788, "learning_rate": 2.8602359850201288e-05, "loss": 0.14, "step": 15866 }, { "epoch": 1.8815368196371398, "grad_norm": 0.8451106182743051, "learning_rate": 2.859998452276071e-05, "loss": 0.1783, "step": 15867 }, { "epoch": 1.8816554013992648, "grad_norm": 0.7277035876620315, "learning_rate": 2.8597609162132694e-05, "loss": 0.1817, "step": 15868 }, { "epoch": 1.8817739831613898, "grad_norm": 0.8260323323414726, "learning_rate": 2.8595233768339125e-05, "loss": 0.1541, "step": 15869 }, { "epoch": 1.8818925649235148, "grad_norm": 0.8453823535577939, "learning_rate": 2.8592858341401922e-05, "loss": 0.174, "step": 15870 }, { "epoch": 1.8820111466856397, "grad_norm": 0.7986907549273424, "learning_rate": 2.8590482881342963e-05, "loss": 0.165, "step": 15871 }, { "epoch": 1.8821297284477647, "grad_norm": 0.7616902978251614, "learning_rate": 2.8588107388184155e-05, "loss": 0.1701, "step": 15872 }, { "epoch": 1.8822483102098897, "grad_norm": 0.6637644287942615, "learning_rate": 2.85857318619474e-05, "loss": 0.1475, "step": 15873 }, { "epoch": 1.8823668919720147, "grad_norm": 0.7767490851495146, "learning_rate": 2.8583356302654597e-05, "loss": 0.1935, "step": 15874 }, { "epoch": 1.8824854737341397, "grad_norm": 0.8769871254092575, "learning_rate": 2.858098071032764e-05, "loss": 0.181, "step": 15875 }, { "epoch": 1.8826040554962646, "grad_norm": 0.8128251098503485, "learning_rate": 2.857860508498843e-05, "loss": 0.1643, "step": 15876 }, { "epoch": 1.8827226372583896, "grad_norm": 2.1104847835798375, "learning_rate": 2.8576229426658873e-05, "loss": 0.4531, "step": 15877 }, { "epoch": 1.8828412190205146, "grad_norm": 1.1747407170671436, "learning_rate": 2.857385373536086e-05, "loss": 0.2605, "step": 15878 }, { "epoch": 1.8829598007826396, "grad_norm": 1.1611295695000332, "learning_rate": 2.8571478011116303e-05, "loss": 0.2616, "step": 15879 }, { "epoch": 1.8830783825447646, "grad_norm": 0.633385296717795, "learning_rate": 2.8569102253947094e-05, "loss": 0.1255, "step": 15880 }, { "epoch": 1.8831969643068898, "grad_norm": 0.7620284747839683, "learning_rate": 2.8566726463875147e-05, "loss": 0.1826, "step": 15881 }, { "epoch": 1.8833155460690145, "grad_norm": 0.817773627716119, "learning_rate": 2.856435064092235e-05, "loss": 0.1932, "step": 15882 }, { "epoch": 1.8834341278311397, "grad_norm": 0.789585754436894, "learning_rate": 2.856197478511061e-05, "loss": 0.1783, "step": 15883 }, { "epoch": 1.8835527095932645, "grad_norm": 0.8667252616256016, "learning_rate": 2.8559598896461837e-05, "loss": 0.2143, "step": 15884 }, { "epoch": 1.8836712913553897, "grad_norm": 0.9317505078209753, "learning_rate": 2.8557222974997923e-05, "loss": 0.2333, "step": 15885 }, { "epoch": 1.8837898731175144, "grad_norm": 0.7069756825527386, "learning_rate": 2.8554847020740776e-05, "loss": 0.1955, "step": 15886 }, { "epoch": 1.8839084548796396, "grad_norm": 0.8742708423228294, "learning_rate": 2.8552471033712296e-05, "loss": 0.1693, "step": 15887 }, { "epoch": 1.8840270366417644, "grad_norm": 0.6809683033547083, "learning_rate": 2.8550095013934386e-05, "loss": 0.1607, "step": 15888 }, { "epoch": 1.8841456184038896, "grad_norm": 0.9329416283888812, "learning_rate": 2.854771896142896e-05, "loss": 0.1548, "step": 15889 }, { "epoch": 1.8842642001660144, "grad_norm": 0.8729116458764042, "learning_rate": 2.854534287621792e-05, "loss": 0.1865, "step": 15890 }, { "epoch": 1.8843827819281396, "grad_norm": 0.8183274914108097, "learning_rate": 2.8542966758323154e-05, "loss": 0.1331, "step": 15891 }, { "epoch": 1.8845013636902643, "grad_norm": 0.6617296450470821, "learning_rate": 2.8540590607766592e-05, "loss": 0.1662, "step": 15892 }, { "epoch": 1.8846199454523895, "grad_norm": 0.9259566154390383, "learning_rate": 2.853821442457012e-05, "loss": 0.1696, "step": 15893 }, { "epoch": 1.8847385272145143, "grad_norm": 0.762991301989314, "learning_rate": 2.853583820875565e-05, "loss": 0.138, "step": 15894 }, { "epoch": 1.8848571089766395, "grad_norm": 0.6017013025675343, "learning_rate": 2.853346196034509e-05, "loss": 0.1263, "step": 15895 }, { "epoch": 1.8849756907387643, "grad_norm": 1.4625274894902123, "learning_rate": 2.8531085679360344e-05, "loss": 0.2973, "step": 15896 }, { "epoch": 1.8850942725008895, "grad_norm": 0.7515366610833764, "learning_rate": 2.8528709365823313e-05, "loss": 0.1634, "step": 15897 }, { "epoch": 1.8852128542630142, "grad_norm": 0.8133817268513323, "learning_rate": 2.8526333019755907e-05, "loss": 0.1764, "step": 15898 }, { "epoch": 1.8853314360251394, "grad_norm": 0.6771959429321358, "learning_rate": 2.8523956641180037e-05, "loss": 0.1677, "step": 15899 }, { "epoch": 1.8854500177872642, "grad_norm": 0.6940516396894323, "learning_rate": 2.852158023011761e-05, "loss": 0.1468, "step": 15900 }, { "epoch": 1.8855685995493894, "grad_norm": 0.7254744611844434, "learning_rate": 2.8519203786590537e-05, "loss": 0.1798, "step": 15901 }, { "epoch": 1.8856871813115141, "grad_norm": 0.621228437857134, "learning_rate": 2.8516827310620716e-05, "loss": 0.1474, "step": 15902 }, { "epoch": 1.8858057630736393, "grad_norm": 0.9662905380425313, "learning_rate": 2.8514450802230057e-05, "loss": 0.197, "step": 15903 }, { "epoch": 1.885924344835764, "grad_norm": 1.4066582355604234, "learning_rate": 2.8512074261440473e-05, "loss": 0.3084, "step": 15904 }, { "epoch": 1.8860429265978893, "grad_norm": 0.7810612479948721, "learning_rate": 2.8509697688273878e-05, "loss": 0.1443, "step": 15905 }, { "epoch": 1.8861615083600143, "grad_norm": 0.920597089608908, "learning_rate": 2.8507321082752163e-05, "loss": 0.2234, "step": 15906 }, { "epoch": 1.8862800901221393, "grad_norm": 0.926437852844144, "learning_rate": 2.8504944444897258e-05, "loss": 0.2257, "step": 15907 }, { "epoch": 1.8863986718842642, "grad_norm": 1.0186248551516073, "learning_rate": 2.850256777473106e-05, "loss": 0.1741, "step": 15908 }, { "epoch": 1.8865172536463892, "grad_norm": 1.026625390061044, "learning_rate": 2.850019107227548e-05, "loss": 0.2385, "step": 15909 }, { "epoch": 1.8866358354085142, "grad_norm": 0.7178550748443047, "learning_rate": 2.8497814337552426e-05, "loss": 0.1613, "step": 15910 }, { "epoch": 1.8867544171706392, "grad_norm": 1.2247029725639094, "learning_rate": 2.8495437570583822e-05, "loss": 0.2535, "step": 15911 }, { "epoch": 1.8868729989327642, "grad_norm": 0.7130525817751445, "learning_rate": 2.8493060771391567e-05, "loss": 0.137, "step": 15912 }, { "epoch": 1.8869915806948891, "grad_norm": 1.266562472512775, "learning_rate": 2.8490683939997576e-05, "loss": 0.3248, "step": 15913 }, { "epoch": 1.8871101624570141, "grad_norm": 0.6758051391560961, "learning_rate": 2.848830707642376e-05, "loss": 0.1927, "step": 15914 }, { "epoch": 1.887228744219139, "grad_norm": 0.7817099356362078, "learning_rate": 2.8485930180692026e-05, "loss": 0.1616, "step": 15915 }, { "epoch": 1.887347325981264, "grad_norm": 0.6016896803533841, "learning_rate": 2.8483553252824296e-05, "loss": 0.1138, "step": 15916 }, { "epoch": 1.887465907743389, "grad_norm": 1.089766600350401, "learning_rate": 2.848117629284247e-05, "loss": 0.2139, "step": 15917 }, { "epoch": 1.887584489505514, "grad_norm": 0.8549196696737729, "learning_rate": 2.847879930076847e-05, "loss": 0.1576, "step": 15918 }, { "epoch": 1.887703071267639, "grad_norm": 1.0030523897681358, "learning_rate": 2.847642227662421e-05, "loss": 0.1476, "step": 15919 }, { "epoch": 1.887821653029764, "grad_norm": 0.9284807810343103, "learning_rate": 2.84740452204316e-05, "loss": 0.2019, "step": 15920 }, { "epoch": 1.887940234791889, "grad_norm": 0.6662691672320165, "learning_rate": 2.847166813221255e-05, "loss": 0.1313, "step": 15921 }, { "epoch": 1.888058816554014, "grad_norm": 0.7507297378417578, "learning_rate": 2.846929101198898e-05, "loss": 0.1334, "step": 15922 }, { "epoch": 1.888177398316139, "grad_norm": 0.8617770205284616, "learning_rate": 2.84669138597828e-05, "loss": 0.185, "step": 15923 }, { "epoch": 1.888295980078264, "grad_norm": 0.7449545921079807, "learning_rate": 2.846453667561593e-05, "loss": 0.2035, "step": 15924 }, { "epoch": 1.888414561840389, "grad_norm": 0.7908801289333157, "learning_rate": 2.8462159459510273e-05, "loss": 0.17, "step": 15925 }, { "epoch": 1.888533143602514, "grad_norm": 0.7203711478671512, "learning_rate": 2.8459782211487758e-05, "loss": 0.1826, "step": 15926 }, { "epoch": 1.8886517253646389, "grad_norm": 0.9813860666403983, "learning_rate": 2.8457404931570285e-05, "loss": 0.1906, "step": 15927 }, { "epoch": 1.8887703071267639, "grad_norm": 0.7170804898980425, "learning_rate": 2.845502761977978e-05, "loss": 0.1801, "step": 15928 }, { "epoch": 1.8888888888888888, "grad_norm": 0.5859171323721092, "learning_rate": 2.845265027613816e-05, "loss": 0.1633, "step": 15929 }, { "epoch": 1.8890074706510138, "grad_norm": 0.9110420036918104, "learning_rate": 2.8450272900667335e-05, "loss": 0.1825, "step": 15930 }, { "epoch": 1.8891260524131388, "grad_norm": 0.7261056264354866, "learning_rate": 2.8447895493389233e-05, "loss": 0.1787, "step": 15931 }, { "epoch": 1.889244634175264, "grad_norm": 0.9258251772076243, "learning_rate": 2.8445518054325752e-05, "loss": 0.2418, "step": 15932 }, { "epoch": 1.8893632159373888, "grad_norm": 0.8227350935006212, "learning_rate": 2.8443140583498827e-05, "loss": 0.1646, "step": 15933 }, { "epoch": 1.889481797699514, "grad_norm": 0.8609373484457465, "learning_rate": 2.8440763080930365e-05, "loss": 0.2271, "step": 15934 }, { "epoch": 1.8896003794616387, "grad_norm": 0.5488175149450569, "learning_rate": 2.84383855466423e-05, "loss": 0.0874, "step": 15935 }, { "epoch": 1.889718961223764, "grad_norm": 0.9039248763888512, "learning_rate": 2.843600798065652e-05, "loss": 0.1912, "step": 15936 }, { "epoch": 1.8898375429858887, "grad_norm": 1.0113086047395727, "learning_rate": 2.8433630382994963e-05, "loss": 0.1845, "step": 15937 }, { "epoch": 1.8899561247480139, "grad_norm": 0.898896484791771, "learning_rate": 2.8431252753679544e-05, "loss": 0.1886, "step": 15938 }, { "epoch": 1.8900747065101386, "grad_norm": 0.936266119465778, "learning_rate": 2.8428875092732188e-05, "loss": 0.2138, "step": 15939 }, { "epoch": 1.8901932882722638, "grad_norm": 0.7482476850792282, "learning_rate": 2.8426497400174802e-05, "loss": 0.1751, "step": 15940 }, { "epoch": 1.8903118700343886, "grad_norm": 0.7719530087828458, "learning_rate": 2.842411967602932e-05, "loss": 0.1551, "step": 15941 }, { "epoch": 1.8904304517965138, "grad_norm": 0.6949046764361371, "learning_rate": 2.8421741920317646e-05, "loss": 0.1272, "step": 15942 }, { "epoch": 1.8905490335586386, "grad_norm": 0.8440646701526194, "learning_rate": 2.841936413306171e-05, "loss": 0.1855, "step": 15943 }, { "epoch": 1.8906676153207638, "grad_norm": 0.7172208503566945, "learning_rate": 2.8416986314283434e-05, "loss": 0.1425, "step": 15944 }, { "epoch": 1.8907861970828885, "grad_norm": 0.7168880161380836, "learning_rate": 2.8414608464004722e-05, "loss": 0.1568, "step": 15945 }, { "epoch": 1.8909047788450137, "grad_norm": 0.8351415017095588, "learning_rate": 2.8412230582247525e-05, "loss": 0.1702, "step": 15946 }, { "epoch": 1.8910233606071385, "grad_norm": 0.9548518545989755, "learning_rate": 2.8409852669033733e-05, "loss": 0.2157, "step": 15947 }, { "epoch": 1.8911419423692637, "grad_norm": 0.9038200586059147, "learning_rate": 2.840747472438528e-05, "loss": 0.1827, "step": 15948 }, { "epoch": 1.8912605241313885, "grad_norm": 0.8807098750210066, "learning_rate": 2.840509674832409e-05, "loss": 0.1811, "step": 15949 }, { "epoch": 1.8913791058935137, "grad_norm": 0.8628302054950924, "learning_rate": 2.8402718740872093e-05, "loss": 0.205, "step": 15950 }, { "epoch": 1.8914976876556384, "grad_norm": 1.5663684429354032, "learning_rate": 2.8400340702051192e-05, "loss": 0.3297, "step": 15951 }, { "epoch": 1.8916162694177636, "grad_norm": 1.1562137038263798, "learning_rate": 2.839796263188333e-05, "loss": 0.2213, "step": 15952 }, { "epoch": 1.8917348511798884, "grad_norm": 0.8544874324691788, "learning_rate": 2.8395584530390408e-05, "loss": 0.1937, "step": 15953 }, { "epoch": 1.8918534329420136, "grad_norm": 0.711607487327528, "learning_rate": 2.839320639759437e-05, "loss": 0.1496, "step": 15954 }, { "epoch": 1.8919720147041383, "grad_norm": 1.049772365948029, "learning_rate": 2.839082823351712e-05, "loss": 0.2261, "step": 15955 }, { "epoch": 1.8920905964662635, "grad_norm": 0.8264796534433995, "learning_rate": 2.8388450038180604e-05, "loss": 0.2026, "step": 15956 }, { "epoch": 1.8922091782283885, "grad_norm": 0.9864084744574206, "learning_rate": 2.8386071811606722e-05, "loss": 0.2139, "step": 15957 }, { "epoch": 1.8923277599905135, "grad_norm": 0.8968602318636496, "learning_rate": 2.8383693553817414e-05, "loss": 0.1876, "step": 15958 }, { "epoch": 1.8924463417526385, "grad_norm": 0.7645094403579633, "learning_rate": 2.8381315264834602e-05, "loss": 0.1686, "step": 15959 }, { "epoch": 1.8925649235147635, "grad_norm": 1.4766509873499134, "learning_rate": 2.8378936944680205e-05, "loss": 0.2581, "step": 15960 }, { "epoch": 1.8926835052768884, "grad_norm": 1.8381490804590244, "learning_rate": 2.8376558593376162e-05, "loss": 0.3022, "step": 15961 }, { "epoch": 1.8928020870390134, "grad_norm": 0.8841167458257453, "learning_rate": 2.837418021094438e-05, "loss": 0.1906, "step": 15962 }, { "epoch": 1.8929206688011384, "grad_norm": 0.891420899134094, "learning_rate": 2.8371801797406806e-05, "loss": 0.2777, "step": 15963 }, { "epoch": 1.8930392505632634, "grad_norm": 0.8881108727466321, "learning_rate": 2.836942335278534e-05, "loss": 0.2192, "step": 15964 }, { "epoch": 1.8931578323253884, "grad_norm": 0.6629967210902918, "learning_rate": 2.8367044877101934e-05, "loss": 0.1414, "step": 15965 }, { "epoch": 1.8932764140875133, "grad_norm": 0.8083390958531154, "learning_rate": 2.8364666370378502e-05, "loss": 0.1563, "step": 15966 }, { "epoch": 1.8933949958496383, "grad_norm": 0.8255997458947091, "learning_rate": 2.8362287832636964e-05, "loss": 0.1721, "step": 15967 }, { "epoch": 1.8935135776117633, "grad_norm": 1.1673362603067714, "learning_rate": 2.8359909263899258e-05, "loss": 0.1988, "step": 15968 }, { "epoch": 1.8936321593738883, "grad_norm": 0.501122125091316, "learning_rate": 2.8357530664187316e-05, "loss": 0.105, "step": 15969 }, { "epoch": 1.8937507411360133, "grad_norm": 0.8538961393301169, "learning_rate": 2.8355152033523048e-05, "loss": 0.2216, "step": 15970 }, { "epoch": 1.8938693228981383, "grad_norm": 0.8679002423036752, "learning_rate": 2.83527733719284e-05, "loss": 0.1749, "step": 15971 }, { "epoch": 1.8939879046602632, "grad_norm": 0.9136354346642834, "learning_rate": 2.8350394679425292e-05, "loss": 0.1551, "step": 15972 }, { "epoch": 1.8941064864223882, "grad_norm": 0.7028766001817698, "learning_rate": 2.834801595603565e-05, "loss": 0.1305, "step": 15973 }, { "epoch": 1.8942250681845132, "grad_norm": 0.5602754839121322, "learning_rate": 2.8345637201781415e-05, "loss": 0.1077, "step": 15974 }, { "epoch": 1.8943436499466382, "grad_norm": 0.9522264462847185, "learning_rate": 2.8343258416684498e-05, "loss": 0.171, "step": 15975 }, { "epoch": 1.8944622317087632, "grad_norm": 0.7910135684669101, "learning_rate": 2.8340879600766847e-05, "loss": 0.1925, "step": 15976 }, { "epoch": 1.8945808134708881, "grad_norm": 0.8222129006091862, "learning_rate": 2.8338500754050373e-05, "loss": 0.1896, "step": 15977 }, { "epoch": 1.8946993952330131, "grad_norm": 0.8210951664670892, "learning_rate": 2.833612187655702e-05, "loss": 0.1704, "step": 15978 }, { "epoch": 1.894817976995138, "grad_norm": 0.7233371948722629, "learning_rate": 2.8333742968308713e-05, "loss": 0.1233, "step": 15979 }, { "epoch": 1.894936558757263, "grad_norm": 0.6901952476435219, "learning_rate": 2.833136402932739e-05, "loss": 0.1377, "step": 15980 }, { "epoch": 1.8950551405193883, "grad_norm": 0.6007473416266343, "learning_rate": 2.832898505963497e-05, "loss": 0.1199, "step": 15981 }, { "epoch": 1.895173722281513, "grad_norm": 0.6342216059157425, "learning_rate": 2.8326606059253398e-05, "loss": 0.1425, "step": 15982 }, { "epoch": 1.8952923040436382, "grad_norm": 0.8188516369177005, "learning_rate": 2.8324227028204587e-05, "loss": 0.2201, "step": 15983 }, { "epoch": 1.895410885805763, "grad_norm": 0.7978125267332222, "learning_rate": 2.832184796651049e-05, "loss": 0.178, "step": 15984 }, { "epoch": 1.8955294675678882, "grad_norm": 0.9694068910362328, "learning_rate": 2.8319468874193016e-05, "loss": 0.243, "step": 15985 }, { "epoch": 1.895648049330013, "grad_norm": 0.7810894317955301, "learning_rate": 2.8317089751274118e-05, "loss": 0.146, "step": 15986 }, { "epoch": 1.8957666310921382, "grad_norm": 0.773685022053031, "learning_rate": 2.831471059777571e-05, "loss": 0.142, "step": 15987 }, { "epoch": 1.895885212854263, "grad_norm": 1.0319921775921364, "learning_rate": 2.8312331413719746e-05, "loss": 0.26, "step": 15988 }, { "epoch": 1.8960037946163881, "grad_norm": 0.8716093468606806, "learning_rate": 2.830995219912815e-05, "loss": 0.1692, "step": 15989 }, { "epoch": 1.8961223763785129, "grad_norm": 0.7674415364921217, "learning_rate": 2.8307572954022844e-05, "loss": 0.1288, "step": 15990 }, { "epoch": 1.896240958140638, "grad_norm": 0.8893190144538159, "learning_rate": 2.830519367842578e-05, "loss": 0.1292, "step": 15991 }, { "epoch": 1.8963595399027628, "grad_norm": 0.7911529282978075, "learning_rate": 2.8302814372358876e-05, "loss": 0.1566, "step": 15992 }, { "epoch": 1.896478121664888, "grad_norm": 0.8400210912787021, "learning_rate": 2.830043503584408e-05, "loss": 0.1769, "step": 15993 }, { "epoch": 1.8965967034270128, "grad_norm": 1.088465900908281, "learning_rate": 2.8298055668903317e-05, "loss": 0.2214, "step": 15994 }, { "epoch": 1.896715285189138, "grad_norm": 0.7110240727868526, "learning_rate": 2.829567627155853e-05, "loss": 0.1636, "step": 15995 }, { "epoch": 1.8968338669512628, "grad_norm": 0.7466832915856868, "learning_rate": 2.8293296843831642e-05, "loss": 0.1739, "step": 15996 }, { "epoch": 1.896952448713388, "grad_norm": 1.0567876665679286, "learning_rate": 2.8290917385744598e-05, "loss": 0.2308, "step": 15997 }, { "epoch": 1.8970710304755127, "grad_norm": 1.0959639892085768, "learning_rate": 2.8288537897319328e-05, "loss": 0.2597, "step": 15998 }, { "epoch": 1.897189612237638, "grad_norm": 0.9944621634276215, "learning_rate": 2.8286158378577776e-05, "loss": 0.1995, "step": 15999 }, { "epoch": 1.8973081939997627, "grad_norm": 0.6940648908477219, "learning_rate": 2.8283778829541874e-05, "loss": 0.1359, "step": 16000 }, { "epoch": 1.897426775761888, "grad_norm": 0.8136907410973065, "learning_rate": 2.8281399250233554e-05, "loss": 0.1656, "step": 16001 }, { "epoch": 1.8975453575240127, "grad_norm": 0.8845120568813197, "learning_rate": 2.827901964067476e-05, "loss": 0.1882, "step": 16002 }, { "epoch": 1.8976639392861379, "grad_norm": 0.7977548385600857, "learning_rate": 2.8276640000887422e-05, "loss": 0.1366, "step": 16003 }, { "epoch": 1.8977825210482626, "grad_norm": 0.7709988463076939, "learning_rate": 2.8274260330893487e-05, "loss": 0.176, "step": 16004 }, { "epoch": 1.8979011028103878, "grad_norm": 0.8027915723846025, "learning_rate": 2.8271880630714886e-05, "loss": 0.1612, "step": 16005 }, { "epoch": 1.8980196845725128, "grad_norm": 0.9014290223264383, "learning_rate": 2.8269500900373557e-05, "loss": 0.1791, "step": 16006 }, { "epoch": 1.8981382663346378, "grad_norm": 0.682384806357561, "learning_rate": 2.826712113989144e-05, "loss": 0.117, "step": 16007 }, { "epoch": 1.8982568480967628, "grad_norm": 1.0227507624447483, "learning_rate": 2.8264741349290464e-05, "loss": 0.2043, "step": 16008 }, { "epoch": 1.8983754298588877, "grad_norm": 0.7678783514368877, "learning_rate": 2.8262361528592583e-05, "loss": 0.123, "step": 16009 }, { "epoch": 1.8984940116210127, "grad_norm": 1.021820293845396, "learning_rate": 2.825998167781973e-05, "loss": 0.2025, "step": 16010 }, { "epoch": 1.8986125933831377, "grad_norm": 0.8464141163006766, "learning_rate": 2.8257601796993844e-05, "loss": 0.2006, "step": 16011 }, { "epoch": 1.8987311751452627, "grad_norm": 0.7293195641664636, "learning_rate": 2.825522188613686e-05, "loss": 0.1325, "step": 16012 }, { "epoch": 1.8988497569073877, "grad_norm": 1.1563178343750835, "learning_rate": 2.825284194527073e-05, "loss": 0.2007, "step": 16013 }, { "epoch": 1.8989683386695126, "grad_norm": 0.9231869470982893, "learning_rate": 2.8250461974417375e-05, "loss": 0.1718, "step": 16014 }, { "epoch": 1.8990869204316376, "grad_norm": 0.8526594602045873, "learning_rate": 2.824808197359876e-05, "loss": 0.1674, "step": 16015 }, { "epoch": 1.8992055021937626, "grad_norm": 1.8119774370952464, "learning_rate": 2.82457019428368e-05, "loss": 0.3478, "step": 16016 }, { "epoch": 1.8993240839558876, "grad_norm": 1.0251312429323536, "learning_rate": 2.824332188215345e-05, "loss": 0.2506, "step": 16017 }, { "epoch": 1.8994426657180126, "grad_norm": 0.9863246662577184, "learning_rate": 2.824094179157065e-05, "loss": 0.2059, "step": 16018 }, { "epoch": 1.8995612474801375, "grad_norm": 0.985841786567784, "learning_rate": 2.8238561671110355e-05, "loss": 0.2147, "step": 16019 }, { "epoch": 1.8996798292422625, "grad_norm": 0.9425934850450475, "learning_rate": 2.8236181520794474e-05, "loss": 0.127, "step": 16020 }, { "epoch": 1.8997984110043875, "grad_norm": 1.2991209508884507, "learning_rate": 2.823380134064498e-05, "loss": 0.2661, "step": 16021 }, { "epoch": 1.8999169927665125, "grad_norm": 1.0221231307519234, "learning_rate": 2.8231421130683793e-05, "loss": 0.2121, "step": 16022 }, { "epoch": 1.9000355745286375, "grad_norm": 0.9122685059724239, "learning_rate": 2.822904089093288e-05, "loss": 0.2232, "step": 16023 }, { "epoch": 1.9001541562907625, "grad_norm": 0.8463973568758093, "learning_rate": 2.8226660621414157e-05, "loss": 0.1934, "step": 16024 }, { "epoch": 1.9002727380528874, "grad_norm": 0.8344088744620402, "learning_rate": 2.8224280322149587e-05, "loss": 0.1617, "step": 16025 }, { "epoch": 1.9003913198150124, "grad_norm": 0.7691163974752263, "learning_rate": 2.8221899993161098e-05, "loss": 0.1466, "step": 16026 }, { "epoch": 1.9005099015771374, "grad_norm": 1.2060567657194745, "learning_rate": 2.8219519634470648e-05, "loss": 0.208, "step": 16027 }, { "epoch": 1.9006284833392624, "grad_norm": 0.8847725284929038, "learning_rate": 2.8217139246100173e-05, "loss": 0.2074, "step": 16028 }, { "epoch": 1.9007470651013874, "grad_norm": 0.6719701690384398, "learning_rate": 2.8214758828071618e-05, "loss": 0.1475, "step": 16029 }, { "epoch": 1.9008656468635123, "grad_norm": 0.706349929732018, "learning_rate": 2.8212378380406933e-05, "loss": 0.1444, "step": 16030 }, { "epoch": 1.9009842286256373, "grad_norm": 1.3667963834118002, "learning_rate": 2.8209997903128054e-05, "loss": 0.3831, "step": 16031 }, { "epoch": 1.9011028103877625, "grad_norm": 0.9340477194046695, "learning_rate": 2.820761739625694e-05, "loss": 0.2149, "step": 16032 }, { "epoch": 1.9012213921498873, "grad_norm": 0.6139532108089062, "learning_rate": 2.820523685981552e-05, "loss": 0.1402, "step": 16033 }, { "epoch": 1.9013399739120125, "grad_norm": 0.6580216803654312, "learning_rate": 2.8202856293825748e-05, "loss": 0.1361, "step": 16034 }, { "epoch": 1.9014585556741372, "grad_norm": 0.8214783306192367, "learning_rate": 2.8200475698309564e-05, "loss": 0.1701, "step": 16035 }, { "epoch": 1.9015771374362624, "grad_norm": 0.8063753830782938, "learning_rate": 2.8198095073288926e-05, "loss": 0.1715, "step": 16036 }, { "epoch": 1.9016957191983872, "grad_norm": 1.0482580823956247, "learning_rate": 2.8195714418785768e-05, "loss": 0.1931, "step": 16037 }, { "epoch": 1.9018143009605124, "grad_norm": 0.7934667790270736, "learning_rate": 2.819333373482204e-05, "loss": 0.186, "step": 16038 }, { "epoch": 1.9019328827226372, "grad_norm": 1.00374053109999, "learning_rate": 2.8190953021419698e-05, "loss": 0.2018, "step": 16039 }, { "epoch": 1.9020514644847624, "grad_norm": 1.2277980024021808, "learning_rate": 2.8188572278600676e-05, "loss": 0.3597, "step": 16040 }, { "epoch": 1.9021700462468871, "grad_norm": 0.7845901191466269, "learning_rate": 2.8186191506386935e-05, "loss": 0.1651, "step": 16041 }, { "epoch": 1.9022886280090123, "grad_norm": 0.6994670210377145, "learning_rate": 2.818381070480041e-05, "loss": 0.1503, "step": 16042 }, { "epoch": 1.902407209771137, "grad_norm": 1.158949383402621, "learning_rate": 2.818142987386305e-05, "loss": 0.2605, "step": 16043 }, { "epoch": 1.9025257915332623, "grad_norm": 0.9952893277227186, "learning_rate": 2.8179049013596813e-05, "loss": 0.2312, "step": 16044 }, { "epoch": 1.902644373295387, "grad_norm": 0.9643211549192695, "learning_rate": 2.8176668124023648e-05, "loss": 0.1559, "step": 16045 }, { "epoch": 1.9027629550575123, "grad_norm": 1.0830202112699108, "learning_rate": 2.8174287205165495e-05, "loss": 0.2738, "step": 16046 }, { "epoch": 1.902881536819637, "grad_norm": 0.962180061913482, "learning_rate": 2.81719062570443e-05, "loss": 0.2471, "step": 16047 }, { "epoch": 1.9030001185817622, "grad_norm": 0.7085835975243565, "learning_rate": 2.8169525279682023e-05, "loss": 0.1436, "step": 16048 }, { "epoch": 1.903118700343887, "grad_norm": 0.9629448029979497, "learning_rate": 2.816714427310061e-05, "loss": 0.2127, "step": 16049 }, { "epoch": 1.9032372821060122, "grad_norm": 1.08611614687514, "learning_rate": 2.816476323732201e-05, "loss": 0.2478, "step": 16050 }, { "epoch": 1.903355863868137, "grad_norm": 1.270560170537835, "learning_rate": 2.816238217236818e-05, "loss": 0.2813, "step": 16051 }, { "epoch": 1.9034744456302621, "grad_norm": 0.8123001145976401, "learning_rate": 2.8160001078261054e-05, "loss": 0.1807, "step": 16052 }, { "epoch": 1.903593027392387, "grad_norm": 0.6630240310509419, "learning_rate": 2.8157619955022606e-05, "loss": 0.1215, "step": 16053 }, { "epoch": 1.903711609154512, "grad_norm": 0.7756802943523601, "learning_rate": 2.8155238802674765e-05, "loss": 0.1846, "step": 16054 }, { "epoch": 1.9038301909166369, "grad_norm": 0.7942594299651337, "learning_rate": 2.815285762123949e-05, "loss": 0.1562, "step": 16055 }, { "epoch": 1.903948772678762, "grad_norm": 0.9065131807304972, "learning_rate": 2.815047641073874e-05, "loss": 0.2393, "step": 16056 }, { "epoch": 1.904067354440887, "grad_norm": 0.8672367138018566, "learning_rate": 2.8148095171194454e-05, "loss": 0.1748, "step": 16057 }, { "epoch": 1.904185936203012, "grad_norm": 0.7971496403043409, "learning_rate": 2.8145713902628595e-05, "loss": 0.1531, "step": 16058 }, { "epoch": 1.904304517965137, "grad_norm": 0.5124821729717439, "learning_rate": 2.8143332605063105e-05, "loss": 0.1179, "step": 16059 }, { "epoch": 1.904423099727262, "grad_norm": 0.7001395036273097, "learning_rate": 2.8140951278519957e-05, "loss": 0.1463, "step": 16060 }, { "epoch": 1.904541681489387, "grad_norm": 0.7959357453403154, "learning_rate": 2.813856992302108e-05, "loss": 0.1908, "step": 16061 }, { "epoch": 1.904660263251512, "grad_norm": 0.8065231863058255, "learning_rate": 2.8136188538588438e-05, "loss": 0.1445, "step": 16062 }, { "epoch": 1.904778845013637, "grad_norm": 0.6411136853133865, "learning_rate": 2.8133807125243983e-05, "loss": 0.1423, "step": 16063 }, { "epoch": 1.904897426775762, "grad_norm": 1.0400106420792206, "learning_rate": 2.8131425683009677e-05, "loss": 0.2795, "step": 16064 }, { "epoch": 1.9050160085378869, "grad_norm": 0.9285993439934166, "learning_rate": 2.812904421190746e-05, "loss": 0.1903, "step": 16065 }, { "epoch": 1.9051345903000119, "grad_norm": 1.0556325688688668, "learning_rate": 2.8126662711959286e-05, "loss": 0.2429, "step": 16066 }, { "epoch": 1.9052531720621368, "grad_norm": 0.8016027049542219, "learning_rate": 2.8124281183187123e-05, "loss": 0.1694, "step": 16067 }, { "epoch": 1.9053717538242618, "grad_norm": 0.8668186736621011, "learning_rate": 2.8121899625612917e-05, "loss": 0.212, "step": 16068 }, { "epoch": 1.9054903355863868, "grad_norm": 1.0784058509316448, "learning_rate": 2.8119518039258625e-05, "loss": 0.3117, "step": 16069 }, { "epoch": 1.9056089173485118, "grad_norm": 0.6697063949978745, "learning_rate": 2.8117136424146206e-05, "loss": 0.1682, "step": 16070 }, { "epoch": 1.9057274991106368, "grad_norm": 0.7343773518141312, "learning_rate": 2.8114754780297608e-05, "loss": 0.1586, "step": 16071 }, { "epoch": 1.9058460808727617, "grad_norm": 0.8926253964800768, "learning_rate": 2.811237310773479e-05, "loss": 0.1592, "step": 16072 }, { "epoch": 1.9059646626348867, "grad_norm": 0.8531225239816421, "learning_rate": 2.8109991406479713e-05, "loss": 0.2018, "step": 16073 }, { "epoch": 1.9060832443970117, "grad_norm": 0.9085552246567482, "learning_rate": 2.8107609676554325e-05, "loss": 0.1931, "step": 16074 }, { "epoch": 1.9062018261591367, "grad_norm": 0.8172230014717179, "learning_rate": 2.8105227917980587e-05, "loss": 0.2137, "step": 16075 }, { "epoch": 1.9063204079212617, "grad_norm": 0.8790515706738277, "learning_rate": 2.8102846130780458e-05, "loss": 0.1706, "step": 16076 }, { "epoch": 1.9064389896833867, "grad_norm": 1.0129113430702423, "learning_rate": 2.8100464314975887e-05, "loss": 0.2222, "step": 16077 }, { "epoch": 1.9065575714455116, "grad_norm": 0.8923724634131934, "learning_rate": 2.8098082470588838e-05, "loss": 0.193, "step": 16078 }, { "epoch": 1.9066761532076366, "grad_norm": 0.9312261026954777, "learning_rate": 2.8095700597641273e-05, "loss": 0.2639, "step": 16079 }, { "epoch": 1.9067947349697616, "grad_norm": 0.8689916871828128, "learning_rate": 2.809331869615514e-05, "loss": 0.177, "step": 16080 }, { "epoch": 1.9069133167318868, "grad_norm": 0.9137513793053651, "learning_rate": 2.8090936766152408e-05, "loss": 0.203, "step": 16081 }, { "epoch": 1.9070318984940116, "grad_norm": 1.2199647432285512, "learning_rate": 2.808855480765502e-05, "loss": 0.2628, "step": 16082 }, { "epoch": 1.9071504802561368, "grad_norm": 0.933837232059193, "learning_rate": 2.8086172820684953e-05, "loss": 0.1937, "step": 16083 }, { "epoch": 1.9072690620182615, "grad_norm": 1.5565611947789477, "learning_rate": 2.8083790805264153e-05, "loss": 0.382, "step": 16084 }, { "epoch": 1.9073876437803867, "grad_norm": 1.8211068949077909, "learning_rate": 2.808140876141458e-05, "loss": 0.217, "step": 16085 }, { "epoch": 1.9075062255425115, "grad_norm": 1.0398461009835385, "learning_rate": 2.8079026689158204e-05, "loss": 0.16, "step": 16086 }, { "epoch": 1.9076248073046367, "grad_norm": 0.890194574819876, "learning_rate": 2.8076644588516976e-05, "loss": 0.2144, "step": 16087 }, { "epoch": 1.9077433890667614, "grad_norm": 0.6436168850439142, "learning_rate": 2.8074262459512852e-05, "loss": 0.1569, "step": 16088 }, { "epoch": 1.9078619708288866, "grad_norm": 0.8199967753319074, "learning_rate": 2.8071880302167803e-05, "loss": 0.1946, "step": 16089 }, { "epoch": 1.9079805525910114, "grad_norm": 0.9516529419926232, "learning_rate": 2.8069498116503785e-05, "loss": 0.2559, "step": 16090 }, { "epoch": 1.9080991343531366, "grad_norm": 0.8184937588329757, "learning_rate": 2.8067115902542755e-05, "loss": 0.2049, "step": 16091 }, { "epoch": 1.9082177161152614, "grad_norm": 0.8665811718101377, "learning_rate": 2.806473366030668e-05, "loss": 0.1907, "step": 16092 }, { "epoch": 1.9083362978773866, "grad_norm": 0.7865269074420737, "learning_rate": 2.8062351389817522e-05, "loss": 0.157, "step": 16093 }, { "epoch": 1.9084548796395113, "grad_norm": 0.8422021674817751, "learning_rate": 2.805996909109724e-05, "loss": 0.1768, "step": 16094 }, { "epoch": 1.9085734614016365, "grad_norm": 0.625207645975702, "learning_rate": 2.8057586764167787e-05, "loss": 0.1418, "step": 16095 }, { "epoch": 1.9086920431637613, "grad_norm": 1.7619033950165237, "learning_rate": 2.8055204409051133e-05, "loss": 0.3946, "step": 16096 }, { "epoch": 1.9088106249258865, "grad_norm": 0.8587232766951006, "learning_rate": 2.8052822025769243e-05, "loss": 0.1947, "step": 16097 }, { "epoch": 1.9089292066880112, "grad_norm": 0.6993821636809319, "learning_rate": 2.8050439614344088e-05, "loss": 0.1408, "step": 16098 }, { "epoch": 1.9090477884501365, "grad_norm": 0.7958806874270875, "learning_rate": 2.804805717479761e-05, "loss": 0.1337, "step": 16099 }, { "epoch": 1.9091663702122612, "grad_norm": 1.1804482701163845, "learning_rate": 2.8045674707151782e-05, "loss": 0.2458, "step": 16100 }, { "epoch": 1.9092849519743864, "grad_norm": 1.2110464888748813, "learning_rate": 2.8043292211428573e-05, "loss": 0.2419, "step": 16101 }, { "epoch": 1.9094035337365112, "grad_norm": 0.5511865009157908, "learning_rate": 2.804090968764994e-05, "loss": 0.1245, "step": 16102 }, { "epoch": 1.9095221154986364, "grad_norm": 0.8541879030060875, "learning_rate": 2.803852713583785e-05, "loss": 0.185, "step": 16103 }, { "epoch": 1.9096406972607611, "grad_norm": 0.778221182153507, "learning_rate": 2.8036144556014264e-05, "loss": 0.1511, "step": 16104 }, { "epoch": 1.9097592790228863, "grad_norm": 0.48146535588377526, "learning_rate": 2.8033761948201152e-05, "loss": 0.1201, "step": 16105 }, { "epoch": 1.9098778607850113, "grad_norm": 0.7438181213533918, "learning_rate": 2.803137931242047e-05, "loss": 0.1298, "step": 16106 }, { "epoch": 1.9099964425471363, "grad_norm": 0.6145567434063876, "learning_rate": 2.802899664869419e-05, "loss": 0.1278, "step": 16107 }, { "epoch": 1.9101150243092613, "grad_norm": 0.7711216492703693, "learning_rate": 2.8026613957044274e-05, "loss": 0.186, "step": 16108 }, { "epoch": 1.9102336060713863, "grad_norm": 0.7507813491788091, "learning_rate": 2.8024231237492693e-05, "loss": 0.1742, "step": 16109 }, { "epoch": 1.9103521878335112, "grad_norm": 1.1401612682928808, "learning_rate": 2.8021848490061403e-05, "loss": 0.2296, "step": 16110 }, { "epoch": 1.9104707695956362, "grad_norm": 0.8996888377989521, "learning_rate": 2.801946571477238e-05, "loss": 0.1982, "step": 16111 }, { "epoch": 1.9105893513577612, "grad_norm": 1.1027614513473663, "learning_rate": 2.8017082911647586e-05, "loss": 0.2377, "step": 16112 }, { "epoch": 1.9107079331198862, "grad_norm": 0.8095909958891714, "learning_rate": 2.801470008070899e-05, "loss": 0.1646, "step": 16113 }, { "epoch": 1.9108265148820112, "grad_norm": 1.234732318365986, "learning_rate": 2.8012317221978546e-05, "loss": 0.2455, "step": 16114 }, { "epoch": 1.9109450966441361, "grad_norm": 0.8875182616985962, "learning_rate": 2.8009934335478238e-05, "loss": 0.165, "step": 16115 }, { "epoch": 1.9110636784062611, "grad_norm": 1.042240090624925, "learning_rate": 2.800755142123003e-05, "loss": 0.2305, "step": 16116 }, { "epoch": 1.911182260168386, "grad_norm": 0.9209188015542018, "learning_rate": 2.8005168479255876e-05, "loss": 0.1726, "step": 16117 }, { "epoch": 1.911300841930511, "grad_norm": 0.861622845146542, "learning_rate": 2.8002785509577757e-05, "loss": 0.1931, "step": 16118 }, { "epoch": 1.911419423692636, "grad_norm": 0.7272874492035702, "learning_rate": 2.8000402512217638e-05, "loss": 0.176, "step": 16119 }, { "epoch": 1.911538005454761, "grad_norm": 1.0042781627306168, "learning_rate": 2.7998019487197492e-05, "loss": 0.218, "step": 16120 }, { "epoch": 1.911656587216886, "grad_norm": 0.8086556569412204, "learning_rate": 2.799563643453928e-05, "loss": 0.1775, "step": 16121 }, { "epoch": 1.911775168979011, "grad_norm": 0.7291045710169852, "learning_rate": 2.7993253354264974e-05, "loss": 0.1632, "step": 16122 }, { "epoch": 1.911893750741136, "grad_norm": 1.139694594633981, "learning_rate": 2.799087024639654e-05, "loss": 0.262, "step": 16123 }, { "epoch": 1.912012332503261, "grad_norm": 0.8419922375736456, "learning_rate": 2.798848711095596e-05, "loss": 0.1626, "step": 16124 }, { "epoch": 1.912130914265386, "grad_norm": 0.7207823124881841, "learning_rate": 2.798610394796518e-05, "loss": 0.1514, "step": 16125 }, { "epoch": 1.912249496027511, "grad_norm": 0.6956671379823878, "learning_rate": 2.7983720757446185e-05, "loss": 0.1333, "step": 16126 }, { "epoch": 1.912368077789636, "grad_norm": 1.2361873183822614, "learning_rate": 2.798133753942095e-05, "loss": 0.2139, "step": 16127 }, { "epoch": 1.912486659551761, "grad_norm": 0.8577699022485038, "learning_rate": 2.797895429391143e-05, "loss": 0.1488, "step": 16128 }, { "epoch": 1.9126052413138859, "grad_norm": 1.391498602024598, "learning_rate": 2.7976571020939616e-05, "loss": 0.2373, "step": 16129 }, { "epoch": 1.912723823076011, "grad_norm": 0.6319294542090831, "learning_rate": 2.797418772052746e-05, "loss": 0.1185, "step": 16130 }, { "epoch": 1.9128424048381358, "grad_norm": 0.9270571237828035, "learning_rate": 2.797180439269695e-05, "loss": 0.1995, "step": 16131 }, { "epoch": 1.912960986600261, "grad_norm": 0.8273926286504675, "learning_rate": 2.7969421037470035e-05, "loss": 0.1624, "step": 16132 }, { "epoch": 1.9130795683623858, "grad_norm": 0.8004937178560668, "learning_rate": 2.796703765486871e-05, "loss": 0.1331, "step": 16133 }, { "epoch": 1.913198150124511, "grad_norm": 0.9043294698160846, "learning_rate": 2.796465424491493e-05, "loss": 0.1962, "step": 16134 }, { "epoch": 1.9133167318866358, "grad_norm": 1.0080642095058048, "learning_rate": 2.7962270807630675e-05, "loss": 0.1884, "step": 16135 }, { "epoch": 1.913435313648761, "grad_norm": 1.0460051698117656, "learning_rate": 2.795988734303791e-05, "loss": 0.1669, "step": 16136 }, { "epoch": 1.9135538954108857, "grad_norm": 0.7882870146105752, "learning_rate": 2.7957503851158617e-05, "loss": 0.1595, "step": 16137 }, { "epoch": 1.913672477173011, "grad_norm": 0.7678024661410756, "learning_rate": 2.7955120332014768e-05, "loss": 0.1526, "step": 16138 }, { "epoch": 1.9137910589351357, "grad_norm": 0.7940783805265607, "learning_rate": 2.7952736785628337e-05, "loss": 0.1555, "step": 16139 }, { "epoch": 1.9139096406972609, "grad_norm": 0.775496185015277, "learning_rate": 2.7950353212021285e-05, "loss": 0.1746, "step": 16140 }, { "epoch": 1.9140282224593856, "grad_norm": 0.9983417367474882, "learning_rate": 2.7947969611215603e-05, "loss": 0.2126, "step": 16141 }, { "epoch": 1.9141468042215108, "grad_norm": 1.4932645257275636, "learning_rate": 2.7945585983233248e-05, "loss": 0.2831, "step": 16142 }, { "epoch": 1.9142653859836356, "grad_norm": 0.7621200842308553, "learning_rate": 2.7943202328096202e-05, "loss": 0.1521, "step": 16143 }, { "epoch": 1.9143839677457608, "grad_norm": 0.7917148857111221, "learning_rate": 2.7940818645826445e-05, "loss": 0.16, "step": 16144 }, { "epoch": 1.9145025495078856, "grad_norm": 0.8528245200908727, "learning_rate": 2.7938434936445945e-05, "loss": 0.1503, "step": 16145 }, { "epoch": 1.9146211312700108, "grad_norm": 0.8898581154357232, "learning_rate": 2.7936051199976677e-05, "loss": 0.1662, "step": 16146 }, { "epoch": 1.9147397130321355, "grad_norm": 0.843507441673002, "learning_rate": 2.7933667436440614e-05, "loss": 0.1982, "step": 16147 }, { "epoch": 1.9148582947942607, "grad_norm": 1.2043005590326452, "learning_rate": 2.793128364585974e-05, "loss": 0.2122, "step": 16148 }, { "epoch": 1.9149768765563855, "grad_norm": 0.7479489636339709, "learning_rate": 2.7928899828256023e-05, "loss": 0.1456, "step": 16149 }, { "epoch": 1.9150954583185107, "grad_norm": 0.8522259276881368, "learning_rate": 2.7926515983651446e-05, "loss": 0.1571, "step": 16150 }, { "epoch": 1.9152140400806354, "grad_norm": 0.742339807552376, "learning_rate": 2.7924132112067975e-05, "loss": 0.1338, "step": 16151 }, { "epoch": 1.9153326218427607, "grad_norm": 0.5601343493911843, "learning_rate": 2.79217482135276e-05, "loss": 0.1209, "step": 16152 }, { "epoch": 1.9154512036048854, "grad_norm": 0.8721545934293266, "learning_rate": 2.791936428805228e-05, "loss": 0.1909, "step": 16153 }, { "epoch": 1.9155697853670106, "grad_norm": 1.2990035213005926, "learning_rate": 2.7916980335664004e-05, "loss": 0.2637, "step": 16154 }, { "epoch": 1.9156883671291356, "grad_norm": 1.1676615106970518, "learning_rate": 2.7914596356384746e-05, "loss": 0.288, "step": 16155 }, { "epoch": 1.9158069488912606, "grad_norm": 0.8729485062290261, "learning_rate": 2.7912212350236482e-05, "loss": 0.1356, "step": 16156 }, { "epoch": 1.9159255306533856, "grad_norm": 0.8488522997522174, "learning_rate": 2.7909828317241193e-05, "loss": 0.2164, "step": 16157 }, { "epoch": 1.9160441124155105, "grad_norm": 0.8463766653486496, "learning_rate": 2.7907444257420855e-05, "loss": 0.1466, "step": 16158 }, { "epoch": 1.9161626941776355, "grad_norm": 0.7733224450732947, "learning_rate": 2.7905060170797447e-05, "loss": 0.1648, "step": 16159 }, { "epoch": 1.9162812759397605, "grad_norm": 0.7455826895422575, "learning_rate": 2.7902676057392947e-05, "loss": 0.1582, "step": 16160 }, { "epoch": 1.9163998577018855, "grad_norm": 1.004755159301484, "learning_rate": 2.790029191722934e-05, "loss": 0.1945, "step": 16161 }, { "epoch": 1.9165184394640105, "grad_norm": 0.8997574366034471, "learning_rate": 2.789790775032859e-05, "loss": 0.2093, "step": 16162 }, { "epoch": 1.9166370212261354, "grad_norm": 0.584404851309574, "learning_rate": 2.7895523556712683e-05, "loss": 0.122, "step": 16163 }, { "epoch": 1.9167556029882604, "grad_norm": 1.1121063598050815, "learning_rate": 2.78931393364036e-05, "loss": 0.1659, "step": 16164 }, { "epoch": 1.9168741847503854, "grad_norm": 0.8318585382515318, "learning_rate": 2.7890755089423326e-05, "loss": 0.1671, "step": 16165 }, { "epoch": 1.9169927665125104, "grad_norm": 0.8429016001991769, "learning_rate": 2.788837081579383e-05, "loss": 0.1914, "step": 16166 }, { "epoch": 1.9171113482746354, "grad_norm": 0.90819520035403, "learning_rate": 2.7885986515537095e-05, "loss": 0.1969, "step": 16167 }, { "epoch": 1.9172299300367603, "grad_norm": 0.7719036826497917, "learning_rate": 2.7883602188675106e-05, "loss": 0.1331, "step": 16168 }, { "epoch": 1.9173485117988853, "grad_norm": 0.7943196418705168, "learning_rate": 2.7881217835229844e-05, "loss": 0.1693, "step": 16169 }, { "epoch": 1.9174670935610103, "grad_norm": 1.296083152250887, "learning_rate": 2.787883345522328e-05, "loss": 0.3394, "step": 16170 }, { "epoch": 1.9175856753231353, "grad_norm": 0.8288673859703468, "learning_rate": 2.7876449048677405e-05, "loss": 0.1611, "step": 16171 }, { "epoch": 1.9177042570852603, "grad_norm": 1.1990628487852457, "learning_rate": 2.7874064615614204e-05, "loss": 0.3308, "step": 16172 }, { "epoch": 1.9178228388473852, "grad_norm": 1.0815767046385982, "learning_rate": 2.787168015605564e-05, "loss": 0.2013, "step": 16173 }, { "epoch": 1.9179414206095102, "grad_norm": 0.9651252540753816, "learning_rate": 2.7869295670023716e-05, "loss": 0.2164, "step": 16174 }, { "epoch": 1.9180600023716352, "grad_norm": 0.9239879289641345, "learning_rate": 2.78669111575404e-05, "loss": 0.2066, "step": 16175 }, { "epoch": 1.9181785841337602, "grad_norm": 0.6745882885422091, "learning_rate": 2.786452661862768e-05, "loss": 0.1188, "step": 16176 }, { "epoch": 1.9182971658958852, "grad_norm": 0.904872701022246, "learning_rate": 2.7862142053307534e-05, "loss": 0.2105, "step": 16177 }, { "epoch": 1.9184157476580102, "grad_norm": 0.8668962811703257, "learning_rate": 2.7859757461601948e-05, "loss": 0.1646, "step": 16178 }, { "epoch": 1.9185343294201351, "grad_norm": 1.2469100481072897, "learning_rate": 2.7857372843532908e-05, "loss": 0.2368, "step": 16179 }, { "epoch": 1.9186529111822601, "grad_norm": 0.8283680587361831, "learning_rate": 2.78549881991224e-05, "loss": 0.1554, "step": 16180 }, { "epoch": 1.9187714929443853, "grad_norm": 0.5693712086414155, "learning_rate": 2.785260352839239e-05, "loss": 0.099, "step": 16181 }, { "epoch": 1.91889007470651, "grad_norm": 0.7993986978775055, "learning_rate": 2.7850218831364883e-05, "loss": 0.1769, "step": 16182 }, { "epoch": 1.9190086564686353, "grad_norm": 0.8110727253418308, "learning_rate": 2.784783410806185e-05, "loss": 0.1218, "step": 16183 }, { "epoch": 1.91912723823076, "grad_norm": 1.1593327078399485, "learning_rate": 2.7845449358505282e-05, "loss": 0.2755, "step": 16184 }, { "epoch": 1.9192458199928852, "grad_norm": 0.6967215943401495, "learning_rate": 2.7843064582717156e-05, "loss": 0.1197, "step": 16185 }, { "epoch": 1.91936440175501, "grad_norm": 0.6688461795638877, "learning_rate": 2.7840679780719458e-05, "loss": 0.169, "step": 16186 }, { "epoch": 1.9194829835171352, "grad_norm": 0.9282752426252121, "learning_rate": 2.7838294952534178e-05, "loss": 0.1875, "step": 16187 }, { "epoch": 1.91960156527926, "grad_norm": 1.2104754215557056, "learning_rate": 2.78359100981833e-05, "loss": 0.2421, "step": 16188 }, { "epoch": 1.9197201470413852, "grad_norm": 0.6172802592106619, "learning_rate": 2.783352521768881e-05, "loss": 0.1522, "step": 16189 }, { "epoch": 1.91983872880351, "grad_norm": 0.823135150093449, "learning_rate": 2.783114031107269e-05, "loss": 0.1952, "step": 16190 }, { "epoch": 1.9199573105656351, "grad_norm": 0.9681224463951311, "learning_rate": 2.7828755378356936e-05, "loss": 0.1735, "step": 16191 }, { "epoch": 1.9200758923277599, "grad_norm": 0.8974201362752957, "learning_rate": 2.782637041956352e-05, "loss": 0.1991, "step": 16192 }, { "epoch": 1.920194474089885, "grad_norm": 1.0214410813564399, "learning_rate": 2.7823985434714434e-05, "loss": 0.2207, "step": 16193 }, { "epoch": 1.9203130558520098, "grad_norm": 0.9555008758797051, "learning_rate": 2.7821600423831663e-05, "loss": 0.1818, "step": 16194 }, { "epoch": 1.920431637614135, "grad_norm": 0.8925409387494988, "learning_rate": 2.7819215386937197e-05, "loss": 0.1422, "step": 16195 }, { "epoch": 1.9205502193762598, "grad_norm": 0.711487173754342, "learning_rate": 2.7816830324053023e-05, "loss": 0.123, "step": 16196 }, { "epoch": 1.920668801138385, "grad_norm": 0.9109872756109001, "learning_rate": 2.781444523520113e-05, "loss": 0.1688, "step": 16197 }, { "epoch": 1.9207873829005098, "grad_norm": 0.8727054658235036, "learning_rate": 2.7812060120403506e-05, "loss": 0.2047, "step": 16198 }, { "epoch": 1.920905964662635, "grad_norm": 0.8524400671780192, "learning_rate": 2.7809674979682133e-05, "loss": 0.1639, "step": 16199 }, { "epoch": 1.9210245464247597, "grad_norm": 0.8243196618860221, "learning_rate": 2.7807289813059002e-05, "loss": 0.1829, "step": 16200 }, { "epoch": 1.921143128186885, "grad_norm": 1.1584271819529106, "learning_rate": 2.7804904620556093e-05, "loss": 0.3051, "step": 16201 }, { "epoch": 1.9212617099490097, "grad_norm": 0.908924212849637, "learning_rate": 2.7802519402195416e-05, "loss": 0.203, "step": 16202 }, { "epoch": 1.921380291711135, "grad_norm": 0.6854458430119058, "learning_rate": 2.780013415799894e-05, "loss": 0.1246, "step": 16203 }, { "epoch": 1.9214988734732596, "grad_norm": 1.2569344739811312, "learning_rate": 2.779774888798867e-05, "loss": 0.2272, "step": 16204 }, { "epoch": 1.9216174552353849, "grad_norm": 0.8902749432834192, "learning_rate": 2.7795363592186575e-05, "loss": 0.1718, "step": 16205 }, { "epoch": 1.9217360369975098, "grad_norm": 0.8104250291679126, "learning_rate": 2.7792978270614656e-05, "loss": 0.1681, "step": 16206 }, { "epoch": 1.9218546187596348, "grad_norm": 0.8557527707274022, "learning_rate": 2.77905929232949e-05, "loss": 0.1548, "step": 16207 }, { "epoch": 1.9219732005217598, "grad_norm": 0.7209200266438465, "learning_rate": 2.778820755024931e-05, "loss": 0.1817, "step": 16208 }, { "epoch": 1.9220917822838848, "grad_norm": 0.6200105035109764, "learning_rate": 2.7785822151499857e-05, "loss": 0.1178, "step": 16209 }, { "epoch": 1.9222103640460098, "grad_norm": 0.91659404515795, "learning_rate": 2.7783436727068546e-05, "loss": 0.2384, "step": 16210 }, { "epoch": 1.9223289458081347, "grad_norm": 1.0217718651794356, "learning_rate": 2.778105127697736e-05, "loss": 0.179, "step": 16211 }, { "epoch": 1.9224475275702597, "grad_norm": 0.7813068808972369, "learning_rate": 2.7778665801248292e-05, "loss": 0.2198, "step": 16212 }, { "epoch": 1.9225661093323847, "grad_norm": 0.625999128828941, "learning_rate": 2.777628029990333e-05, "loss": 0.1519, "step": 16213 }, { "epoch": 1.9226846910945097, "grad_norm": 0.9189648251183812, "learning_rate": 2.7773894772964465e-05, "loss": 0.2408, "step": 16214 }, { "epoch": 1.9228032728566347, "grad_norm": 0.9841323240862208, "learning_rate": 2.7771509220453702e-05, "loss": 0.2086, "step": 16215 }, { "epoch": 1.9229218546187596, "grad_norm": 0.8462774581570619, "learning_rate": 2.7769123642393014e-05, "loss": 0.2172, "step": 16216 }, { "epoch": 1.9230404363808846, "grad_norm": 0.9331086031740888, "learning_rate": 2.7766738038804402e-05, "loss": 0.1985, "step": 16217 }, { "epoch": 1.9231590181430096, "grad_norm": 1.0057817776703024, "learning_rate": 2.776435240970986e-05, "loss": 0.2721, "step": 16218 }, { "epoch": 1.9232775999051346, "grad_norm": 0.8457802998666191, "learning_rate": 2.776196675513138e-05, "loss": 0.1626, "step": 16219 }, { "epoch": 1.9233961816672596, "grad_norm": 0.6258755762034788, "learning_rate": 2.775958107509095e-05, "loss": 0.1316, "step": 16220 }, { "epoch": 1.9235147634293845, "grad_norm": 0.8279269491512927, "learning_rate": 2.7757195369610574e-05, "loss": 0.152, "step": 16221 }, { "epoch": 1.9236333451915095, "grad_norm": 0.7823081863674901, "learning_rate": 2.7754809638712233e-05, "loss": 0.1693, "step": 16222 }, { "epoch": 1.9237519269536345, "grad_norm": 0.7832562949403702, "learning_rate": 2.7752423882417927e-05, "loss": 0.159, "step": 16223 }, { "epoch": 1.9238705087157595, "grad_norm": 0.7446655525425439, "learning_rate": 2.7750038100749644e-05, "loss": 0.1868, "step": 16224 }, { "epoch": 1.9239890904778845, "grad_norm": 0.824811696438495, "learning_rate": 2.774765229372938e-05, "loss": 0.1411, "step": 16225 }, { "epoch": 1.9241076722400094, "grad_norm": 1.0288401744065663, "learning_rate": 2.7745266461379138e-05, "loss": 0.1877, "step": 16226 }, { "epoch": 1.9242262540021344, "grad_norm": 2.2810718985038356, "learning_rate": 2.7742880603720904e-05, "loss": 0.3693, "step": 16227 }, { "epoch": 1.9243448357642594, "grad_norm": 0.9043600279489694, "learning_rate": 2.7740494720776673e-05, "loss": 0.2176, "step": 16228 }, { "epoch": 1.9244634175263844, "grad_norm": 0.7822689191303283, "learning_rate": 2.773810881256844e-05, "loss": 0.1741, "step": 16229 }, { "epoch": 1.9245819992885096, "grad_norm": 0.6759252306295748, "learning_rate": 2.773572287911821e-05, "loss": 0.1609, "step": 16230 }, { "epoch": 1.9247005810506344, "grad_norm": 0.8943259057526366, "learning_rate": 2.7733336920447955e-05, "loss": 0.2505, "step": 16231 }, { "epoch": 1.9248191628127596, "grad_norm": 0.7325996518281909, "learning_rate": 2.7730950936579698e-05, "loss": 0.1261, "step": 16232 }, { "epoch": 1.9249377445748843, "grad_norm": 0.780282457224162, "learning_rate": 2.7728564927535415e-05, "loss": 0.1474, "step": 16233 }, { "epoch": 1.9250563263370095, "grad_norm": 0.7213809666798303, "learning_rate": 2.7726178893337117e-05, "loss": 0.1254, "step": 16234 }, { "epoch": 1.9251749080991343, "grad_norm": 0.6811780071561927, "learning_rate": 2.7723792834006785e-05, "loss": 0.1463, "step": 16235 }, { "epoch": 1.9252934898612595, "grad_norm": 1.2038537021479374, "learning_rate": 2.7721406749566426e-05, "loss": 0.2581, "step": 16236 }, { "epoch": 1.9254120716233842, "grad_norm": 1.0023742592863671, "learning_rate": 2.771902064003803e-05, "loss": 0.1929, "step": 16237 }, { "epoch": 1.9255306533855094, "grad_norm": 0.7024916222771815, "learning_rate": 2.7716634505443606e-05, "loss": 0.1254, "step": 16238 }, { "epoch": 1.9256492351476342, "grad_norm": 1.3766061571718644, "learning_rate": 2.771424834580514e-05, "loss": 0.2775, "step": 16239 }, { "epoch": 1.9257678169097594, "grad_norm": 0.9992837074834383, "learning_rate": 2.7711862161144638e-05, "loss": 0.2431, "step": 16240 }, { "epoch": 1.9258863986718842, "grad_norm": 1.1121760910769325, "learning_rate": 2.770947595148408e-05, "loss": 0.2449, "step": 16241 }, { "epoch": 1.9260049804340094, "grad_norm": 0.7659246361713892, "learning_rate": 2.7707089716845485e-05, "loss": 0.147, "step": 16242 }, { "epoch": 1.9261235621961341, "grad_norm": 0.9633508059454885, "learning_rate": 2.7704703457250842e-05, "loss": 0.231, "step": 16243 }, { "epoch": 1.9262421439582593, "grad_norm": 0.8816967274749952, "learning_rate": 2.770231717272214e-05, "loss": 0.133, "step": 16244 }, { "epoch": 1.926360725720384, "grad_norm": 0.7608586527519601, "learning_rate": 2.7699930863281405e-05, "loss": 0.189, "step": 16245 }, { "epoch": 1.9264793074825093, "grad_norm": 1.229595330220173, "learning_rate": 2.7697544528950602e-05, "loss": 0.2963, "step": 16246 }, { "epoch": 1.926597889244634, "grad_norm": 0.6485218449705971, "learning_rate": 2.769515816975175e-05, "loss": 0.1451, "step": 16247 }, { "epoch": 1.9267164710067592, "grad_norm": 0.7388957648250517, "learning_rate": 2.769277178570685e-05, "loss": 0.153, "step": 16248 }, { "epoch": 1.926835052768884, "grad_norm": 1.2232775068761184, "learning_rate": 2.7690385376837895e-05, "loss": 0.2992, "step": 16249 }, { "epoch": 1.9269536345310092, "grad_norm": 1.4506231048980174, "learning_rate": 2.768799894316688e-05, "loss": 0.2552, "step": 16250 }, { "epoch": 1.927072216293134, "grad_norm": 0.9179888122521316, "learning_rate": 2.768561248471582e-05, "loss": 0.215, "step": 16251 }, { "epoch": 1.9271907980552592, "grad_norm": 0.8323944848942463, "learning_rate": 2.7683226001506703e-05, "loss": 0.1726, "step": 16252 }, { "epoch": 1.927309379817384, "grad_norm": 0.584966044025122, "learning_rate": 2.768083949356154e-05, "loss": 0.134, "step": 16253 }, { "epoch": 1.9274279615795091, "grad_norm": 0.5383850519328199, "learning_rate": 2.7678452960902312e-05, "loss": 0.1309, "step": 16254 }, { "epoch": 1.927546543341634, "grad_norm": 0.7630141026978897, "learning_rate": 2.767606640355103e-05, "loss": 0.1489, "step": 16255 }, { "epoch": 1.927665125103759, "grad_norm": 0.6906362974429978, "learning_rate": 2.76736798215297e-05, "loss": 0.137, "step": 16256 }, { "epoch": 1.927783706865884, "grad_norm": 0.6907790982016173, "learning_rate": 2.7671293214860327e-05, "loss": 0.1554, "step": 16257 }, { "epoch": 1.927902288628009, "grad_norm": 0.6156634311355497, "learning_rate": 2.7668906583564906e-05, "loss": 0.1452, "step": 16258 }, { "epoch": 1.928020870390134, "grad_norm": 0.7227098232628193, "learning_rate": 2.7666519927665434e-05, "loss": 0.1679, "step": 16259 }, { "epoch": 1.928139452152259, "grad_norm": 1.0442215320130193, "learning_rate": 2.7664133247183928e-05, "loss": 0.164, "step": 16260 }, { "epoch": 1.928258033914384, "grad_norm": 1.0350632296924778, "learning_rate": 2.766174654214237e-05, "loss": 0.2158, "step": 16261 }, { "epoch": 1.928376615676509, "grad_norm": 0.7349667567042034, "learning_rate": 2.7659359812562778e-05, "loss": 0.1535, "step": 16262 }, { "epoch": 1.928495197438634, "grad_norm": 0.8860017394076909, "learning_rate": 2.765697305846715e-05, "loss": 0.1502, "step": 16263 }, { "epoch": 1.928613779200759, "grad_norm": 0.9147608470009988, "learning_rate": 2.7654586279877487e-05, "loss": 0.1662, "step": 16264 }, { "epoch": 1.928732360962884, "grad_norm": 0.7024595547643817, "learning_rate": 2.7652199476815795e-05, "loss": 0.135, "step": 16265 }, { "epoch": 1.928850942725009, "grad_norm": 0.9633473708628293, "learning_rate": 2.7649812649304073e-05, "loss": 0.1842, "step": 16266 }, { "epoch": 1.9289695244871339, "grad_norm": 0.9024210858686416, "learning_rate": 2.7647425797364325e-05, "loss": 0.2349, "step": 16267 }, { "epoch": 1.9290881062492589, "grad_norm": 0.6877292794218387, "learning_rate": 2.764503892101857e-05, "loss": 0.1005, "step": 16268 }, { "epoch": 1.9292066880113838, "grad_norm": 0.6653825674517339, "learning_rate": 2.7642652020288785e-05, "loss": 0.0959, "step": 16269 }, { "epoch": 1.9293252697735088, "grad_norm": 0.6476084560822362, "learning_rate": 2.7640265095196997e-05, "loss": 0.1598, "step": 16270 }, { "epoch": 1.9294438515356338, "grad_norm": 0.9167282191087206, "learning_rate": 2.7637878145765207e-05, "loss": 0.1586, "step": 16271 }, { "epoch": 1.9295624332977588, "grad_norm": 0.7935845040873356, "learning_rate": 2.7635491172015406e-05, "loss": 0.1277, "step": 16272 }, { "epoch": 1.9296810150598838, "grad_norm": 0.7973014115427465, "learning_rate": 2.7633104173969616e-05, "loss": 0.1635, "step": 16273 }, { "epoch": 1.9297995968220087, "grad_norm": 0.859827517265893, "learning_rate": 2.7630717151649827e-05, "loss": 0.1893, "step": 16274 }, { "epoch": 1.9299181785841337, "grad_norm": 0.8105651419958483, "learning_rate": 2.7628330105078055e-05, "loss": 0.1934, "step": 16275 }, { "epoch": 1.9300367603462587, "grad_norm": 1.1439449091997902, "learning_rate": 2.7625943034276296e-05, "loss": 0.2314, "step": 16276 }, { "epoch": 1.9301553421083837, "grad_norm": 0.7478391658524234, "learning_rate": 2.7623555939266576e-05, "loss": 0.1687, "step": 16277 }, { "epoch": 1.9302739238705087, "grad_norm": 0.7457222992299454, "learning_rate": 2.7621168820070874e-05, "loss": 0.1612, "step": 16278 }, { "epoch": 1.9303925056326336, "grad_norm": 0.7934664126178257, "learning_rate": 2.761878167671122e-05, "loss": 0.145, "step": 16279 }, { "epoch": 1.9305110873947586, "grad_norm": 0.6970576012944704, "learning_rate": 2.761639450920961e-05, "loss": 0.1423, "step": 16280 }, { "epoch": 1.9306296691568838, "grad_norm": 0.8957741567654607, "learning_rate": 2.7614007317588047e-05, "loss": 0.1959, "step": 16281 }, { "epoch": 1.9307482509190086, "grad_norm": 1.1135977631533982, "learning_rate": 2.761162010186854e-05, "loss": 0.2122, "step": 16282 }, { "epoch": 1.9308668326811338, "grad_norm": 0.8975288745863577, "learning_rate": 2.7609232862073102e-05, "loss": 0.1304, "step": 16283 }, { "epoch": 1.9309854144432586, "grad_norm": 0.5208941876843337, "learning_rate": 2.7606845598223735e-05, "loss": 0.1063, "step": 16284 }, { "epoch": 1.9311039962053838, "grad_norm": 0.8103647574368822, "learning_rate": 2.760445831034245e-05, "loss": 0.1761, "step": 16285 }, { "epoch": 1.9312225779675085, "grad_norm": 0.8965377710269227, "learning_rate": 2.7602070998451253e-05, "loss": 0.2241, "step": 16286 }, { "epoch": 1.9313411597296337, "grad_norm": 0.9799503981826664, "learning_rate": 2.7599683662572146e-05, "loss": 0.2059, "step": 16287 }, { "epoch": 1.9314597414917585, "grad_norm": 0.915503872099355, "learning_rate": 2.7597296302727156e-05, "loss": 0.1826, "step": 16288 }, { "epoch": 1.9315783232538837, "grad_norm": 0.7785380510675531, "learning_rate": 2.7594908918938266e-05, "loss": 0.1673, "step": 16289 }, { "epoch": 1.9316969050160084, "grad_norm": 0.8819746465338293, "learning_rate": 2.759252151122751e-05, "loss": 0.1671, "step": 16290 }, { "epoch": 1.9318154867781336, "grad_norm": 0.7871891322657373, "learning_rate": 2.7590134079616874e-05, "loss": 0.1519, "step": 16291 }, { "epoch": 1.9319340685402584, "grad_norm": 1.0679534426282065, "learning_rate": 2.7587746624128386e-05, "loss": 0.2357, "step": 16292 }, { "epoch": 1.9320526503023836, "grad_norm": 0.7089512577965378, "learning_rate": 2.758535914478404e-05, "loss": 0.1529, "step": 16293 }, { "epoch": 1.9321712320645084, "grad_norm": 0.6609916788809255, "learning_rate": 2.7582971641605866e-05, "loss": 0.1268, "step": 16294 }, { "epoch": 1.9322898138266336, "grad_norm": 0.9383666713250818, "learning_rate": 2.7580584114615848e-05, "loss": 0.2337, "step": 16295 }, { "epoch": 1.9324083955887583, "grad_norm": 0.7061799334726571, "learning_rate": 2.757819656383601e-05, "loss": 0.1436, "step": 16296 }, { "epoch": 1.9325269773508835, "grad_norm": 0.6585925590992556, "learning_rate": 2.7575808989288366e-05, "loss": 0.1569, "step": 16297 }, { "epoch": 1.9326455591130083, "grad_norm": 0.943007915627132, "learning_rate": 2.7573421390994918e-05, "loss": 0.2347, "step": 16298 }, { "epoch": 1.9327641408751335, "grad_norm": 0.9839469387985489, "learning_rate": 2.7571033768977683e-05, "loss": 0.2285, "step": 16299 }, { "epoch": 1.9328827226372582, "grad_norm": 0.9785911075820312, "learning_rate": 2.7568646123258667e-05, "loss": 0.169, "step": 16300 }, { "epoch": 1.9330013043993834, "grad_norm": 0.817502052879532, "learning_rate": 2.756625845385989e-05, "loss": 0.1595, "step": 16301 }, { "epoch": 1.9331198861615082, "grad_norm": 0.8998275141317014, "learning_rate": 2.756387076080335e-05, "loss": 0.2235, "step": 16302 }, { "epoch": 1.9332384679236334, "grad_norm": 0.7354614494511376, "learning_rate": 2.7561483044111074e-05, "loss": 0.1375, "step": 16303 }, { "epoch": 1.9333570496857582, "grad_norm": 0.8520295644481909, "learning_rate": 2.7559095303805055e-05, "loss": 0.1849, "step": 16304 }, { "epoch": 1.9334756314478834, "grad_norm": 0.6540354108118182, "learning_rate": 2.7556707539907317e-05, "loss": 0.1353, "step": 16305 }, { "epoch": 1.9335942132100084, "grad_norm": 0.9418615238307455, "learning_rate": 2.7554319752439872e-05, "loss": 0.1738, "step": 16306 }, { "epoch": 1.9337127949721333, "grad_norm": 1.0894504445429882, "learning_rate": 2.755193194142474e-05, "loss": 0.2056, "step": 16307 }, { "epoch": 1.9338313767342583, "grad_norm": 0.6648006297971638, "learning_rate": 2.7549544106883918e-05, "loss": 0.1707, "step": 16308 }, { "epoch": 1.9339499584963833, "grad_norm": 0.7231447155144718, "learning_rate": 2.7547156248839427e-05, "loss": 0.1509, "step": 16309 }, { "epoch": 1.9340685402585083, "grad_norm": 0.8173416581334517, "learning_rate": 2.754476836731328e-05, "loss": 0.1494, "step": 16310 }, { "epoch": 1.9341871220206333, "grad_norm": 1.6449647292361331, "learning_rate": 2.7542380462327487e-05, "loss": 0.3053, "step": 16311 }, { "epoch": 1.9343057037827582, "grad_norm": 1.0173777606578023, "learning_rate": 2.7539992533904062e-05, "loss": 0.1936, "step": 16312 }, { "epoch": 1.9344242855448832, "grad_norm": 0.7012588044654365, "learning_rate": 2.7537604582065025e-05, "loss": 0.1641, "step": 16313 }, { "epoch": 1.9345428673070082, "grad_norm": 0.5724858468951916, "learning_rate": 2.753521660683238e-05, "loss": 0.1414, "step": 16314 }, { "epoch": 1.9346614490691332, "grad_norm": 0.8835845925655845, "learning_rate": 2.753282860822815e-05, "loss": 0.1482, "step": 16315 }, { "epoch": 1.9347800308312582, "grad_norm": 0.8625225129654933, "learning_rate": 2.753044058627434e-05, "loss": 0.1614, "step": 16316 }, { "epoch": 1.9348986125933831, "grad_norm": 0.8309879299801034, "learning_rate": 2.7528052540992978e-05, "loss": 0.1926, "step": 16317 }, { "epoch": 1.9350171943555081, "grad_norm": 0.7142398825488963, "learning_rate": 2.752566447240607e-05, "loss": 0.1607, "step": 16318 }, { "epoch": 1.935135776117633, "grad_norm": 0.9482058059125964, "learning_rate": 2.7523276380535636e-05, "loss": 0.2098, "step": 16319 }, { "epoch": 1.935254357879758, "grad_norm": 0.759187084389351, "learning_rate": 2.752088826540369e-05, "loss": 0.153, "step": 16320 }, { "epoch": 1.935372939641883, "grad_norm": 0.6458785358564313, "learning_rate": 2.7518500127032236e-05, "loss": 0.1448, "step": 16321 }, { "epoch": 1.935491521404008, "grad_norm": 0.728189357666095, "learning_rate": 2.7516111965443307e-05, "loss": 0.1681, "step": 16322 }, { "epoch": 1.935610103166133, "grad_norm": 1.8419132328993884, "learning_rate": 2.7513723780658907e-05, "loss": 0.4463, "step": 16323 }, { "epoch": 1.935728684928258, "grad_norm": 0.8725323124622913, "learning_rate": 2.7511335572701057e-05, "loss": 0.1631, "step": 16324 }, { "epoch": 1.935847266690383, "grad_norm": 0.7931191431772464, "learning_rate": 2.7508947341591768e-05, "loss": 0.1502, "step": 16325 }, { "epoch": 1.935965848452508, "grad_norm": 0.9144589016597358, "learning_rate": 2.7506559087353074e-05, "loss": 0.185, "step": 16326 }, { "epoch": 1.936084430214633, "grad_norm": 0.9093104363529504, "learning_rate": 2.750417081000697e-05, "loss": 0.2361, "step": 16327 }, { "epoch": 1.936203011976758, "grad_norm": 0.7648873632389199, "learning_rate": 2.750178250957548e-05, "loss": 0.179, "step": 16328 }, { "epoch": 1.936321593738883, "grad_norm": 0.8858035491669537, "learning_rate": 2.749939418608063e-05, "loss": 0.1852, "step": 16329 }, { "epoch": 1.936440175501008, "grad_norm": 1.0528135687094582, "learning_rate": 2.7497005839544422e-05, "loss": 0.2213, "step": 16330 }, { "epoch": 1.9365587572631329, "grad_norm": 0.7139425373034957, "learning_rate": 2.749461746998889e-05, "loss": 0.1541, "step": 16331 }, { "epoch": 1.936677339025258, "grad_norm": 0.9323818551502069, "learning_rate": 2.7492229077436037e-05, "loss": 0.1848, "step": 16332 }, { "epoch": 1.9367959207873828, "grad_norm": 0.5688576485391438, "learning_rate": 2.7489840661907895e-05, "loss": 0.149, "step": 16333 }, { "epoch": 1.936914502549508, "grad_norm": 1.3937011772171348, "learning_rate": 2.7487452223426474e-05, "loss": 0.2975, "step": 16334 }, { "epoch": 1.9370330843116328, "grad_norm": 0.8599078377533141, "learning_rate": 2.748506376201379e-05, "loss": 0.1914, "step": 16335 }, { "epoch": 1.937151666073758, "grad_norm": 0.7272034967430947, "learning_rate": 2.7482675277691866e-05, "loss": 0.1618, "step": 16336 }, { "epoch": 1.9372702478358828, "grad_norm": 0.6125430315886855, "learning_rate": 2.7480286770482728e-05, "loss": 0.1284, "step": 16337 }, { "epoch": 1.937388829598008, "grad_norm": 0.9041990513005814, "learning_rate": 2.747789824040838e-05, "loss": 0.2463, "step": 16338 }, { "epoch": 1.9375074113601327, "grad_norm": 0.6705350577483852, "learning_rate": 2.7475509687490852e-05, "loss": 0.159, "step": 16339 }, { "epoch": 1.937625993122258, "grad_norm": 0.797215186118629, "learning_rate": 2.7473121111752154e-05, "loss": 0.2283, "step": 16340 }, { "epoch": 1.9377445748843827, "grad_norm": 0.8459022043963801, "learning_rate": 2.747073251321432e-05, "loss": 0.2047, "step": 16341 }, { "epoch": 1.9378631566465079, "grad_norm": 0.7823586784015667, "learning_rate": 2.7468343891899358e-05, "loss": 0.1895, "step": 16342 }, { "epoch": 1.9379817384086326, "grad_norm": 0.7764249086962549, "learning_rate": 2.746595524782929e-05, "loss": 0.1712, "step": 16343 }, { "epoch": 1.9381003201707578, "grad_norm": 0.6068356236344893, "learning_rate": 2.7463566581026146e-05, "loss": 0.1383, "step": 16344 }, { "epoch": 1.9382189019328826, "grad_norm": 0.6820415381653931, "learning_rate": 2.7461177891511936e-05, "loss": 0.1263, "step": 16345 }, { "epoch": 1.9383374836950078, "grad_norm": 0.8096432485707196, "learning_rate": 2.745878917930868e-05, "loss": 0.1641, "step": 16346 }, { "epoch": 1.9384560654571326, "grad_norm": 0.6511095450026053, "learning_rate": 2.74564004444384e-05, "loss": 0.1313, "step": 16347 }, { "epoch": 1.9385746472192578, "grad_norm": 0.6772779065713262, "learning_rate": 2.7454011686923132e-05, "loss": 0.1762, "step": 16348 }, { "epoch": 1.9386932289813825, "grad_norm": 0.6253354929339672, "learning_rate": 2.745162290678488e-05, "loss": 0.1172, "step": 16349 }, { "epoch": 1.9388118107435077, "grad_norm": 0.7082450791529888, "learning_rate": 2.7449234104045673e-05, "loss": 0.1386, "step": 16350 }, { "epoch": 1.9389303925056325, "grad_norm": 1.1146440664974027, "learning_rate": 2.7446845278727524e-05, "loss": 0.1855, "step": 16351 }, { "epoch": 1.9390489742677577, "grad_norm": 0.7530766642796596, "learning_rate": 2.744445643085247e-05, "loss": 0.167, "step": 16352 }, { "epoch": 1.9391675560298824, "grad_norm": 0.8024650389847597, "learning_rate": 2.744206756044252e-05, "loss": 0.1425, "step": 16353 }, { "epoch": 1.9392861377920076, "grad_norm": 0.731304097706486, "learning_rate": 2.7439678667519704e-05, "loss": 0.1403, "step": 16354 }, { "epoch": 1.9394047195541326, "grad_norm": 0.8143238735796712, "learning_rate": 2.743728975210604e-05, "loss": 0.1612, "step": 16355 }, { "epoch": 1.9395233013162576, "grad_norm": 0.9743668475176798, "learning_rate": 2.7434900814223557e-05, "loss": 0.1774, "step": 16356 }, { "epoch": 1.9396418830783826, "grad_norm": 0.9130799474075061, "learning_rate": 2.7432511853894272e-05, "loss": 0.1582, "step": 16357 }, { "epoch": 1.9397604648405076, "grad_norm": 0.7128480324089543, "learning_rate": 2.7430122871140207e-05, "loss": 0.1285, "step": 16358 }, { "epoch": 1.9398790466026326, "grad_norm": 0.6083390742163582, "learning_rate": 2.74277338659834e-05, "loss": 0.1417, "step": 16359 }, { "epoch": 1.9399976283647575, "grad_norm": 0.6018195734062577, "learning_rate": 2.7425344838445853e-05, "loss": 0.1315, "step": 16360 }, { "epoch": 1.9401162101268825, "grad_norm": 0.7611954288820711, "learning_rate": 2.742295578854961e-05, "loss": 0.1466, "step": 16361 }, { "epoch": 1.9402347918890075, "grad_norm": 0.6788752366781177, "learning_rate": 2.742056671631668e-05, "loss": 0.1486, "step": 16362 }, { "epoch": 1.9403533736511325, "grad_norm": 0.785319084045297, "learning_rate": 2.7418177621769097e-05, "loss": 0.2194, "step": 16363 }, { "epoch": 1.9404719554132575, "grad_norm": 0.9991907507158211, "learning_rate": 2.741578850492888e-05, "loss": 0.199, "step": 16364 }, { "epoch": 1.9405905371753824, "grad_norm": 0.7381457393076227, "learning_rate": 2.7413399365818048e-05, "loss": 0.1919, "step": 16365 }, { "epoch": 1.9407091189375074, "grad_norm": 0.8117235577213168, "learning_rate": 2.741101020445864e-05, "loss": 0.134, "step": 16366 }, { "epoch": 1.9408277006996324, "grad_norm": 0.9458775134085682, "learning_rate": 2.7408621020872675e-05, "loss": 0.183, "step": 16367 }, { "epoch": 1.9409462824617574, "grad_norm": 1.058519450650208, "learning_rate": 2.7406231815082177e-05, "loss": 0.2543, "step": 16368 }, { "epoch": 1.9410648642238824, "grad_norm": 0.6233877642148292, "learning_rate": 2.7403842587109173e-05, "loss": 0.1255, "step": 16369 }, { "epoch": 1.9411834459860073, "grad_norm": 0.7173998967989395, "learning_rate": 2.740145333697569e-05, "loss": 0.1979, "step": 16370 }, { "epoch": 1.9413020277481323, "grad_norm": 0.9296352913349236, "learning_rate": 2.7399064064703744e-05, "loss": 0.1831, "step": 16371 }, { "epoch": 1.9414206095102573, "grad_norm": 0.8824401959389832, "learning_rate": 2.7396674770315378e-05, "loss": 0.1788, "step": 16372 }, { "epoch": 1.9415391912723823, "grad_norm": 0.7398227688437886, "learning_rate": 2.7394285453832608e-05, "loss": 0.1543, "step": 16373 }, { "epoch": 1.9416577730345073, "grad_norm": 0.7402187303636013, "learning_rate": 2.7391896115277464e-05, "loss": 0.141, "step": 16374 }, { "epoch": 1.9417763547966322, "grad_norm": 0.7574858219285326, "learning_rate": 2.738950675467197e-05, "loss": 0.1437, "step": 16375 }, { "epoch": 1.9418949365587572, "grad_norm": 0.746793427846761, "learning_rate": 2.7387117372038145e-05, "loss": 0.1776, "step": 16376 }, { "epoch": 1.9420135183208822, "grad_norm": 0.8147989342957679, "learning_rate": 2.7384727967398026e-05, "loss": 0.1737, "step": 16377 }, { "epoch": 1.9421321000830072, "grad_norm": 1.143061005927861, "learning_rate": 2.738233854077365e-05, "loss": 0.262, "step": 16378 }, { "epoch": 1.9422506818451322, "grad_norm": 0.9291367642948983, "learning_rate": 2.737994909218703e-05, "loss": 0.1923, "step": 16379 }, { "epoch": 1.9423692636072571, "grad_norm": 0.9030873210704913, "learning_rate": 2.73775596216602e-05, "loss": 0.2449, "step": 16380 }, { "epoch": 1.9424878453693823, "grad_norm": 1.0841538832186994, "learning_rate": 2.7375170129215177e-05, "loss": 0.1972, "step": 16381 }, { "epoch": 1.942606427131507, "grad_norm": 0.7069322663339878, "learning_rate": 2.7372780614874012e-05, "loss": 0.1707, "step": 16382 }, { "epoch": 1.9427250088936323, "grad_norm": 0.8454857553028744, "learning_rate": 2.737039107865871e-05, "loss": 0.1504, "step": 16383 }, { "epoch": 1.942843590655757, "grad_norm": 0.8624521796313256, "learning_rate": 2.7368001520591307e-05, "loss": 0.1926, "step": 16384 }, { "epoch": 1.9429621724178823, "grad_norm": 1.292689629658044, "learning_rate": 2.7365611940693837e-05, "loss": 0.16, "step": 16385 }, { "epoch": 1.943080754180007, "grad_norm": 0.7811231430400088, "learning_rate": 2.736322233898832e-05, "loss": 0.1778, "step": 16386 }, { "epoch": 1.9431993359421322, "grad_norm": 0.5827745965660086, "learning_rate": 2.7360832715496803e-05, "loss": 0.1215, "step": 16387 }, { "epoch": 1.943317917704257, "grad_norm": 0.9308467819178561, "learning_rate": 2.7358443070241298e-05, "loss": 0.2123, "step": 16388 }, { "epoch": 1.9434364994663822, "grad_norm": 0.9395802342889222, "learning_rate": 2.7356053403243843e-05, "loss": 0.2117, "step": 16389 }, { "epoch": 1.943555081228507, "grad_norm": 1.6406697478601848, "learning_rate": 2.7353663714526458e-05, "loss": 0.3047, "step": 16390 }, { "epoch": 1.9436736629906322, "grad_norm": 0.5318932204266572, "learning_rate": 2.7351274004111182e-05, "loss": 0.1398, "step": 16391 }, { "epoch": 1.943792244752757, "grad_norm": 0.7738884716323468, "learning_rate": 2.734888427202004e-05, "loss": 0.1594, "step": 16392 }, { "epoch": 1.9439108265148821, "grad_norm": 1.0457802250038821, "learning_rate": 2.734649451827508e-05, "loss": 0.2158, "step": 16393 }, { "epoch": 1.9440294082770069, "grad_norm": 0.6743533589752824, "learning_rate": 2.7344104742898302e-05, "loss": 0.1269, "step": 16394 }, { "epoch": 1.944147990039132, "grad_norm": 0.7698440110325474, "learning_rate": 2.7341714945911756e-05, "loss": 0.1598, "step": 16395 }, { "epoch": 1.9442665718012568, "grad_norm": 0.7582315730944709, "learning_rate": 2.7339325127337474e-05, "loss": 0.1571, "step": 16396 }, { "epoch": 1.944385153563382, "grad_norm": 0.8019494556277882, "learning_rate": 2.733693528719748e-05, "loss": 0.1554, "step": 16397 }, { "epoch": 1.9445037353255068, "grad_norm": 0.8127280077126118, "learning_rate": 2.7334545425513808e-05, "loss": 0.1673, "step": 16398 }, { "epoch": 1.944622317087632, "grad_norm": 1.210307901176505, "learning_rate": 2.7332155542308497e-05, "loss": 0.2264, "step": 16399 }, { "epoch": 1.9447408988497568, "grad_norm": 0.8485233655157708, "learning_rate": 2.7329765637603565e-05, "loss": 0.2118, "step": 16400 }, { "epoch": 1.944859480611882, "grad_norm": 0.9978498701023479, "learning_rate": 2.7327375711421048e-05, "loss": 0.17, "step": 16401 }, { "epoch": 1.9449780623740067, "grad_norm": 0.8669581288470144, "learning_rate": 2.732498576378299e-05, "loss": 0.1411, "step": 16402 }, { "epoch": 1.945096644136132, "grad_norm": 0.9519768887299245, "learning_rate": 2.73225957947114e-05, "loss": 0.2241, "step": 16403 }, { "epoch": 1.9452152258982567, "grad_norm": 0.8321379612408843, "learning_rate": 2.7320205804228337e-05, "loss": 0.1779, "step": 16404 }, { "epoch": 1.9453338076603819, "grad_norm": 1.1471754378922117, "learning_rate": 2.7317815792355817e-05, "loss": 0.2112, "step": 16405 }, { "epoch": 1.9454523894225069, "grad_norm": 0.7565426420030024, "learning_rate": 2.7315425759115885e-05, "loss": 0.1563, "step": 16406 }, { "epoch": 1.9455709711846318, "grad_norm": 0.8318162796998314, "learning_rate": 2.731303570453056e-05, "loss": 0.1345, "step": 16407 }, { "epoch": 1.9456895529467568, "grad_norm": 0.7572990844022272, "learning_rate": 2.7310645628621882e-05, "loss": 0.1688, "step": 16408 }, { "epoch": 1.9458081347088818, "grad_norm": 0.7020033291596982, "learning_rate": 2.7308255531411887e-05, "loss": 0.1214, "step": 16409 }, { "epoch": 1.9459267164710068, "grad_norm": 0.892332692321887, "learning_rate": 2.730586541292261e-05, "loss": 0.23, "step": 16410 }, { "epoch": 1.9460452982331318, "grad_norm": 0.6313933593535302, "learning_rate": 2.7303475273176076e-05, "loss": 0.1336, "step": 16411 }, { "epoch": 1.9461638799952568, "grad_norm": 0.7454138663587897, "learning_rate": 2.730108511219433e-05, "loss": 0.1461, "step": 16412 }, { "epoch": 1.9462824617573817, "grad_norm": 0.7386650471190996, "learning_rate": 2.7298694929999398e-05, "loss": 0.1684, "step": 16413 }, { "epoch": 1.9464010435195067, "grad_norm": 0.8298127099209364, "learning_rate": 2.7296304726613315e-05, "loss": 0.1926, "step": 16414 }, { "epoch": 1.9465196252816317, "grad_norm": 0.7079296657829692, "learning_rate": 2.7293914502058117e-05, "loss": 0.1352, "step": 16415 }, { "epoch": 1.9466382070437567, "grad_norm": 0.5399515796729163, "learning_rate": 2.729152425635584e-05, "loss": 0.1086, "step": 16416 }, { "epoch": 1.9467567888058817, "grad_norm": 0.6757614422334314, "learning_rate": 2.7289133989528527e-05, "loss": 0.119, "step": 16417 }, { "epoch": 1.9468753705680066, "grad_norm": 0.6453150928410119, "learning_rate": 2.72867437015982e-05, "loss": 0.1583, "step": 16418 }, { "epoch": 1.9469939523301316, "grad_norm": 0.834680827946555, "learning_rate": 2.7284353392586904e-05, "loss": 0.2113, "step": 16419 }, { "epoch": 1.9471125340922566, "grad_norm": 0.9171516836566599, "learning_rate": 2.728196306251667e-05, "loss": 0.1557, "step": 16420 }, { "epoch": 1.9472311158543816, "grad_norm": 0.852395521703101, "learning_rate": 2.7279572711409534e-05, "loss": 0.2354, "step": 16421 }, { "epoch": 1.9473496976165066, "grad_norm": 0.8619263914951475, "learning_rate": 2.7277182339287532e-05, "loss": 0.1801, "step": 16422 }, { "epoch": 1.9474682793786315, "grad_norm": 0.6426768880075667, "learning_rate": 2.7274791946172706e-05, "loss": 0.1053, "step": 16423 }, { "epoch": 1.9475868611407565, "grad_norm": 0.8903693580422422, "learning_rate": 2.7272401532087083e-05, "loss": 0.1941, "step": 16424 }, { "epoch": 1.9477054429028815, "grad_norm": 1.024760152382083, "learning_rate": 2.72700110970527e-05, "loss": 0.21, "step": 16425 }, { "epoch": 1.9478240246650065, "grad_norm": 0.8347049789891208, "learning_rate": 2.7267620641091602e-05, "loss": 0.1545, "step": 16426 }, { "epoch": 1.9479426064271315, "grad_norm": 0.8741021836107608, "learning_rate": 2.726523016422583e-05, "loss": 0.1484, "step": 16427 }, { "epoch": 1.9480611881892564, "grad_norm": 0.9589723680434501, "learning_rate": 2.7262839666477408e-05, "loss": 0.172, "step": 16428 }, { "epoch": 1.9481797699513814, "grad_norm": 0.6827656070098324, "learning_rate": 2.7260449147868373e-05, "loss": 0.148, "step": 16429 }, { "epoch": 1.9482983517135066, "grad_norm": 0.8912472186789356, "learning_rate": 2.725805860842078e-05, "loss": 0.2131, "step": 16430 }, { "epoch": 1.9484169334756314, "grad_norm": 0.6651441908962812, "learning_rate": 2.7255668048156645e-05, "loss": 0.1537, "step": 16431 }, { "epoch": 1.9485355152377566, "grad_norm": 0.6448027717060855, "learning_rate": 2.7253277467098026e-05, "loss": 0.1155, "step": 16432 }, { "epoch": 1.9486540969998813, "grad_norm": 0.8262713551052182, "learning_rate": 2.725088686526695e-05, "loss": 0.1538, "step": 16433 }, { "epoch": 1.9487726787620065, "grad_norm": 0.9869174673776087, "learning_rate": 2.724849624268545e-05, "loss": 0.1977, "step": 16434 }, { "epoch": 1.9488912605241313, "grad_norm": 0.6694721216147048, "learning_rate": 2.7246105599375577e-05, "loss": 0.1101, "step": 16435 }, { "epoch": 1.9490098422862565, "grad_norm": 0.921490367336916, "learning_rate": 2.724371493535937e-05, "loss": 0.2207, "step": 16436 }, { "epoch": 1.9491284240483813, "grad_norm": 0.9203854769799883, "learning_rate": 2.7241324250658857e-05, "loss": 0.1955, "step": 16437 }, { "epoch": 1.9492470058105065, "grad_norm": 1.5312205098289042, "learning_rate": 2.7238933545296086e-05, "loss": 0.318, "step": 16438 }, { "epoch": 1.9493655875726312, "grad_norm": 0.8208175879634465, "learning_rate": 2.7236542819293087e-05, "loss": 0.1808, "step": 16439 }, { "epoch": 1.9494841693347564, "grad_norm": 0.6464959689423282, "learning_rate": 2.7234152072671913e-05, "loss": 0.1136, "step": 16440 }, { "epoch": 1.9496027510968812, "grad_norm": 0.7349024878143514, "learning_rate": 2.7231761305454595e-05, "loss": 0.1563, "step": 16441 }, { "epoch": 1.9497213328590064, "grad_norm": 1.2693008848118739, "learning_rate": 2.722937051766317e-05, "loss": 0.2746, "step": 16442 }, { "epoch": 1.9498399146211312, "grad_norm": 1.0828850243065455, "learning_rate": 2.722697970931969e-05, "loss": 0.1803, "step": 16443 }, { "epoch": 1.9499584963832564, "grad_norm": 0.7064120793781952, "learning_rate": 2.722458888044618e-05, "loss": 0.1357, "step": 16444 }, { "epoch": 1.9500770781453811, "grad_norm": 0.7279213440228596, "learning_rate": 2.722219803106469e-05, "loss": 0.1697, "step": 16445 }, { "epoch": 1.9501956599075063, "grad_norm": 0.9960093666776008, "learning_rate": 2.7219807161197257e-05, "loss": 0.2173, "step": 16446 }, { "epoch": 1.950314241669631, "grad_norm": 0.9060928349742707, "learning_rate": 2.7217416270865932e-05, "loss": 0.1584, "step": 16447 }, { "epoch": 1.9504328234317563, "grad_norm": 0.4904323795811234, "learning_rate": 2.7215025360092745e-05, "loss": 0.1099, "step": 16448 }, { "epoch": 1.950551405193881, "grad_norm": 0.8074845600241674, "learning_rate": 2.721263442889974e-05, "loss": 0.139, "step": 16449 }, { "epoch": 1.9506699869560062, "grad_norm": 1.105407215114659, "learning_rate": 2.721024347730896e-05, "loss": 0.2251, "step": 16450 }, { "epoch": 1.950788568718131, "grad_norm": 1.448192083026777, "learning_rate": 2.7207852505342444e-05, "loss": 0.3364, "step": 16451 }, { "epoch": 1.9509071504802562, "grad_norm": 0.5202596946909429, "learning_rate": 2.7205461513022234e-05, "loss": 0.1123, "step": 16452 }, { "epoch": 1.951025732242381, "grad_norm": 0.8631740220265125, "learning_rate": 2.7203070500370377e-05, "loss": 0.1988, "step": 16453 }, { "epoch": 1.9511443140045062, "grad_norm": 0.7214218569144734, "learning_rate": 2.7200679467408906e-05, "loss": 0.1705, "step": 16454 }, { "epoch": 1.9512628957666311, "grad_norm": 0.8686523800743666, "learning_rate": 2.7198288414159877e-05, "loss": 0.1515, "step": 16455 }, { "epoch": 1.9513814775287561, "grad_norm": 0.5706630343584121, "learning_rate": 2.7195897340645315e-05, "loss": 0.1244, "step": 16456 }, { "epoch": 1.951500059290881, "grad_norm": 0.7454630893346157, "learning_rate": 2.7193506246887278e-05, "loss": 0.1682, "step": 16457 }, { "epoch": 1.951618641053006, "grad_norm": 1.0342558395440251, "learning_rate": 2.7191115132907807e-05, "loss": 0.2257, "step": 16458 }, { "epoch": 1.951737222815131, "grad_norm": 0.5128291243973956, "learning_rate": 2.718872399872893e-05, "loss": 0.1211, "step": 16459 }, { "epoch": 1.951855804577256, "grad_norm": 0.5849978130548785, "learning_rate": 2.7186332844372713e-05, "loss": 0.1367, "step": 16460 }, { "epoch": 1.951974386339381, "grad_norm": 0.6735204258300458, "learning_rate": 2.7183941669861184e-05, "loss": 0.193, "step": 16461 }, { "epoch": 1.952092968101506, "grad_norm": 0.8738642127558361, "learning_rate": 2.7181550475216394e-05, "loss": 0.2495, "step": 16462 }, { "epoch": 1.952211549863631, "grad_norm": 0.7788518849161493, "learning_rate": 2.7179159260460378e-05, "loss": 0.1376, "step": 16463 }, { "epoch": 1.952330131625756, "grad_norm": 0.7397904468829547, "learning_rate": 2.717676802561519e-05, "loss": 0.1355, "step": 16464 }, { "epoch": 1.952448713387881, "grad_norm": 0.8840622622493839, "learning_rate": 2.7174376770702864e-05, "loss": 0.2313, "step": 16465 }, { "epoch": 1.952567295150006, "grad_norm": 0.8594010060670205, "learning_rate": 2.7171985495745462e-05, "loss": 0.1903, "step": 16466 }, { "epoch": 1.952685876912131, "grad_norm": 0.647702481633997, "learning_rate": 2.7169594200765008e-05, "loss": 0.1585, "step": 16467 }, { "epoch": 1.952804458674256, "grad_norm": 0.66641278504275, "learning_rate": 2.716720288578356e-05, "loss": 0.1344, "step": 16468 }, { "epoch": 1.9529230404363809, "grad_norm": 0.8289383608158912, "learning_rate": 2.7164811550823155e-05, "loss": 0.1814, "step": 16469 }, { "epoch": 1.9530416221985059, "grad_norm": 0.7672959345547058, "learning_rate": 2.7162420195905847e-05, "loss": 0.1555, "step": 16470 }, { "epoch": 1.9531602039606308, "grad_norm": 0.912891594539876, "learning_rate": 2.7160028821053675e-05, "loss": 0.1629, "step": 16471 }, { "epoch": 1.9532787857227558, "grad_norm": 0.7409091767764244, "learning_rate": 2.7157637426288684e-05, "loss": 0.1461, "step": 16472 }, { "epoch": 1.9533973674848808, "grad_norm": 0.608858308766183, "learning_rate": 2.7155246011632923e-05, "loss": 0.1342, "step": 16473 }, { "epoch": 1.9535159492470058, "grad_norm": 0.6836665160066846, "learning_rate": 2.715285457710844e-05, "loss": 0.1333, "step": 16474 }, { "epoch": 1.9536345310091308, "grad_norm": 0.571941947554955, "learning_rate": 2.715046312273727e-05, "loss": 0.1095, "step": 16475 }, { "epoch": 1.9537531127712557, "grad_norm": 0.8985937513376654, "learning_rate": 2.7148071648541468e-05, "loss": 0.1465, "step": 16476 }, { "epoch": 1.9538716945333807, "grad_norm": 0.6740294221562099, "learning_rate": 2.7145680154543084e-05, "loss": 0.1269, "step": 16477 }, { "epoch": 1.9539902762955057, "grad_norm": 1.3407414813262402, "learning_rate": 2.7143288640764164e-05, "loss": 0.2524, "step": 16478 }, { "epoch": 1.9541088580576307, "grad_norm": 0.9449406028083756, "learning_rate": 2.714089710722675e-05, "loss": 0.196, "step": 16479 }, { "epoch": 1.9542274398197557, "grad_norm": 1.0323321655916324, "learning_rate": 2.713850555395288e-05, "loss": 0.2091, "step": 16480 }, { "epoch": 1.9543460215818809, "grad_norm": 0.6060339758311111, "learning_rate": 2.7136113980964623e-05, "loss": 0.1299, "step": 16481 }, { "epoch": 1.9544646033440056, "grad_norm": 0.8322419769211384, "learning_rate": 2.7133722388284004e-05, "loss": 0.1522, "step": 16482 }, { "epoch": 1.9545831851061308, "grad_norm": 1.0592457962418715, "learning_rate": 2.713133077593309e-05, "loss": 0.2363, "step": 16483 }, { "epoch": 1.9547017668682556, "grad_norm": 0.8432522120667064, "learning_rate": 2.7128939143933914e-05, "loss": 0.1895, "step": 16484 }, { "epoch": 1.9548203486303808, "grad_norm": 0.9619094737938466, "learning_rate": 2.7126547492308536e-05, "loss": 0.189, "step": 16485 }, { "epoch": 1.9549389303925055, "grad_norm": 0.6759590331290899, "learning_rate": 2.712415582107899e-05, "loss": 0.1328, "step": 16486 }, { "epoch": 1.9550575121546307, "grad_norm": 0.6619705200930935, "learning_rate": 2.712176413026733e-05, "loss": 0.176, "step": 16487 }, { "epoch": 1.9551760939167555, "grad_norm": 0.8593371676791806, "learning_rate": 2.7119372419895618e-05, "loss": 0.1782, "step": 16488 }, { "epoch": 1.9552946756788807, "grad_norm": 0.9361338359161707, "learning_rate": 2.7116980689985883e-05, "loss": 0.1528, "step": 16489 }, { "epoch": 1.9554132574410055, "grad_norm": 1.0437144047949376, "learning_rate": 2.7114588940560187e-05, "loss": 0.2044, "step": 16490 }, { "epoch": 1.9555318392031307, "grad_norm": 0.7949693411656087, "learning_rate": 2.7112197171640574e-05, "loss": 0.1815, "step": 16491 }, { "epoch": 1.9556504209652554, "grad_norm": 1.42201322754865, "learning_rate": 2.7109805383249093e-05, "loss": 0.3236, "step": 16492 }, { "epoch": 1.9557690027273806, "grad_norm": 0.7451534271756807, "learning_rate": 2.710741357540779e-05, "loss": 0.1728, "step": 16493 }, { "epoch": 1.9558875844895054, "grad_norm": 0.7328613903464013, "learning_rate": 2.7105021748138716e-05, "loss": 0.1875, "step": 16494 }, { "epoch": 1.9560061662516306, "grad_norm": 0.7306657605252423, "learning_rate": 2.7102629901463928e-05, "loss": 0.1169, "step": 16495 }, { "epoch": 1.9561247480137554, "grad_norm": 1.559609201058611, "learning_rate": 2.710023803540547e-05, "loss": 0.1666, "step": 16496 }, { "epoch": 1.9562433297758806, "grad_norm": 0.969975314832474, "learning_rate": 2.709784614998539e-05, "loss": 0.2072, "step": 16497 }, { "epoch": 1.9563619115380053, "grad_norm": 1.227099851526805, "learning_rate": 2.7095454245225748e-05, "loss": 0.1995, "step": 16498 }, { "epoch": 1.9564804933001305, "grad_norm": 0.6888542266660498, "learning_rate": 2.7093062321148584e-05, "loss": 0.1166, "step": 16499 }, { "epoch": 1.9565990750622553, "grad_norm": 0.7439167846939587, "learning_rate": 2.7090670377775945e-05, "loss": 0.1793, "step": 16500 }, { "epoch": 1.9567176568243805, "grad_norm": 0.7160934404120536, "learning_rate": 2.7088278415129898e-05, "loss": 0.1752, "step": 16501 }, { "epoch": 1.9568362385865052, "grad_norm": 0.9201499955829006, "learning_rate": 2.7085886433232476e-05, "loss": 0.1644, "step": 16502 }, { "epoch": 1.9569548203486304, "grad_norm": 0.6842413313526383, "learning_rate": 2.708349443210575e-05, "loss": 0.1165, "step": 16503 }, { "epoch": 1.9570734021107552, "grad_norm": 0.8758838169318328, "learning_rate": 2.708110241177175e-05, "loss": 0.1831, "step": 16504 }, { "epoch": 1.9571919838728804, "grad_norm": 0.993473493092241, "learning_rate": 2.707871037225255e-05, "loss": 0.1487, "step": 16505 }, { "epoch": 1.9573105656350054, "grad_norm": 0.6877421075801853, "learning_rate": 2.7076318313570177e-05, "loss": 0.1556, "step": 16506 }, { "epoch": 1.9574291473971304, "grad_norm": 0.9461478852080826, "learning_rate": 2.70739262357467e-05, "loss": 0.2483, "step": 16507 }, { "epoch": 1.9575477291592553, "grad_norm": 1.2431777479351123, "learning_rate": 2.7071534138804166e-05, "loss": 0.2419, "step": 16508 }, { "epoch": 1.9576663109213803, "grad_norm": 0.7443905324229713, "learning_rate": 2.7069142022764633e-05, "loss": 0.1421, "step": 16509 }, { "epoch": 1.9577848926835053, "grad_norm": 0.8744163205151868, "learning_rate": 2.706674988765014e-05, "loss": 0.1749, "step": 16510 }, { "epoch": 1.9579034744456303, "grad_norm": 0.7428399460025149, "learning_rate": 2.7064357733482753e-05, "loss": 0.1359, "step": 16511 }, { "epoch": 1.9580220562077553, "grad_norm": 0.7244064007065121, "learning_rate": 2.7061965560284512e-05, "loss": 0.1796, "step": 16512 }, { "epoch": 1.9581406379698802, "grad_norm": 0.798240407107549, "learning_rate": 2.7059573368077483e-05, "loss": 0.1732, "step": 16513 }, { "epoch": 1.9582592197320052, "grad_norm": 0.5644818622359752, "learning_rate": 2.705718115688371e-05, "loss": 0.117, "step": 16514 }, { "epoch": 1.9583778014941302, "grad_norm": 0.5744605797116448, "learning_rate": 2.705478892672525e-05, "loss": 0.1323, "step": 16515 }, { "epoch": 1.9584963832562552, "grad_norm": 0.6663017560383973, "learning_rate": 2.705239667762416e-05, "loss": 0.128, "step": 16516 }, { "epoch": 1.9586149650183802, "grad_norm": 0.8718954404925776, "learning_rate": 2.705000440960248e-05, "loss": 0.1687, "step": 16517 }, { "epoch": 1.9587335467805052, "grad_norm": 0.7067106272465382, "learning_rate": 2.7047612122682288e-05, "loss": 0.1647, "step": 16518 }, { "epoch": 1.9588521285426301, "grad_norm": 0.9135172075373489, "learning_rate": 2.7045219816885613e-05, "loss": 0.1944, "step": 16519 }, { "epoch": 1.9589707103047551, "grad_norm": 0.915151370210491, "learning_rate": 2.7042827492234524e-05, "loss": 0.1546, "step": 16520 }, { "epoch": 1.95908929206688, "grad_norm": 0.7479211474349213, "learning_rate": 2.7040435148751063e-05, "loss": 0.1691, "step": 16521 }, { "epoch": 1.959207873829005, "grad_norm": 1.101924943870033, "learning_rate": 2.70380427864573e-05, "loss": 0.2427, "step": 16522 }, { "epoch": 1.95932645559113, "grad_norm": 0.8908154424730587, "learning_rate": 2.703565040537528e-05, "loss": 0.1813, "step": 16523 }, { "epoch": 1.959445037353255, "grad_norm": 0.9872005773986313, "learning_rate": 2.7033258005527055e-05, "loss": 0.2132, "step": 16524 }, { "epoch": 1.95956361911538, "grad_norm": 0.6075991326308783, "learning_rate": 2.7030865586934682e-05, "loss": 0.1335, "step": 16525 }, { "epoch": 1.959682200877505, "grad_norm": 0.8755033145280114, "learning_rate": 2.7028473149620232e-05, "loss": 0.1684, "step": 16526 }, { "epoch": 1.95980078263963, "grad_norm": 0.8681917005486719, "learning_rate": 2.7026080693605737e-05, "loss": 0.1375, "step": 16527 }, { "epoch": 1.959919364401755, "grad_norm": 1.0427891547465393, "learning_rate": 2.7023688218913267e-05, "loss": 0.1972, "step": 16528 }, { "epoch": 1.96003794616388, "grad_norm": 0.8029077765828267, "learning_rate": 2.702129572556487e-05, "loss": 0.1649, "step": 16529 }, { "epoch": 1.9601565279260051, "grad_norm": 0.7498292623449471, "learning_rate": 2.7018903213582608e-05, "loss": 0.1561, "step": 16530 }, { "epoch": 1.96027510968813, "grad_norm": 0.8697453273812679, "learning_rate": 2.7016510682988532e-05, "loss": 0.1774, "step": 16531 }, { "epoch": 1.960393691450255, "grad_norm": 1.04718749242534, "learning_rate": 2.70141181338047e-05, "loss": 0.19, "step": 16532 }, { "epoch": 1.9605122732123799, "grad_norm": 0.8236271222683497, "learning_rate": 2.701172556605317e-05, "loss": 0.165, "step": 16533 }, { "epoch": 1.960630854974505, "grad_norm": 0.7674643083116511, "learning_rate": 2.700933297975599e-05, "loss": 0.1257, "step": 16534 }, { "epoch": 1.9607494367366298, "grad_norm": 1.1139014985542883, "learning_rate": 2.7006940374935234e-05, "loss": 0.2641, "step": 16535 }, { "epoch": 1.960868018498755, "grad_norm": 0.8124603543357449, "learning_rate": 2.7004547751612945e-05, "loss": 0.2222, "step": 16536 }, { "epoch": 1.9609866002608798, "grad_norm": 0.8525668558538714, "learning_rate": 2.700215510981119e-05, "loss": 0.1926, "step": 16537 }, { "epoch": 1.961105182023005, "grad_norm": 0.5998973795969763, "learning_rate": 2.6999762449552015e-05, "loss": 0.126, "step": 16538 }, { "epoch": 1.9612237637851297, "grad_norm": 0.7160057616007139, "learning_rate": 2.6997369770857484e-05, "loss": 0.1546, "step": 16539 }, { "epoch": 1.961342345547255, "grad_norm": 0.6356856040048676, "learning_rate": 2.6994977073749643e-05, "loss": 0.1406, "step": 16540 }, { "epoch": 1.9614609273093797, "grad_norm": 0.5780222789842443, "learning_rate": 2.6992584358250572e-05, "loss": 0.1408, "step": 16541 }, { "epoch": 1.961579509071505, "grad_norm": 0.7222747002664324, "learning_rate": 2.699019162438231e-05, "loss": 0.1978, "step": 16542 }, { "epoch": 1.9616980908336297, "grad_norm": 0.6221404230146084, "learning_rate": 2.6987798872166926e-05, "loss": 0.1652, "step": 16543 }, { "epoch": 1.9618166725957549, "grad_norm": 0.7532997175047124, "learning_rate": 2.698540610162647e-05, "loss": 0.1481, "step": 16544 }, { "epoch": 1.9619352543578796, "grad_norm": 0.7394093738772775, "learning_rate": 2.6983013312783007e-05, "loss": 0.1726, "step": 16545 }, { "epoch": 1.9620538361200048, "grad_norm": 1.204200053517064, "learning_rate": 2.6980620505658593e-05, "loss": 0.2692, "step": 16546 }, { "epoch": 1.9621724178821296, "grad_norm": 0.7859001873335504, "learning_rate": 2.6978227680275286e-05, "loss": 0.1542, "step": 16547 }, { "epoch": 1.9622909996442548, "grad_norm": 0.8481919099150347, "learning_rate": 2.6975834836655143e-05, "loss": 0.1879, "step": 16548 }, { "epoch": 1.9624095814063796, "grad_norm": 0.7239149716241182, "learning_rate": 2.6973441974820228e-05, "loss": 0.1597, "step": 16549 }, { "epoch": 1.9625281631685048, "grad_norm": 0.7367964903229638, "learning_rate": 2.6971049094792606e-05, "loss": 0.1723, "step": 16550 }, { "epoch": 1.9626467449306295, "grad_norm": 1.113211773473028, "learning_rate": 2.6968656196594315e-05, "loss": 0.1592, "step": 16551 }, { "epoch": 1.9627653266927547, "grad_norm": 1.1251086793883442, "learning_rate": 2.696626328024744e-05, "loss": 0.2209, "step": 16552 }, { "epoch": 1.9628839084548795, "grad_norm": 0.7078810502497369, "learning_rate": 2.6963870345774024e-05, "loss": 0.1134, "step": 16553 }, { "epoch": 1.9630024902170047, "grad_norm": 0.9566022699671289, "learning_rate": 2.6961477393196126e-05, "loss": 0.188, "step": 16554 }, { "epoch": 1.9631210719791297, "grad_norm": 0.9886913003032017, "learning_rate": 2.6959084422535813e-05, "loss": 0.2063, "step": 16555 }, { "epoch": 1.9632396537412546, "grad_norm": 1.6578754562000646, "learning_rate": 2.6956691433815152e-05, "loss": 0.3195, "step": 16556 }, { "epoch": 1.9633582355033796, "grad_norm": 0.8228138678375121, "learning_rate": 2.6954298427056192e-05, "loss": 0.1979, "step": 16557 }, { "epoch": 1.9634768172655046, "grad_norm": 1.1795256960888452, "learning_rate": 2.695190540228099e-05, "loss": 0.2476, "step": 16558 }, { "epoch": 1.9635953990276296, "grad_norm": 0.777647402648751, "learning_rate": 2.6949512359511625e-05, "loss": 0.1687, "step": 16559 }, { "epoch": 1.9637139807897546, "grad_norm": 0.7948791011403281, "learning_rate": 2.694711929877014e-05, "loss": 0.1345, "step": 16560 }, { "epoch": 1.9638325625518795, "grad_norm": 0.9385023832416008, "learning_rate": 2.69447262200786e-05, "loss": 0.2127, "step": 16561 }, { "epoch": 1.9639511443140045, "grad_norm": 1.455590392159929, "learning_rate": 2.6942333123459075e-05, "loss": 0.2126, "step": 16562 }, { "epoch": 1.9640697260761295, "grad_norm": 0.9423590928010162, "learning_rate": 2.6939940008933618e-05, "loss": 0.1776, "step": 16563 }, { "epoch": 1.9641883078382545, "grad_norm": 0.7276505991346319, "learning_rate": 2.693754687652429e-05, "loss": 0.1112, "step": 16564 }, { "epoch": 1.9643068896003795, "grad_norm": 0.7415338058040091, "learning_rate": 2.6935153726253165e-05, "loss": 0.1578, "step": 16565 }, { "epoch": 1.9644254713625044, "grad_norm": 0.5990731921463058, "learning_rate": 2.693276055814229e-05, "loss": 0.1395, "step": 16566 }, { "epoch": 1.9645440531246294, "grad_norm": 1.2622729789311924, "learning_rate": 2.6930367372213733e-05, "loss": 0.2209, "step": 16567 }, { "epoch": 1.9646626348867544, "grad_norm": 0.8498867807188308, "learning_rate": 2.6927974168489557e-05, "loss": 0.1882, "step": 16568 }, { "epoch": 1.9647812166488794, "grad_norm": 0.6309768493962936, "learning_rate": 2.6925580946991824e-05, "loss": 0.1371, "step": 16569 }, { "epoch": 1.9648997984110044, "grad_norm": 0.9190544658811112, "learning_rate": 2.692318770774259e-05, "loss": 0.1749, "step": 16570 }, { "epoch": 1.9650183801731294, "grad_norm": 0.9969878792619532, "learning_rate": 2.692079445076393e-05, "loss": 0.2143, "step": 16571 }, { "epoch": 1.9651369619352543, "grad_norm": 0.7589432066192396, "learning_rate": 2.6918401176077895e-05, "loss": 0.1814, "step": 16572 }, { "epoch": 1.9652555436973793, "grad_norm": 1.1533558753682775, "learning_rate": 2.6916007883706557e-05, "loss": 0.2298, "step": 16573 }, { "epoch": 1.9653741254595043, "grad_norm": 0.8380815348800528, "learning_rate": 2.6913614573671974e-05, "loss": 0.1813, "step": 16574 }, { "epoch": 1.9654927072216293, "grad_norm": 0.7731089431054058, "learning_rate": 2.691122124599621e-05, "loss": 0.1348, "step": 16575 }, { "epoch": 1.9656112889837543, "grad_norm": 0.7326949943866915, "learning_rate": 2.690882790070134e-05, "loss": 0.1589, "step": 16576 }, { "epoch": 1.9657298707458792, "grad_norm": 0.7076043935041055, "learning_rate": 2.6906434537809404e-05, "loss": 0.1704, "step": 16577 }, { "epoch": 1.9658484525080042, "grad_norm": 0.7057399487194216, "learning_rate": 2.690404115734249e-05, "loss": 0.1299, "step": 16578 }, { "epoch": 1.9659670342701292, "grad_norm": 0.9317370924309083, "learning_rate": 2.6901647759322647e-05, "loss": 0.2007, "step": 16579 }, { "epoch": 1.9660856160322542, "grad_norm": 0.8550707742518132, "learning_rate": 2.6899254343771946e-05, "loss": 0.1646, "step": 16580 }, { "epoch": 1.9662041977943794, "grad_norm": 0.893691361398224, "learning_rate": 2.6896860910712444e-05, "loss": 0.1785, "step": 16581 }, { "epoch": 1.9663227795565041, "grad_norm": 0.8177559930284295, "learning_rate": 2.689446746016621e-05, "loss": 0.1859, "step": 16582 }, { "epoch": 1.9664413613186293, "grad_norm": 0.8184391173826555, "learning_rate": 2.6892073992155315e-05, "loss": 0.1474, "step": 16583 }, { "epoch": 1.966559943080754, "grad_norm": 0.7646538600748064, "learning_rate": 2.688968050670182e-05, "loss": 0.1465, "step": 16584 }, { "epoch": 1.9666785248428793, "grad_norm": 1.3640930798883208, "learning_rate": 2.6887287003827776e-05, "loss": 0.1493, "step": 16585 }, { "epoch": 1.966797106605004, "grad_norm": 0.7868113372509428, "learning_rate": 2.6884893483555268e-05, "loss": 0.1492, "step": 16586 }, { "epoch": 1.9669156883671293, "grad_norm": 0.8051001913681641, "learning_rate": 2.6882499945906357e-05, "loss": 0.1751, "step": 16587 }, { "epoch": 1.967034270129254, "grad_norm": 0.7135540599428525, "learning_rate": 2.68801063909031e-05, "loss": 0.1441, "step": 16588 }, { "epoch": 1.9671528518913792, "grad_norm": 1.1296237550123442, "learning_rate": 2.6877712818567573e-05, "loss": 0.2837, "step": 16589 }, { "epoch": 1.967271433653504, "grad_norm": 0.7342182561587253, "learning_rate": 2.687531922892183e-05, "loss": 0.1653, "step": 16590 }, { "epoch": 1.9673900154156292, "grad_norm": 0.6979601421588587, "learning_rate": 2.6872925621987948e-05, "loss": 0.1165, "step": 16591 }, { "epoch": 1.967508597177754, "grad_norm": 0.8695124212484039, "learning_rate": 2.6870531997787984e-05, "loss": 0.2135, "step": 16592 }, { "epoch": 1.9676271789398792, "grad_norm": 0.8106851159846735, "learning_rate": 2.6868138356344008e-05, "loss": 0.1524, "step": 16593 }, { "epoch": 1.967745760702004, "grad_norm": 1.1054388525684375, "learning_rate": 2.686574469767809e-05, "loss": 0.2279, "step": 16594 }, { "epoch": 1.9678643424641291, "grad_norm": 0.5482725628502011, "learning_rate": 2.6863351021812298e-05, "loss": 0.121, "step": 16595 }, { "epoch": 1.9679829242262539, "grad_norm": 0.7923884220987686, "learning_rate": 2.6860957328768688e-05, "loss": 0.1712, "step": 16596 }, { "epoch": 1.968101505988379, "grad_norm": 0.6333308931433818, "learning_rate": 2.6858563618569336e-05, "loss": 0.1277, "step": 16597 }, { "epoch": 1.9682200877505038, "grad_norm": 0.7231753197549791, "learning_rate": 2.6856169891236304e-05, "loss": 0.1353, "step": 16598 }, { "epoch": 1.968338669512629, "grad_norm": 0.7132663422154958, "learning_rate": 2.685377614679167e-05, "loss": 0.1408, "step": 16599 }, { "epoch": 1.9684572512747538, "grad_norm": 1.5301320865223567, "learning_rate": 2.6851382385257485e-05, "loss": 0.3425, "step": 16600 }, { "epoch": 1.968575833036879, "grad_norm": 0.7808880347204206, "learning_rate": 2.684898860665583e-05, "loss": 0.1096, "step": 16601 }, { "epoch": 1.9686944147990038, "grad_norm": 0.6058943629175368, "learning_rate": 2.6846594811008763e-05, "loss": 0.122, "step": 16602 }, { "epoch": 1.968812996561129, "grad_norm": 0.747390341729377, "learning_rate": 2.6844200998338357e-05, "loss": 0.1502, "step": 16603 }, { "epoch": 1.9689315783232537, "grad_norm": 0.8704839991326623, "learning_rate": 2.6841807168666676e-05, "loss": 0.1814, "step": 16604 }, { "epoch": 1.969050160085379, "grad_norm": 0.770487641127278, "learning_rate": 2.6839413322015795e-05, "loss": 0.1868, "step": 16605 }, { "epoch": 1.969168741847504, "grad_norm": 0.913773267039488, "learning_rate": 2.6837019458407786e-05, "loss": 0.2069, "step": 16606 }, { "epoch": 1.9692873236096289, "grad_norm": 0.5921731462919477, "learning_rate": 2.68346255778647e-05, "loss": 0.1349, "step": 16607 }, { "epoch": 1.9694059053717539, "grad_norm": 1.0182525409164622, "learning_rate": 2.6832231680408626e-05, "loss": 0.2173, "step": 16608 }, { "epoch": 1.9695244871338788, "grad_norm": 0.9393029778679773, "learning_rate": 2.682983776606161e-05, "loss": 0.1424, "step": 16609 }, { "epoch": 1.9696430688960038, "grad_norm": 0.9608571217602713, "learning_rate": 2.6827443834845743e-05, "loss": 0.216, "step": 16610 }, { "epoch": 1.9697616506581288, "grad_norm": 0.6596452777394715, "learning_rate": 2.682504988678308e-05, "loss": 0.1319, "step": 16611 }, { "epoch": 1.9698802324202538, "grad_norm": 0.9606461461726469, "learning_rate": 2.6822655921895695e-05, "loss": 0.204, "step": 16612 }, { "epoch": 1.9699988141823788, "grad_norm": 1.1176293037247935, "learning_rate": 2.682026194020566e-05, "loss": 0.1453, "step": 16613 }, { "epoch": 1.9701173959445037, "grad_norm": 1.3917511399702827, "learning_rate": 2.681786794173504e-05, "loss": 0.3473, "step": 16614 }, { "epoch": 1.9702359777066287, "grad_norm": 1.1862650280459863, "learning_rate": 2.6815473926505912e-05, "loss": 0.2844, "step": 16615 }, { "epoch": 1.9703545594687537, "grad_norm": 0.7199899265324935, "learning_rate": 2.6813079894540333e-05, "loss": 0.1474, "step": 16616 }, { "epoch": 1.9704731412308787, "grad_norm": 0.9958322698673664, "learning_rate": 2.681068584586039e-05, "loss": 0.2013, "step": 16617 }, { "epoch": 1.9705917229930037, "grad_norm": 1.0000753191581726, "learning_rate": 2.6808291780488136e-05, "loss": 0.2339, "step": 16618 }, { "epoch": 1.9707103047551287, "grad_norm": 0.6604234604344458, "learning_rate": 2.6805897698445654e-05, "loss": 0.1409, "step": 16619 }, { "epoch": 1.9708288865172536, "grad_norm": 0.6505033432578616, "learning_rate": 2.6803503599755007e-05, "loss": 0.1346, "step": 16620 }, { "epoch": 1.9709474682793786, "grad_norm": 0.6745983159506549, "learning_rate": 2.6801109484438276e-05, "loss": 0.1378, "step": 16621 }, { "epoch": 1.9710660500415036, "grad_norm": 0.6211787126136665, "learning_rate": 2.679871535251751e-05, "loss": 0.1328, "step": 16622 }, { "epoch": 1.9711846318036286, "grad_norm": 0.5894084860811243, "learning_rate": 2.67963212040148e-05, "loss": 0.1274, "step": 16623 }, { "epoch": 1.9713032135657536, "grad_norm": 0.45540326031448286, "learning_rate": 2.679392703895221e-05, "loss": 0.1071, "step": 16624 }, { "epoch": 1.9714217953278785, "grad_norm": 0.528735025964823, "learning_rate": 2.679153285735182e-05, "loss": 0.0822, "step": 16625 }, { "epoch": 1.9715403770900035, "grad_norm": 0.5924799605818535, "learning_rate": 2.678913865923569e-05, "loss": 0.1269, "step": 16626 }, { "epoch": 1.9716589588521285, "grad_norm": 1.1076430098550323, "learning_rate": 2.67867444446259e-05, "loss": 0.2439, "step": 16627 }, { "epoch": 1.9717775406142535, "grad_norm": 0.6810233459505042, "learning_rate": 2.6784350213544508e-05, "loss": 0.1358, "step": 16628 }, { "epoch": 1.9718961223763785, "grad_norm": 0.7923239032533719, "learning_rate": 2.6781955966013594e-05, "loss": 0.1832, "step": 16629 }, { "epoch": 1.9720147041385037, "grad_norm": 0.8211981450119527, "learning_rate": 2.6779561702055246e-05, "loss": 0.1811, "step": 16630 }, { "epoch": 1.9721332859006284, "grad_norm": 0.9893360357653131, "learning_rate": 2.6777167421691505e-05, "loss": 0.2093, "step": 16631 }, { "epoch": 1.9722518676627536, "grad_norm": 0.775755598045236, "learning_rate": 2.6774773124944467e-05, "loss": 0.1438, "step": 16632 }, { "epoch": 1.9723704494248784, "grad_norm": 0.9098315965299378, "learning_rate": 2.6772378811836195e-05, "loss": 0.1941, "step": 16633 }, { "epoch": 1.9724890311870036, "grad_norm": 0.8753262439117093, "learning_rate": 2.6769984482388766e-05, "loss": 0.2103, "step": 16634 }, { "epoch": 1.9726076129491283, "grad_norm": 0.766947615716256, "learning_rate": 2.676759013662425e-05, "loss": 0.1964, "step": 16635 }, { "epoch": 1.9727261947112535, "grad_norm": 0.8379974278734846, "learning_rate": 2.676519577456472e-05, "loss": 0.1614, "step": 16636 }, { "epoch": 1.9728447764733783, "grad_norm": 1.0037501682092764, "learning_rate": 2.676280139623225e-05, "loss": 0.2116, "step": 16637 }, { "epoch": 1.9729633582355035, "grad_norm": 1.1657522941883365, "learning_rate": 2.6760407001648917e-05, "loss": 0.1887, "step": 16638 }, { "epoch": 1.9730819399976283, "grad_norm": 0.5948959504948274, "learning_rate": 2.6758012590836783e-05, "loss": 0.1377, "step": 16639 }, { "epoch": 1.9732005217597535, "grad_norm": 0.7371245321180657, "learning_rate": 2.6755618163817935e-05, "loss": 0.1346, "step": 16640 }, { "epoch": 1.9733191035218782, "grad_norm": 0.7608633923570466, "learning_rate": 2.675322372061444e-05, "loss": 0.1517, "step": 16641 }, { "epoch": 1.9734376852840034, "grad_norm": 0.57595914596734, "learning_rate": 2.6750829261248367e-05, "loss": 0.1172, "step": 16642 }, { "epoch": 1.9735562670461282, "grad_norm": 0.9958976191040615, "learning_rate": 2.6748434785741795e-05, "loss": 0.2094, "step": 16643 }, { "epoch": 1.9736748488082534, "grad_norm": 0.816306266084852, "learning_rate": 2.67460402941168e-05, "loss": 0.1763, "step": 16644 }, { "epoch": 1.9737934305703781, "grad_norm": 0.6759700751891955, "learning_rate": 2.6743645786395453e-05, "loss": 0.1752, "step": 16645 }, { "epoch": 1.9739120123325034, "grad_norm": 1.099885530803151, "learning_rate": 2.6741251262599838e-05, "loss": 0.2141, "step": 16646 }, { "epoch": 1.974030594094628, "grad_norm": 1.458129218322792, "learning_rate": 2.6738856722752016e-05, "loss": 0.3347, "step": 16647 }, { "epoch": 1.9741491758567533, "grad_norm": 0.7135795023003078, "learning_rate": 2.6736462166874066e-05, "loss": 0.1468, "step": 16648 }, { "epoch": 1.974267757618878, "grad_norm": 0.8896366335706559, "learning_rate": 2.673406759498807e-05, "loss": 0.2069, "step": 16649 }, { "epoch": 1.9743863393810033, "grad_norm": 0.6851255761746561, "learning_rate": 2.6731673007116093e-05, "loss": 0.1307, "step": 16650 }, { "epoch": 1.974504921143128, "grad_norm": 1.3551676690043897, "learning_rate": 2.6729278403280218e-05, "loss": 0.3068, "step": 16651 }, { "epoch": 1.9746235029052532, "grad_norm": 0.6844252931122223, "learning_rate": 2.6726883783502508e-05, "loss": 0.1395, "step": 16652 }, { "epoch": 1.974742084667378, "grad_norm": 1.0680400745883831, "learning_rate": 2.6724489147805048e-05, "loss": 0.2328, "step": 16653 }, { "epoch": 1.9748606664295032, "grad_norm": 0.7780248936626137, "learning_rate": 2.6722094496209922e-05, "loss": 0.1426, "step": 16654 }, { "epoch": 1.9749792481916282, "grad_norm": 1.011471855154614, "learning_rate": 2.6719699828739192e-05, "loss": 0.2384, "step": 16655 }, { "epoch": 1.9750978299537532, "grad_norm": 0.7177761672341957, "learning_rate": 2.6717305145414933e-05, "loss": 0.1519, "step": 16656 }, { "epoch": 1.9752164117158781, "grad_norm": 0.9548696664340259, "learning_rate": 2.6714910446259235e-05, "loss": 0.1843, "step": 16657 }, { "epoch": 1.9753349934780031, "grad_norm": 0.7472940646774819, "learning_rate": 2.6712515731294162e-05, "loss": 0.1479, "step": 16658 }, { "epoch": 1.975453575240128, "grad_norm": 0.9167677457225722, "learning_rate": 2.6710121000541794e-05, "loss": 0.1424, "step": 16659 }, { "epoch": 1.975572157002253, "grad_norm": 1.1592182238513966, "learning_rate": 2.670772625402421e-05, "loss": 0.208, "step": 16660 }, { "epoch": 1.975690738764378, "grad_norm": 0.8469375779706562, "learning_rate": 2.670533149176348e-05, "loss": 0.1454, "step": 16661 }, { "epoch": 1.975809320526503, "grad_norm": 1.6156986994132243, "learning_rate": 2.6702936713781685e-05, "loss": 0.397, "step": 16662 }, { "epoch": 1.975927902288628, "grad_norm": 0.6248122879127262, "learning_rate": 2.67005419201009e-05, "loss": 0.1474, "step": 16663 }, { "epoch": 1.976046484050753, "grad_norm": 1.0555234426866862, "learning_rate": 2.669814711074321e-05, "loss": 0.1999, "step": 16664 }, { "epoch": 1.976165065812878, "grad_norm": 0.6192123487913402, "learning_rate": 2.669575228573068e-05, "loss": 0.1698, "step": 16665 }, { "epoch": 1.976283647575003, "grad_norm": 0.9184004553738836, "learning_rate": 2.6693357445085405e-05, "loss": 0.1859, "step": 16666 }, { "epoch": 1.976402229337128, "grad_norm": 0.9250854446826734, "learning_rate": 2.669096258882944e-05, "loss": 0.1998, "step": 16667 }, { "epoch": 1.976520811099253, "grad_norm": 0.7010979532184618, "learning_rate": 2.668856771698488e-05, "loss": 0.1576, "step": 16668 }, { "epoch": 1.976639392861378, "grad_norm": 0.8668648912875988, "learning_rate": 2.6686172829573784e-05, "loss": 0.2092, "step": 16669 }, { "epoch": 1.976757974623503, "grad_norm": 0.5868756746951749, "learning_rate": 2.668377792661826e-05, "loss": 0.123, "step": 16670 }, { "epoch": 1.9768765563856279, "grad_norm": 0.6900566349989559, "learning_rate": 2.6681383008140354e-05, "loss": 0.1619, "step": 16671 }, { "epoch": 1.9769951381477529, "grad_norm": 0.8663930510722719, "learning_rate": 2.6678988074162163e-05, "loss": 0.1467, "step": 16672 }, { "epoch": 1.9771137199098778, "grad_norm": 0.7889850593638359, "learning_rate": 2.6676593124705756e-05, "loss": 0.2148, "step": 16673 }, { "epoch": 1.9772323016720028, "grad_norm": 1.9090091591682792, "learning_rate": 2.6674198159793224e-05, "loss": 0.5439, "step": 16674 }, { "epoch": 1.9773508834341278, "grad_norm": 0.8372788030231518, "learning_rate": 2.6671803179446636e-05, "loss": 0.1266, "step": 16675 }, { "epoch": 1.9774694651962528, "grad_norm": 0.46492497165881647, "learning_rate": 2.666940818368807e-05, "loss": 0.0924, "step": 16676 }, { "epoch": 1.9775880469583778, "grad_norm": 0.8313752080510048, "learning_rate": 2.6667013172539618e-05, "loss": 0.1873, "step": 16677 }, { "epoch": 1.9777066287205027, "grad_norm": 0.5282648218052124, "learning_rate": 2.6664618146023335e-05, "loss": 0.1276, "step": 16678 }, { "epoch": 1.977825210482628, "grad_norm": 0.5522505640707696, "learning_rate": 2.6662223104161325e-05, "loss": 0.1538, "step": 16679 }, { "epoch": 1.9779437922447527, "grad_norm": 0.9997401554329624, "learning_rate": 2.665982804697565e-05, "loss": 0.2783, "step": 16680 }, { "epoch": 1.978062374006878, "grad_norm": 0.8770707032339808, "learning_rate": 2.66574329744884e-05, "loss": 0.1952, "step": 16681 }, { "epoch": 1.9781809557690027, "grad_norm": 0.7103568032807374, "learning_rate": 2.6655037886721644e-05, "loss": 0.1194, "step": 16682 }, { "epoch": 1.9782995375311279, "grad_norm": 0.7330920004095182, "learning_rate": 2.6652642783697473e-05, "loss": 0.1752, "step": 16683 }, { "epoch": 1.9784181192932526, "grad_norm": 0.9647894609612958, "learning_rate": 2.6650247665437957e-05, "loss": 0.2465, "step": 16684 }, { "epoch": 1.9785367010553778, "grad_norm": 0.8466384348148461, "learning_rate": 2.664785253196519e-05, "loss": 0.1348, "step": 16685 }, { "epoch": 1.9786552828175026, "grad_norm": 0.573554056213586, "learning_rate": 2.6645457383301236e-05, "loss": 0.1364, "step": 16686 }, { "epoch": 1.9787738645796278, "grad_norm": 0.9738737101743017, "learning_rate": 2.6643062219468183e-05, "loss": 0.1947, "step": 16687 }, { "epoch": 1.9788924463417525, "grad_norm": 0.6607049594721701, "learning_rate": 2.6640667040488114e-05, "loss": 0.1467, "step": 16688 }, { "epoch": 1.9790110281038777, "grad_norm": 1.3189548496625905, "learning_rate": 2.6638271846383106e-05, "loss": 0.2606, "step": 16689 }, { "epoch": 1.9791296098660025, "grad_norm": 0.9043728980504382, "learning_rate": 2.6635876637175244e-05, "loss": 0.162, "step": 16690 }, { "epoch": 1.9792481916281277, "grad_norm": 0.6802738945856501, "learning_rate": 2.6633481412886603e-05, "loss": 0.1571, "step": 16691 }, { "epoch": 1.9793667733902525, "grad_norm": 0.7087497496167907, "learning_rate": 2.663108617353926e-05, "loss": 0.1599, "step": 16692 }, { "epoch": 1.9794853551523777, "grad_norm": 0.6635957519543633, "learning_rate": 2.6628690919155306e-05, "loss": 0.1519, "step": 16693 }, { "epoch": 1.9796039369145024, "grad_norm": 0.6707326282663121, "learning_rate": 2.6626295649756828e-05, "loss": 0.1253, "step": 16694 }, { "epoch": 1.9797225186766276, "grad_norm": 0.6698186404622073, "learning_rate": 2.662390036536589e-05, "loss": 0.139, "step": 16695 }, { "epoch": 1.9798411004387524, "grad_norm": 0.6547856019955827, "learning_rate": 2.6621505066004588e-05, "loss": 0.1539, "step": 16696 }, { "epoch": 1.9799596822008776, "grad_norm": 0.8160741165788528, "learning_rate": 2.6619109751694986e-05, "loss": 0.175, "step": 16697 }, { "epoch": 1.9800782639630023, "grad_norm": 1.0784914545002389, "learning_rate": 2.6616714422459194e-05, "loss": 0.2734, "step": 16698 }, { "epoch": 1.9801968457251276, "grad_norm": 0.9699277990435193, "learning_rate": 2.661431907831926e-05, "loss": 0.1807, "step": 16699 }, { "epoch": 1.9803154274872523, "grad_norm": 0.930059262531024, "learning_rate": 2.6611923719297294e-05, "loss": 0.2015, "step": 16700 }, { "epoch": 1.9804340092493775, "grad_norm": 0.9172297108729774, "learning_rate": 2.660952834541537e-05, "loss": 0.1686, "step": 16701 }, { "epoch": 1.9805525910115023, "grad_norm": 0.6775636866482642, "learning_rate": 2.6607132956695564e-05, "loss": 0.1067, "step": 16702 }, { "epoch": 1.9806711727736275, "grad_norm": 1.1195619735932778, "learning_rate": 2.660473755315996e-05, "loss": 0.2005, "step": 16703 }, { "epoch": 1.9807897545357525, "grad_norm": 0.8679999364343632, "learning_rate": 2.6602342134830643e-05, "loss": 0.1573, "step": 16704 }, { "epoch": 1.9809083362978774, "grad_norm": 0.9817329181210529, "learning_rate": 2.6599946701729705e-05, "loss": 0.1898, "step": 16705 }, { "epoch": 1.9810269180600024, "grad_norm": 0.8267311017415286, "learning_rate": 2.6597551253879217e-05, "loss": 0.1755, "step": 16706 }, { "epoch": 1.9811454998221274, "grad_norm": 0.8654103324490628, "learning_rate": 2.659515579130127e-05, "loss": 0.206, "step": 16707 }, { "epoch": 1.9812640815842524, "grad_norm": 0.7785756671938064, "learning_rate": 2.6592760314017927e-05, "loss": 0.167, "step": 16708 }, { "epoch": 1.9813826633463774, "grad_norm": 0.8413506827211478, "learning_rate": 2.6590364822051307e-05, "loss": 0.1687, "step": 16709 }, { "epoch": 1.9815012451085023, "grad_norm": 0.621049138470212, "learning_rate": 2.658796931542346e-05, "loss": 0.1533, "step": 16710 }, { "epoch": 1.9816198268706273, "grad_norm": 0.7495459686299473, "learning_rate": 2.6585573794156486e-05, "loss": 0.1443, "step": 16711 }, { "epoch": 1.9817384086327523, "grad_norm": 0.7067267317084622, "learning_rate": 2.6583178258272468e-05, "loss": 0.1342, "step": 16712 }, { "epoch": 1.9818569903948773, "grad_norm": 1.3565716088559836, "learning_rate": 2.6580782707793482e-05, "loss": 0.357, "step": 16713 }, { "epoch": 1.9819755721570023, "grad_norm": 0.5408800511761266, "learning_rate": 2.6578387142741624e-05, "loss": 0.1434, "step": 16714 }, { "epoch": 1.9820941539191272, "grad_norm": 0.8036031141519953, "learning_rate": 2.657599156313897e-05, "loss": 0.2262, "step": 16715 }, { "epoch": 1.9822127356812522, "grad_norm": 0.6415761772398542, "learning_rate": 2.6573595969007608e-05, "loss": 0.1327, "step": 16716 }, { "epoch": 1.9823313174433772, "grad_norm": 0.9084812600486402, "learning_rate": 2.657120036036962e-05, "loss": 0.1658, "step": 16717 }, { "epoch": 1.9824498992055022, "grad_norm": 1.1051348667694987, "learning_rate": 2.6568804737247092e-05, "loss": 0.2528, "step": 16718 }, { "epoch": 1.9825684809676272, "grad_norm": 0.9144892479263558, "learning_rate": 2.6566409099662108e-05, "loss": 0.2006, "step": 16719 }, { "epoch": 1.9826870627297521, "grad_norm": 0.9294517785993252, "learning_rate": 2.656401344763676e-05, "loss": 0.2476, "step": 16720 }, { "epoch": 1.9828056444918771, "grad_norm": 0.9004442135683733, "learning_rate": 2.6561617781193117e-05, "loss": 0.2015, "step": 16721 }, { "epoch": 1.982924226254002, "grad_norm": 0.836427409790765, "learning_rate": 2.655922210035327e-05, "loss": 0.2028, "step": 16722 }, { "epoch": 1.983042808016127, "grad_norm": 0.8465077163748829, "learning_rate": 2.6556826405139312e-05, "loss": 0.1779, "step": 16723 }, { "epoch": 1.983161389778252, "grad_norm": 0.6951682222892629, "learning_rate": 2.6554430695573322e-05, "loss": 0.1461, "step": 16724 }, { "epoch": 1.983279971540377, "grad_norm": 0.7643168491952845, "learning_rate": 2.6552034971677385e-05, "loss": 0.1325, "step": 16725 }, { "epoch": 1.983398553302502, "grad_norm": 0.7036208344148681, "learning_rate": 2.65496392334736e-05, "loss": 0.1529, "step": 16726 }, { "epoch": 1.983517135064627, "grad_norm": 0.5317387093638678, "learning_rate": 2.654724348098403e-05, "loss": 0.1169, "step": 16727 }, { "epoch": 1.983635716826752, "grad_norm": 0.5651873467426337, "learning_rate": 2.654484771423078e-05, "loss": 0.1214, "step": 16728 }, { "epoch": 1.983754298588877, "grad_norm": 0.9177841002068887, "learning_rate": 2.6542451933235922e-05, "loss": 0.2057, "step": 16729 }, { "epoch": 1.9838728803510022, "grad_norm": 0.7029486835647333, "learning_rate": 2.6540056138021552e-05, "loss": 0.118, "step": 16730 }, { "epoch": 1.983991462113127, "grad_norm": 0.8445857589650815, "learning_rate": 2.6537660328609758e-05, "loss": 0.1724, "step": 16731 }, { "epoch": 1.9841100438752521, "grad_norm": 0.49511693399145434, "learning_rate": 2.653526450502261e-05, "loss": 0.1082, "step": 16732 }, { "epoch": 1.984228625637377, "grad_norm": 0.6545419195598318, "learning_rate": 2.6532868667282212e-05, "loss": 0.1406, "step": 16733 }, { "epoch": 1.984347207399502, "grad_norm": 0.769774988578381, "learning_rate": 2.6530472815410645e-05, "loss": 0.1907, "step": 16734 }, { "epoch": 1.9844657891616269, "grad_norm": 0.9789345068205707, "learning_rate": 2.6528076949429997e-05, "loss": 0.1695, "step": 16735 }, { "epoch": 1.984584370923752, "grad_norm": 1.041920882999858, "learning_rate": 2.6525681069362353e-05, "loss": 0.2516, "step": 16736 }, { "epoch": 1.9847029526858768, "grad_norm": 0.8938804942486096, "learning_rate": 2.65232851752298e-05, "loss": 0.1909, "step": 16737 }, { "epoch": 1.984821534448002, "grad_norm": 1.070014751526564, "learning_rate": 2.6520889267054423e-05, "loss": 0.2043, "step": 16738 }, { "epoch": 1.9849401162101268, "grad_norm": 0.8285521653925805, "learning_rate": 2.6518493344858315e-05, "loss": 0.1471, "step": 16739 }, { "epoch": 1.985058697972252, "grad_norm": 0.9111993487237223, "learning_rate": 2.6516097408663555e-05, "loss": 0.195, "step": 16740 }, { "epoch": 1.9851772797343767, "grad_norm": 0.8691044034809952, "learning_rate": 2.6513701458492242e-05, "loss": 0.1655, "step": 16741 }, { "epoch": 1.985295861496502, "grad_norm": 1.0407043061123809, "learning_rate": 2.651130549436645e-05, "loss": 0.2036, "step": 16742 }, { "epoch": 1.9854144432586267, "grad_norm": 0.9962973514006571, "learning_rate": 2.6508909516308277e-05, "loss": 0.1782, "step": 16743 }, { "epoch": 1.985533025020752, "grad_norm": 0.7027074787503084, "learning_rate": 2.6506513524339815e-05, "loss": 0.176, "step": 16744 }, { "epoch": 1.9856516067828767, "grad_norm": 1.0024690650913353, "learning_rate": 2.6504117518483147e-05, "loss": 0.171, "step": 16745 }, { "epoch": 1.9857701885450019, "grad_norm": 0.7566022858552286, "learning_rate": 2.6501721498760357e-05, "loss": 0.1771, "step": 16746 }, { "epoch": 1.9858887703071266, "grad_norm": 0.8582618050864335, "learning_rate": 2.649932546519353e-05, "loss": 0.1593, "step": 16747 }, { "epoch": 1.9860073520692518, "grad_norm": 0.8294361221501095, "learning_rate": 2.649692941780477e-05, "loss": 0.1328, "step": 16748 }, { "epoch": 1.9861259338313766, "grad_norm": 0.97134547629697, "learning_rate": 2.6494533356616153e-05, "loss": 0.2288, "step": 16749 }, { "epoch": 1.9862445155935018, "grad_norm": 0.7516701005184288, "learning_rate": 2.649213728164977e-05, "loss": 0.1419, "step": 16750 }, { "epoch": 1.9863630973556266, "grad_norm": 0.9127637391173027, "learning_rate": 2.6489741192927716e-05, "loss": 0.1651, "step": 16751 }, { "epoch": 1.9864816791177518, "grad_norm": 0.710579801350816, "learning_rate": 2.6487345090472066e-05, "loss": 0.1652, "step": 16752 }, { "epoch": 1.9866002608798765, "grad_norm": 0.8368760428768354, "learning_rate": 2.648494897430492e-05, "loss": 0.1638, "step": 16753 }, { "epoch": 1.9867188426420017, "grad_norm": 0.7544859279349941, "learning_rate": 2.648255284444837e-05, "loss": 0.1778, "step": 16754 }, { "epoch": 1.9868374244041267, "grad_norm": 0.8200142245103672, "learning_rate": 2.6480156700924498e-05, "loss": 0.1566, "step": 16755 }, { "epoch": 1.9869560061662517, "grad_norm": 0.6479656451314105, "learning_rate": 2.64777605437554e-05, "loss": 0.1188, "step": 16756 }, { "epoch": 1.9870745879283767, "grad_norm": 0.6750447965807376, "learning_rate": 2.6475364372963158e-05, "loss": 0.1318, "step": 16757 }, { "epoch": 1.9871931696905016, "grad_norm": 0.8597994116642423, "learning_rate": 2.6472968188569864e-05, "loss": 0.1965, "step": 16758 }, { "epoch": 1.9873117514526266, "grad_norm": 0.8670267389509636, "learning_rate": 2.6470571990597616e-05, "loss": 0.1975, "step": 16759 }, { "epoch": 1.9874303332147516, "grad_norm": 0.9843776937705451, "learning_rate": 2.6468175779068494e-05, "loss": 0.2128, "step": 16760 }, { "epoch": 1.9875489149768766, "grad_norm": 0.8408952980226283, "learning_rate": 2.6465779554004595e-05, "loss": 0.1272, "step": 16761 }, { "epoch": 1.9876674967390016, "grad_norm": 0.830125877533524, "learning_rate": 2.6463383315427997e-05, "loss": 0.1891, "step": 16762 }, { "epoch": 1.9877860785011265, "grad_norm": 1.4026057165130161, "learning_rate": 2.6460987063360803e-05, "loss": 0.2902, "step": 16763 }, { "epoch": 1.9879046602632515, "grad_norm": 0.5481902966749679, "learning_rate": 2.64585907978251e-05, "loss": 0.1136, "step": 16764 }, { "epoch": 1.9880232420253765, "grad_norm": 1.0644283803307553, "learning_rate": 2.6456194518842987e-05, "loss": 0.2492, "step": 16765 }, { "epoch": 1.9881418237875015, "grad_norm": 1.044711391541753, "learning_rate": 2.6453798226436534e-05, "loss": 0.1963, "step": 16766 }, { "epoch": 1.9882604055496265, "grad_norm": 0.8084993382768519, "learning_rate": 2.6451401920627854e-05, "loss": 0.1626, "step": 16767 }, { "epoch": 1.9883789873117514, "grad_norm": 0.5437533695924358, "learning_rate": 2.644900560143902e-05, "loss": 0.1099, "step": 16768 }, { "epoch": 1.9884975690738764, "grad_norm": 0.6515448604857998, "learning_rate": 2.644660926889214e-05, "loss": 0.1169, "step": 16769 }, { "epoch": 1.9886161508360014, "grad_norm": 0.8858863734503022, "learning_rate": 2.6444212923009288e-05, "loss": 0.1546, "step": 16770 }, { "epoch": 1.9887347325981264, "grad_norm": 0.6699893289368085, "learning_rate": 2.6441816563812565e-05, "loss": 0.1718, "step": 16771 }, { "epoch": 1.9888533143602514, "grad_norm": 0.6022967993330868, "learning_rate": 2.6439420191324066e-05, "loss": 0.164, "step": 16772 }, { "epoch": 1.9889718961223763, "grad_norm": 1.1165596664933077, "learning_rate": 2.643702380556587e-05, "loss": 0.2574, "step": 16773 }, { "epoch": 1.9890904778845013, "grad_norm": 0.8387274703863548, "learning_rate": 2.6434627406560087e-05, "loss": 0.1762, "step": 16774 }, { "epoch": 1.9892090596466263, "grad_norm": 1.0249678575923673, "learning_rate": 2.6432230994328795e-05, "loss": 0.2283, "step": 16775 }, { "epoch": 1.9893276414087513, "grad_norm": 0.8030606994305773, "learning_rate": 2.6429834568894092e-05, "loss": 0.1825, "step": 16776 }, { "epoch": 1.9894462231708763, "grad_norm": 0.8409218269301227, "learning_rate": 2.642743813027806e-05, "loss": 0.2188, "step": 16777 }, { "epoch": 1.9895648049330013, "grad_norm": 0.8218039141698346, "learning_rate": 2.6425041678502804e-05, "loss": 0.1856, "step": 16778 }, { "epoch": 1.9896833866951265, "grad_norm": 0.6246852684401207, "learning_rate": 2.642264521359041e-05, "loss": 0.1206, "step": 16779 }, { "epoch": 1.9898019684572512, "grad_norm": 0.916595214589751, "learning_rate": 2.6420248735562974e-05, "loss": 0.2092, "step": 16780 }, { "epoch": 1.9899205502193764, "grad_norm": 0.7445435063982053, "learning_rate": 2.641785224444258e-05, "loss": 0.1499, "step": 16781 }, { "epoch": 1.9900391319815012, "grad_norm": 0.6528769747193705, "learning_rate": 2.6415455740251334e-05, "loss": 0.1655, "step": 16782 }, { "epoch": 1.9901577137436264, "grad_norm": 0.6637937587192689, "learning_rate": 2.6413059223011318e-05, "loss": 0.1371, "step": 16783 }, { "epoch": 1.9902762955057511, "grad_norm": 0.6584263811286387, "learning_rate": 2.6410662692744633e-05, "loss": 0.1299, "step": 16784 }, { "epoch": 1.9903948772678763, "grad_norm": 0.9684162862073348, "learning_rate": 2.640826614947336e-05, "loss": 0.1996, "step": 16785 }, { "epoch": 1.990513459030001, "grad_norm": 0.6312743360700395, "learning_rate": 2.6405869593219602e-05, "loss": 0.115, "step": 16786 }, { "epoch": 1.9906320407921263, "grad_norm": 1.0174965277611054, "learning_rate": 2.640347302400546e-05, "loss": 0.1971, "step": 16787 }, { "epoch": 1.990750622554251, "grad_norm": 0.9600383270110412, "learning_rate": 2.6401076441853007e-05, "loss": 0.2581, "step": 16788 }, { "epoch": 1.9908692043163763, "grad_norm": 0.896703965894882, "learning_rate": 2.639867984678435e-05, "loss": 0.1738, "step": 16789 }, { "epoch": 1.990987786078501, "grad_norm": 0.7427486514169015, "learning_rate": 2.6396283238821583e-05, "loss": 0.1297, "step": 16790 }, { "epoch": 1.9911063678406262, "grad_norm": 0.9751250788630055, "learning_rate": 2.6393886617986795e-05, "loss": 0.1704, "step": 16791 }, { "epoch": 1.991224949602751, "grad_norm": 0.6430364087319996, "learning_rate": 2.6391489984302076e-05, "loss": 0.1297, "step": 16792 }, { "epoch": 1.9913435313648762, "grad_norm": 1.0239276229655427, "learning_rate": 2.6389093337789538e-05, "loss": 0.213, "step": 16793 }, { "epoch": 1.991462113127001, "grad_norm": 0.5981605111391587, "learning_rate": 2.638669667847125e-05, "loss": 0.1282, "step": 16794 }, { "epoch": 1.9915806948891261, "grad_norm": 1.2506493868436903, "learning_rate": 2.6384300006369333e-05, "loss": 0.18, "step": 16795 }, { "epoch": 1.991699276651251, "grad_norm": 0.8255312838389638, "learning_rate": 2.6381903321505857e-05, "loss": 0.1564, "step": 16796 }, { "epoch": 1.991817858413376, "grad_norm": 0.8669727747430396, "learning_rate": 2.6379506623902934e-05, "loss": 0.1546, "step": 16797 }, { "epoch": 1.9919364401755009, "grad_norm": 0.7777618462447186, "learning_rate": 2.6377109913582642e-05, "loss": 0.1612, "step": 16798 }, { "epoch": 1.992055021937626, "grad_norm": 0.6665875064749528, "learning_rate": 2.6374713190567098e-05, "loss": 0.1309, "step": 16799 }, { "epoch": 1.9921736036997508, "grad_norm": 0.9883374136627054, "learning_rate": 2.6372316454878372e-05, "loss": 0.2091, "step": 16800 }, { "epoch": 1.992292185461876, "grad_norm": 0.6975006252181103, "learning_rate": 2.6369919706538576e-05, "loss": 0.1274, "step": 16801 }, { "epoch": 1.9924107672240008, "grad_norm": 0.7313476820301791, "learning_rate": 2.6367522945569796e-05, "loss": 0.1633, "step": 16802 }, { "epoch": 1.992529348986126, "grad_norm": 0.7172105782415245, "learning_rate": 2.6365126171994137e-05, "loss": 0.1096, "step": 16803 }, { "epoch": 1.992647930748251, "grad_norm": 0.8174314120476643, "learning_rate": 2.636272938583369e-05, "loss": 0.1554, "step": 16804 }, { "epoch": 1.992766512510376, "grad_norm": 0.8524341529360219, "learning_rate": 2.6360332587110543e-05, "loss": 0.1424, "step": 16805 }, { "epoch": 1.992885094272501, "grad_norm": 0.7567588635717409, "learning_rate": 2.6357935775846804e-05, "loss": 0.1751, "step": 16806 }, { "epoch": 1.993003676034626, "grad_norm": 0.9131409944394483, "learning_rate": 2.6355538952064558e-05, "loss": 0.2371, "step": 16807 }, { "epoch": 1.993122257796751, "grad_norm": 0.5373783287418633, "learning_rate": 2.6353142115785912e-05, "loss": 0.1026, "step": 16808 }, { "epoch": 1.9932408395588759, "grad_norm": 1.0629390420497449, "learning_rate": 2.6350745267032945e-05, "loss": 0.2258, "step": 16809 }, { "epoch": 1.9933594213210009, "grad_norm": 0.6825397849098519, "learning_rate": 2.634834840582777e-05, "loss": 0.1352, "step": 16810 }, { "epoch": 1.9934780030831258, "grad_norm": 0.8612489445615479, "learning_rate": 2.6345951532192475e-05, "loss": 0.1648, "step": 16811 }, { "epoch": 1.9935965848452508, "grad_norm": 1.0186199978027857, "learning_rate": 2.6343554646149154e-05, "loss": 0.1284, "step": 16812 }, { "epoch": 1.9937151666073758, "grad_norm": 0.8198833322047672, "learning_rate": 2.6341157747719903e-05, "loss": 0.1736, "step": 16813 }, { "epoch": 1.9938337483695008, "grad_norm": 0.874558041527294, "learning_rate": 2.6338760836926834e-05, "loss": 0.1833, "step": 16814 }, { "epoch": 1.9939523301316258, "grad_norm": 1.2924128910941703, "learning_rate": 2.633636391379202e-05, "loss": 0.2445, "step": 16815 }, { "epoch": 1.9940709118937507, "grad_norm": 0.5635895581138842, "learning_rate": 2.6333966978337575e-05, "loss": 0.1214, "step": 16816 }, { "epoch": 1.9941894936558757, "grad_norm": 0.9045732882162189, "learning_rate": 2.6331570030585596e-05, "loss": 0.229, "step": 16817 }, { "epoch": 1.9943080754180007, "grad_norm": 0.8586422930364269, "learning_rate": 2.6329173070558168e-05, "loss": 0.1753, "step": 16818 }, { "epoch": 1.9944266571801257, "grad_norm": 0.7327277010546452, "learning_rate": 2.6326776098277395e-05, "loss": 0.1553, "step": 16819 }, { "epoch": 1.9945452389422507, "grad_norm": 0.8561549109880168, "learning_rate": 2.6324379113765375e-05, "loss": 0.2134, "step": 16820 }, { "epoch": 1.9946638207043756, "grad_norm": 0.8751228099666745, "learning_rate": 2.6321982117044196e-05, "loss": 0.1844, "step": 16821 }, { "epoch": 1.9947824024665006, "grad_norm": 0.9225967994311773, "learning_rate": 2.631958510813597e-05, "loss": 0.2057, "step": 16822 }, { "epoch": 1.9949009842286256, "grad_norm": 0.8110724734236764, "learning_rate": 2.631718808706279e-05, "loss": 0.1717, "step": 16823 }, { "epoch": 1.9950195659907506, "grad_norm": 0.7455972795830244, "learning_rate": 2.6314791053846745e-05, "loss": 0.1639, "step": 16824 }, { "epoch": 1.9951381477528756, "grad_norm": 0.7686965587823957, "learning_rate": 2.6312394008509943e-05, "loss": 0.1843, "step": 16825 }, { "epoch": 1.9952567295150005, "grad_norm": 0.9241554586892446, "learning_rate": 2.6309996951074478e-05, "loss": 0.1487, "step": 16826 }, { "epoch": 1.9953753112771255, "grad_norm": 0.6156150184021076, "learning_rate": 2.630759988156245e-05, "loss": 0.1388, "step": 16827 }, { "epoch": 1.9954938930392505, "grad_norm": 1.176632745761282, "learning_rate": 2.630520279999595e-05, "loss": 0.2238, "step": 16828 }, { "epoch": 1.9956124748013755, "grad_norm": 1.2480846180744192, "learning_rate": 2.6302805706397077e-05, "loss": 0.2164, "step": 16829 }, { "epoch": 1.9957310565635007, "grad_norm": 0.6406532505794443, "learning_rate": 2.6300408600787942e-05, "loss": 0.1167, "step": 16830 }, { "epoch": 1.9958496383256255, "grad_norm": 0.8651108721580609, "learning_rate": 2.629801148319062e-05, "loss": 0.2578, "step": 16831 }, { "epoch": 1.9959682200877507, "grad_norm": 0.8490404979505096, "learning_rate": 2.6295614353627234e-05, "loss": 0.1792, "step": 16832 }, { "epoch": 1.9960868018498754, "grad_norm": 0.9466059656605132, "learning_rate": 2.629321721211987e-05, "loss": 0.1887, "step": 16833 }, { "epoch": 1.9962053836120006, "grad_norm": 1.0355705925665357, "learning_rate": 2.6290820058690636e-05, "loss": 0.2428, "step": 16834 }, { "epoch": 1.9963239653741254, "grad_norm": 0.6678457007275491, "learning_rate": 2.6288422893361615e-05, "loss": 0.1475, "step": 16835 }, { "epoch": 1.9964425471362506, "grad_norm": 0.555195645967404, "learning_rate": 2.6286025716154922e-05, "loss": 0.1228, "step": 16836 }, { "epoch": 1.9965611288983753, "grad_norm": 0.7579055650607659, "learning_rate": 2.6283628527092642e-05, "loss": 0.1927, "step": 16837 }, { "epoch": 1.9966797106605005, "grad_norm": 0.6966314226540752, "learning_rate": 2.6281231326196887e-05, "loss": 0.1407, "step": 16838 }, { "epoch": 1.9967982924226253, "grad_norm": 0.9971030956119412, "learning_rate": 2.6278834113489746e-05, "loss": 0.1888, "step": 16839 }, { "epoch": 1.9969168741847505, "grad_norm": 0.8288479408966057, "learning_rate": 2.6276436888993326e-05, "loss": 0.1863, "step": 16840 }, { "epoch": 1.9970354559468753, "grad_norm": 1.033365528780065, "learning_rate": 2.6274039652729725e-05, "loss": 0.2234, "step": 16841 }, { "epoch": 1.9971540377090005, "grad_norm": 0.8360276960406765, "learning_rate": 2.6271642404721035e-05, "loss": 0.1538, "step": 16842 }, { "epoch": 1.9972726194711252, "grad_norm": 0.863976590954877, "learning_rate": 2.6269245144989363e-05, "loss": 0.1523, "step": 16843 }, { "epoch": 1.9973912012332504, "grad_norm": 0.929357078077887, "learning_rate": 2.6266847873556804e-05, "loss": 0.2322, "step": 16844 }, { "epoch": 1.9975097829953752, "grad_norm": 1.1130274515089205, "learning_rate": 2.6264450590445467e-05, "loss": 0.2291, "step": 16845 }, { "epoch": 1.9976283647575004, "grad_norm": 0.6776861365466567, "learning_rate": 2.6262053295677445e-05, "loss": 0.1178, "step": 16846 }, { "epoch": 1.9977469465196251, "grad_norm": 0.6428839327571725, "learning_rate": 2.625965598927484e-05, "loss": 0.1338, "step": 16847 }, { "epoch": 1.9978655282817503, "grad_norm": 0.9888267107554723, "learning_rate": 2.625725867125975e-05, "loss": 0.1968, "step": 16848 }, { "epoch": 1.997984110043875, "grad_norm": 0.7202458671209022, "learning_rate": 2.625486134165428e-05, "loss": 0.1482, "step": 16849 }, { "epoch": 1.9981026918060003, "grad_norm": 0.8207856398553659, "learning_rate": 2.625246400048052e-05, "loss": 0.1817, "step": 16850 }, { "epoch": 1.998221273568125, "grad_norm": 0.8386813664870308, "learning_rate": 2.625006664776058e-05, "loss": 0.1406, "step": 16851 }, { "epoch": 1.9983398553302503, "grad_norm": 0.6168524400517565, "learning_rate": 2.624766928351656e-05, "loss": 0.1356, "step": 16852 }, { "epoch": 1.998458437092375, "grad_norm": 0.8324910929050228, "learning_rate": 2.6245271907770564e-05, "loss": 0.1608, "step": 16853 }, { "epoch": 1.9985770188545002, "grad_norm": 1.035864251144, "learning_rate": 2.6242874520544682e-05, "loss": 0.2238, "step": 16854 }, { "epoch": 1.9986956006166252, "grad_norm": 0.8609641547090066, "learning_rate": 2.6240477121861025e-05, "loss": 0.1411, "step": 16855 }, { "epoch": 1.9988141823787502, "grad_norm": 1.045482775372267, "learning_rate": 2.6238079711741685e-05, "loss": 0.2008, "step": 16856 }, { "epoch": 1.9989327641408752, "grad_norm": 0.869227913995403, "learning_rate": 2.6235682290208773e-05, "loss": 0.1744, "step": 16857 }, { "epoch": 1.9990513459030002, "grad_norm": 0.7130851428563042, "learning_rate": 2.6233284857284385e-05, "loss": 0.1363, "step": 16858 }, { "epoch": 1.9991699276651251, "grad_norm": 0.831475924730594, "learning_rate": 2.6230887412990616e-05, "loss": 0.1552, "step": 16859 }, { "epoch": 1.9992885094272501, "grad_norm": 0.7833658932336717, "learning_rate": 2.6228489957349584e-05, "loss": 0.1667, "step": 16860 }, { "epoch": 1.999407091189375, "grad_norm": 0.9088244395006139, "learning_rate": 2.6226092490383376e-05, "loss": 0.1663, "step": 16861 }, { "epoch": 1.9995256729515, "grad_norm": 0.9796285776540565, "learning_rate": 2.6223695012114096e-05, "loss": 0.1813, "step": 16862 }, { "epoch": 1.999644254713625, "grad_norm": 0.9920832083473846, "learning_rate": 2.6221297522563852e-05, "loss": 0.1768, "step": 16863 }, { "epoch": 1.99976283647575, "grad_norm": 0.611802599086474, "learning_rate": 2.6218900021754744e-05, "loss": 0.1417, "step": 16864 }, { "epoch": 1.999881418237875, "grad_norm": 0.9250190679278198, "learning_rate": 2.6216502509708874e-05, "loss": 0.1755, "step": 16865 }, { "epoch": 2.0, "grad_norm": 0.8563955982635383, "learning_rate": 2.621410498644834e-05, "loss": 0.1644, "step": 16866 }, { "epoch": 2.000118581762125, "grad_norm": 0.6961978689603201, "learning_rate": 2.621170745199525e-05, "loss": 0.1311, "step": 16867 }, { "epoch": 2.00023716352425, "grad_norm": 0.745372156249083, "learning_rate": 2.62093099063717e-05, "loss": 0.0948, "step": 16868 }, { "epoch": 2.000355745286375, "grad_norm": 0.6387425794802617, "learning_rate": 2.6206912349599792e-05, "loss": 0.0953, "step": 16869 }, { "epoch": 2.0004743270485, "grad_norm": 0.5243466703832472, "learning_rate": 2.6204514781701633e-05, "loss": 0.122, "step": 16870 }, { "epoch": 2.000592908810625, "grad_norm": 0.504872226471732, "learning_rate": 2.620211720269933e-05, "loss": 0.0822, "step": 16871 }, { "epoch": 2.00071149057275, "grad_norm": 0.7193061337083732, "learning_rate": 2.6199719612614974e-05, "loss": 0.1169, "step": 16872 }, { "epoch": 2.000830072334875, "grad_norm": 0.5317301181910219, "learning_rate": 2.619732201147068e-05, "loss": 0.0748, "step": 16873 }, { "epoch": 2.000948654097, "grad_norm": 0.7171824160879245, "learning_rate": 2.6194924399288544e-05, "loss": 0.1115, "step": 16874 }, { "epoch": 2.001067235859125, "grad_norm": 0.6838937274892781, "learning_rate": 2.6192526776090677e-05, "loss": 0.1172, "step": 16875 }, { "epoch": 2.00118581762125, "grad_norm": 0.5644426353016092, "learning_rate": 2.6190129141899165e-05, "loss": 0.0834, "step": 16876 }, { "epoch": 2.001304399383375, "grad_norm": 0.8755914648187667, "learning_rate": 2.6187731496736127e-05, "loss": 0.1384, "step": 16877 }, { "epoch": 2.0014229811454998, "grad_norm": 0.7667935085120312, "learning_rate": 2.618533384062366e-05, "loss": 0.1501, "step": 16878 }, { "epoch": 2.001541562907625, "grad_norm": 0.6808975428029741, "learning_rate": 2.6182936173583873e-05, "loss": 0.093, "step": 16879 }, { "epoch": 2.0016601446697497, "grad_norm": 0.8897320784869379, "learning_rate": 2.6180538495638858e-05, "loss": 0.1433, "step": 16880 }, { "epoch": 2.001778726431875, "grad_norm": 0.899711926367931, "learning_rate": 2.6178140806810726e-05, "loss": 0.105, "step": 16881 }, { "epoch": 2.0018973081939997, "grad_norm": 0.5447082510947037, "learning_rate": 2.6175743107121575e-05, "loss": 0.1056, "step": 16882 }, { "epoch": 2.002015889956125, "grad_norm": 0.5040033953458318, "learning_rate": 2.617334539659353e-05, "loss": 0.0726, "step": 16883 }, { "epoch": 2.0021344717182497, "grad_norm": 0.5740791764465832, "learning_rate": 2.617094767524867e-05, "loss": 0.1124, "step": 16884 }, { "epoch": 2.002253053480375, "grad_norm": 0.7866093668182068, "learning_rate": 2.6168549943109112e-05, "loss": 0.1329, "step": 16885 }, { "epoch": 2.0023716352424996, "grad_norm": 0.7653327687452609, "learning_rate": 2.6166152200196948e-05, "loss": 0.1159, "step": 16886 }, { "epoch": 2.002490217004625, "grad_norm": 0.901510355386172, "learning_rate": 2.6163754446534295e-05, "loss": 0.1092, "step": 16887 }, { "epoch": 2.0026087987667496, "grad_norm": 0.5326249617430503, "learning_rate": 2.6161356682143262e-05, "loss": 0.0815, "step": 16888 }, { "epoch": 2.002727380528875, "grad_norm": 0.9342038160847689, "learning_rate": 2.6158958907045933e-05, "loss": 0.128, "step": 16889 }, { "epoch": 2.0028459622909995, "grad_norm": 0.6225871461584435, "learning_rate": 2.6156561121264435e-05, "loss": 0.0757, "step": 16890 }, { "epoch": 2.0029645440531247, "grad_norm": 0.7912308725801209, "learning_rate": 2.6154163324820852e-05, "loss": 0.1172, "step": 16891 }, { "epoch": 2.0030831258152495, "grad_norm": 0.7221587995587107, "learning_rate": 2.61517655177373e-05, "loss": 0.1044, "step": 16892 }, { "epoch": 2.0032017075773747, "grad_norm": 0.8139702789420203, "learning_rate": 2.6149367700035882e-05, "loss": 0.1459, "step": 16893 }, { "epoch": 2.0033202893394995, "grad_norm": 0.6528844354137825, "learning_rate": 2.6146969871738707e-05, "loss": 0.1059, "step": 16894 }, { "epoch": 2.0034388711016247, "grad_norm": 0.9168180641870578, "learning_rate": 2.614457203286787e-05, "loss": 0.127, "step": 16895 }, { "epoch": 2.0035574528637494, "grad_norm": 0.7380234195757243, "learning_rate": 2.6142174183445494e-05, "loss": 0.1016, "step": 16896 }, { "epoch": 2.0036760346258746, "grad_norm": 0.7544639467536766, "learning_rate": 2.6139776323493657e-05, "loss": 0.0839, "step": 16897 }, { "epoch": 2.0037946163879994, "grad_norm": 0.785235094021379, "learning_rate": 2.6137378453034496e-05, "loss": 0.1063, "step": 16898 }, { "epoch": 2.0039131981501246, "grad_norm": 0.7398946334525268, "learning_rate": 2.613498057209009e-05, "loss": 0.1144, "step": 16899 }, { "epoch": 2.0040317799122493, "grad_norm": 1.2593451157567548, "learning_rate": 2.6132582680682556e-05, "loss": 0.121, "step": 16900 }, { "epoch": 2.0041503616743745, "grad_norm": 0.8177921352759894, "learning_rate": 2.6130184778834e-05, "loss": 0.1401, "step": 16901 }, { "epoch": 2.0042689434364993, "grad_norm": 0.7695475676350472, "learning_rate": 2.6127786866566522e-05, "loss": 0.1127, "step": 16902 }, { "epoch": 2.0043875251986245, "grad_norm": 0.8334307628914533, "learning_rate": 2.612538894390224e-05, "loss": 0.1146, "step": 16903 }, { "epoch": 2.0045061069607493, "grad_norm": 0.70994232412504, "learning_rate": 2.612299101086325e-05, "loss": 0.1155, "step": 16904 }, { "epoch": 2.0046246887228745, "grad_norm": 0.7284738355394564, "learning_rate": 2.612059306747166e-05, "loss": 0.0919, "step": 16905 }, { "epoch": 2.0047432704849992, "grad_norm": 0.7416262431404086, "learning_rate": 2.6118195113749572e-05, "loss": 0.0885, "step": 16906 }, { "epoch": 2.0048618522471244, "grad_norm": 0.5834293879172184, "learning_rate": 2.6115797149719108e-05, "loss": 0.0807, "step": 16907 }, { "epoch": 2.004980434009249, "grad_norm": 0.8234780990837411, "learning_rate": 2.6113399175402346e-05, "loss": 0.1029, "step": 16908 }, { "epoch": 2.0050990157713744, "grad_norm": 0.7454549156477106, "learning_rate": 2.6111001190821423e-05, "loss": 0.099, "step": 16909 }, { "epoch": 2.005217597533499, "grad_norm": 0.5295786043925863, "learning_rate": 2.610860319599842e-05, "loss": 0.0662, "step": 16910 }, { "epoch": 2.0053361792956244, "grad_norm": 0.6682096145104655, "learning_rate": 2.610620519095546e-05, "loss": 0.0942, "step": 16911 }, { "epoch": 2.005454761057749, "grad_norm": 0.7204573119520556, "learning_rate": 2.6103807175714645e-05, "loss": 0.1077, "step": 16912 }, { "epoch": 2.0055733428198743, "grad_norm": 0.593220691626613, "learning_rate": 2.6101409150298084e-05, "loss": 0.0782, "step": 16913 }, { "epoch": 2.005691924581999, "grad_norm": 0.6255777257618824, "learning_rate": 2.6099011114727874e-05, "loss": 0.0927, "step": 16914 }, { "epoch": 2.0058105063441243, "grad_norm": 0.6348903183774963, "learning_rate": 2.6096613069026134e-05, "loss": 0.0684, "step": 16915 }, { "epoch": 2.0059290881062495, "grad_norm": 1.0836262035527398, "learning_rate": 2.6094215013214967e-05, "loss": 0.1491, "step": 16916 }, { "epoch": 2.0060476698683742, "grad_norm": 0.8388435802924391, "learning_rate": 2.6091816947316478e-05, "loss": 0.126, "step": 16917 }, { "epoch": 2.0061662516304994, "grad_norm": 0.8930856505456013, "learning_rate": 2.6089418871352778e-05, "loss": 0.1287, "step": 16918 }, { "epoch": 2.006284833392624, "grad_norm": 0.5575754587405908, "learning_rate": 2.6087020785345968e-05, "loss": 0.1017, "step": 16919 }, { "epoch": 2.0064034151547494, "grad_norm": 0.786389780303627, "learning_rate": 2.608462268931816e-05, "loss": 0.1187, "step": 16920 }, { "epoch": 2.006521996916874, "grad_norm": 0.9317462419717074, "learning_rate": 2.608222458329146e-05, "loss": 0.1352, "step": 16921 }, { "epoch": 2.0066405786789994, "grad_norm": 0.5318646374341366, "learning_rate": 2.6079826467287984e-05, "loss": 0.0925, "step": 16922 }, { "epoch": 2.006759160441124, "grad_norm": 0.5517292711088767, "learning_rate": 2.6077428341329824e-05, "loss": 0.066, "step": 16923 }, { "epoch": 2.0068777422032493, "grad_norm": 0.6548734587211145, "learning_rate": 2.60750302054391e-05, "loss": 0.0814, "step": 16924 }, { "epoch": 2.006996323965374, "grad_norm": 0.5459847375615196, "learning_rate": 2.6072632059637915e-05, "loss": 0.075, "step": 16925 }, { "epoch": 2.0071149057274993, "grad_norm": 0.8304234699798361, "learning_rate": 2.6070233903948378e-05, "loss": 0.1118, "step": 16926 }, { "epoch": 2.007233487489624, "grad_norm": 0.7391247659650374, "learning_rate": 2.6067835738392593e-05, "loss": 0.083, "step": 16927 }, { "epoch": 2.0073520692517492, "grad_norm": 0.7700225731492324, "learning_rate": 2.6065437562992677e-05, "loss": 0.1019, "step": 16928 }, { "epoch": 2.007470651013874, "grad_norm": 0.6944742249104041, "learning_rate": 2.6063039377770732e-05, "loss": 0.1049, "step": 16929 }, { "epoch": 2.007589232775999, "grad_norm": 0.7206789976277662, "learning_rate": 2.6060641182748864e-05, "loss": 0.0959, "step": 16930 }, { "epoch": 2.007707814538124, "grad_norm": 1.0297303843752335, "learning_rate": 2.6058242977949183e-05, "loss": 0.1272, "step": 16931 }, { "epoch": 2.007826396300249, "grad_norm": 0.8692985466088227, "learning_rate": 2.6055844763393806e-05, "loss": 0.1252, "step": 16932 }, { "epoch": 2.007944978062374, "grad_norm": 0.8752688196611741, "learning_rate": 2.6053446539104832e-05, "loss": 0.1217, "step": 16933 }, { "epoch": 2.008063559824499, "grad_norm": 0.6180421802264305, "learning_rate": 2.6051048305104374e-05, "loss": 0.0787, "step": 16934 }, { "epoch": 2.008182141586624, "grad_norm": 0.6783292623190534, "learning_rate": 2.6048650061414544e-05, "loss": 0.0888, "step": 16935 }, { "epoch": 2.008300723348749, "grad_norm": 0.726376165528946, "learning_rate": 2.6046251808057435e-05, "loss": 0.1078, "step": 16936 }, { "epoch": 2.008419305110874, "grad_norm": 0.8334341221253349, "learning_rate": 2.604385354505518e-05, "loss": 0.1425, "step": 16937 }, { "epoch": 2.008537886872999, "grad_norm": 0.7396686224603697, "learning_rate": 2.6041455272429867e-05, "loss": 0.121, "step": 16938 }, { "epoch": 2.008656468635124, "grad_norm": 0.9477300834051182, "learning_rate": 2.603905699020362e-05, "loss": 0.1234, "step": 16939 }, { "epoch": 2.008775050397249, "grad_norm": 0.5973177473525001, "learning_rate": 2.6036658698398536e-05, "loss": 0.0595, "step": 16940 }, { "epoch": 2.0088936321593738, "grad_norm": 0.6643940249242061, "learning_rate": 2.603426039703673e-05, "loss": 0.1175, "step": 16941 }, { "epoch": 2.009012213921499, "grad_norm": 0.8267686158779364, "learning_rate": 2.6031862086140306e-05, "loss": 0.0987, "step": 16942 }, { "epoch": 2.0091307956836237, "grad_norm": 0.5168702090147972, "learning_rate": 2.602946376573139e-05, "loss": 0.0735, "step": 16943 }, { "epoch": 2.009249377445749, "grad_norm": 0.7053517735392276, "learning_rate": 2.6027065435832076e-05, "loss": 0.0885, "step": 16944 }, { "epoch": 2.0093679592078737, "grad_norm": 0.8633472622962132, "learning_rate": 2.6024667096464477e-05, "loss": 0.1258, "step": 16945 }, { "epoch": 2.009486540969999, "grad_norm": 0.6426944626628582, "learning_rate": 2.6022268747650706e-05, "loss": 0.0908, "step": 16946 }, { "epoch": 2.0096051227321237, "grad_norm": 0.6513382780759567, "learning_rate": 2.601987038941287e-05, "loss": 0.1095, "step": 16947 }, { "epoch": 2.009723704494249, "grad_norm": 0.8472548328547815, "learning_rate": 2.6017472021773083e-05, "loss": 0.1098, "step": 16948 }, { "epoch": 2.0098422862563736, "grad_norm": 0.6882126867928771, "learning_rate": 2.6015073644753447e-05, "loss": 0.0974, "step": 16949 }, { "epoch": 2.009960868018499, "grad_norm": 0.6144364098396409, "learning_rate": 2.6012675258376078e-05, "loss": 0.0861, "step": 16950 }, { "epoch": 2.0100794497806236, "grad_norm": 0.9937533218659163, "learning_rate": 2.601027686266308e-05, "loss": 0.1544, "step": 16951 }, { "epoch": 2.010198031542749, "grad_norm": 0.8312559883003887, "learning_rate": 2.6007878457636582e-05, "loss": 0.138, "step": 16952 }, { "epoch": 2.0103166133048735, "grad_norm": 0.7000010387459265, "learning_rate": 2.600548004331867e-05, "loss": 0.1106, "step": 16953 }, { "epoch": 2.0104351950669987, "grad_norm": 0.6835837669410336, "learning_rate": 2.6003081619731466e-05, "loss": 0.0978, "step": 16954 }, { "epoch": 2.0105537768291235, "grad_norm": 0.8152892936270512, "learning_rate": 2.6000683186897078e-05, "loss": 0.0927, "step": 16955 }, { "epoch": 2.0106723585912487, "grad_norm": 0.6634205507938633, "learning_rate": 2.5998284744837627e-05, "loss": 0.1013, "step": 16956 }, { "epoch": 2.0107909403533735, "grad_norm": 0.835426075139618, "learning_rate": 2.5995886293575207e-05, "loss": 0.1144, "step": 16957 }, { "epoch": 2.0109095221154987, "grad_norm": 0.8987520586653677, "learning_rate": 2.5993487833131936e-05, "loss": 0.0876, "step": 16958 }, { "epoch": 2.0110281038776234, "grad_norm": 0.7296205630944472, "learning_rate": 2.599108936352993e-05, "loss": 0.1204, "step": 16959 }, { "epoch": 2.0111466856397486, "grad_norm": 0.6054068570723142, "learning_rate": 2.5988690884791287e-05, "loss": 0.1027, "step": 16960 }, { "epoch": 2.0112652674018734, "grad_norm": 0.8810517763727529, "learning_rate": 2.5986292396938132e-05, "loss": 0.1293, "step": 16961 }, { "epoch": 2.0113838491639986, "grad_norm": 0.6499481785379851, "learning_rate": 2.5983893899992572e-05, "loss": 0.086, "step": 16962 }, { "epoch": 2.0115024309261234, "grad_norm": 0.8824483877288267, "learning_rate": 2.598149539397672e-05, "loss": 0.0988, "step": 16963 }, { "epoch": 2.0116210126882486, "grad_norm": 0.6579386180120341, "learning_rate": 2.5979096878912675e-05, "loss": 0.1026, "step": 16964 }, { "epoch": 2.0117395944503733, "grad_norm": 1.032103425834114, "learning_rate": 2.5976698354822565e-05, "loss": 0.1414, "step": 16965 }, { "epoch": 2.0118581762124985, "grad_norm": 0.8918200846715295, "learning_rate": 2.5974299821728492e-05, "loss": 0.1189, "step": 16966 }, { "epoch": 2.0119767579746237, "grad_norm": 0.8254702687106407, "learning_rate": 2.5971901279652573e-05, "loss": 0.143, "step": 16967 }, { "epoch": 2.0120953397367485, "grad_norm": 0.7809869838898381, "learning_rate": 2.5969502728616906e-05, "loss": 0.1094, "step": 16968 }, { "epoch": 2.0122139214988737, "grad_norm": 0.6433097795537194, "learning_rate": 2.5967104168643625e-05, "loss": 0.1079, "step": 16969 }, { "epoch": 2.0123325032609984, "grad_norm": 0.8662513051917791, "learning_rate": 2.5964705599754817e-05, "loss": 0.1097, "step": 16970 }, { "epoch": 2.0124510850231236, "grad_norm": 0.7389450364307818, "learning_rate": 2.596230702197261e-05, "loss": 0.1205, "step": 16971 }, { "epoch": 2.0125696667852484, "grad_norm": 0.6250607579765874, "learning_rate": 2.5959908435319113e-05, "loss": 0.081, "step": 16972 }, { "epoch": 2.0126882485473736, "grad_norm": 0.48101555982249194, "learning_rate": 2.595750983981644e-05, "loss": 0.0702, "step": 16973 }, { "epoch": 2.0128068303094984, "grad_norm": 0.7505440314580127, "learning_rate": 2.59551112354867e-05, "loss": 0.1214, "step": 16974 }, { "epoch": 2.0129254120716236, "grad_norm": 0.7699049564552406, "learning_rate": 2.5952712622352006e-05, "loss": 0.1284, "step": 16975 }, { "epoch": 2.0130439938337483, "grad_norm": 0.9778149577013496, "learning_rate": 2.5950314000434473e-05, "loss": 0.1248, "step": 16976 }, { "epoch": 2.0131625755958735, "grad_norm": 0.8255202814870506, "learning_rate": 2.5947915369756198e-05, "loss": 0.106, "step": 16977 }, { "epoch": 2.0132811573579983, "grad_norm": 0.6821848788801772, "learning_rate": 2.594551673033932e-05, "loss": 0.0858, "step": 16978 }, { "epoch": 2.0133997391201235, "grad_norm": 0.572912726212391, "learning_rate": 2.594311808220593e-05, "loss": 0.0941, "step": 16979 }, { "epoch": 2.0135183208822482, "grad_norm": 0.8513278352284833, "learning_rate": 2.5940719425378146e-05, "loss": 0.1154, "step": 16980 }, { "epoch": 2.0136369026443734, "grad_norm": 0.8342271752063198, "learning_rate": 2.5938320759878082e-05, "loss": 0.1003, "step": 16981 }, { "epoch": 2.013755484406498, "grad_norm": 0.9680128147651842, "learning_rate": 2.593592208572786e-05, "loss": 0.1181, "step": 16982 }, { "epoch": 2.0138740661686234, "grad_norm": 0.7527110966688894, "learning_rate": 2.5933523402949576e-05, "loss": 0.1241, "step": 16983 }, { "epoch": 2.013992647930748, "grad_norm": 0.6050118490138805, "learning_rate": 2.5931124711565357e-05, "loss": 0.0773, "step": 16984 }, { "epoch": 2.0141112296928734, "grad_norm": 0.6675047260071221, "learning_rate": 2.5928726011597303e-05, "loss": 0.086, "step": 16985 }, { "epoch": 2.014229811454998, "grad_norm": 0.809397439483032, "learning_rate": 2.5926327303067538e-05, "loss": 0.0952, "step": 16986 }, { "epoch": 2.0143483932171233, "grad_norm": 0.9165082921960497, "learning_rate": 2.5923928585998177e-05, "loss": 0.0967, "step": 16987 }, { "epoch": 2.014466974979248, "grad_norm": 0.9305151506498842, "learning_rate": 2.5921529860411316e-05, "loss": 0.1387, "step": 16988 }, { "epoch": 2.0145855567413733, "grad_norm": 0.6480431659725122, "learning_rate": 2.5919131126329088e-05, "loss": 0.0941, "step": 16989 }, { "epoch": 2.014704138503498, "grad_norm": 0.6974491318931529, "learning_rate": 2.5916732383773596e-05, "loss": 0.1011, "step": 16990 }, { "epoch": 2.0148227202656233, "grad_norm": 0.6662945695628625, "learning_rate": 2.591433363276695e-05, "loss": 0.0805, "step": 16991 }, { "epoch": 2.014941302027748, "grad_norm": 0.8279482362281937, "learning_rate": 2.5911934873331272e-05, "loss": 0.1164, "step": 16992 }, { "epoch": 2.015059883789873, "grad_norm": 1.060580728359988, "learning_rate": 2.590953610548868e-05, "loss": 0.1066, "step": 16993 }, { "epoch": 2.015178465551998, "grad_norm": 0.8758924257011432, "learning_rate": 2.5907137329261272e-05, "loss": 0.1034, "step": 16994 }, { "epoch": 2.015297047314123, "grad_norm": 0.7941194629503767, "learning_rate": 2.5904738544671174e-05, "loss": 0.1102, "step": 16995 }, { "epoch": 2.015415629076248, "grad_norm": 0.8765848638423117, "learning_rate": 2.5902339751740495e-05, "loss": 0.1073, "step": 16996 }, { "epoch": 2.015534210838373, "grad_norm": 0.7156224532582327, "learning_rate": 2.5899940950491352e-05, "loss": 0.0937, "step": 16997 }, { "epoch": 2.015652792600498, "grad_norm": 0.6402336079564108, "learning_rate": 2.5897542140945858e-05, "loss": 0.0933, "step": 16998 }, { "epoch": 2.015771374362623, "grad_norm": 0.763940236974313, "learning_rate": 2.589514332312612e-05, "loss": 0.1127, "step": 16999 }, { "epoch": 2.015889956124748, "grad_norm": 0.8704257632058873, "learning_rate": 2.5892744497054262e-05, "loss": 0.1213, "step": 17000 }, { "epoch": 2.016008537886873, "grad_norm": 0.565320939896494, "learning_rate": 2.589034566275239e-05, "loss": 0.0746, "step": 17001 }, { "epoch": 2.016127119648998, "grad_norm": 0.8070066555212387, "learning_rate": 2.5887946820242633e-05, "loss": 0.1045, "step": 17002 }, { "epoch": 2.016245701411123, "grad_norm": 0.9749089117483943, "learning_rate": 2.5885547969547085e-05, "loss": 0.1444, "step": 17003 }, { "epoch": 2.016364283173248, "grad_norm": 0.8377268095765578, "learning_rate": 2.5883149110687877e-05, "loss": 0.1057, "step": 17004 }, { "epoch": 2.016482864935373, "grad_norm": 0.6322534696494635, "learning_rate": 2.5880750243687114e-05, "loss": 0.0834, "step": 17005 }, { "epoch": 2.0166014466974977, "grad_norm": 1.477683817095557, "learning_rate": 2.5878351368566918e-05, "loss": 0.1374, "step": 17006 }, { "epoch": 2.016720028459623, "grad_norm": 0.6282295733433491, "learning_rate": 2.587595248534939e-05, "loss": 0.077, "step": 17007 }, { "epoch": 2.0168386102217477, "grad_norm": 0.9184628926302872, "learning_rate": 2.5873553594056665e-05, "loss": 0.1066, "step": 17008 }, { "epoch": 2.016957191983873, "grad_norm": 0.7572574993974743, "learning_rate": 2.587115469471084e-05, "loss": 0.0878, "step": 17009 }, { "epoch": 2.0170757737459977, "grad_norm": 0.6563275916124504, "learning_rate": 2.5868755787334044e-05, "loss": 0.0867, "step": 17010 }, { "epoch": 2.017194355508123, "grad_norm": 0.6450573351207184, "learning_rate": 2.5866356871948377e-05, "loss": 0.1021, "step": 17011 }, { "epoch": 2.0173129372702476, "grad_norm": 0.7782744407286468, "learning_rate": 2.586395794857597e-05, "loss": 0.0953, "step": 17012 }, { "epoch": 2.017431519032373, "grad_norm": 0.6505224118571163, "learning_rate": 2.5861559017238924e-05, "loss": 0.0935, "step": 17013 }, { "epoch": 2.0175501007944976, "grad_norm": 0.9775341457861001, "learning_rate": 2.5859160077959366e-05, "loss": 0.1087, "step": 17014 }, { "epoch": 2.017668682556623, "grad_norm": 0.7507399204993604, "learning_rate": 2.58567611307594e-05, "loss": 0.118, "step": 17015 }, { "epoch": 2.0177872643187476, "grad_norm": 0.802512166205142, "learning_rate": 2.5854362175661145e-05, "loss": 0.0956, "step": 17016 }, { "epoch": 2.0179058460808728, "grad_norm": 0.8483026718060154, "learning_rate": 2.5851963212686725e-05, "loss": 0.1259, "step": 17017 }, { "epoch": 2.018024427842998, "grad_norm": 0.5599421421210834, "learning_rate": 2.5849564241858243e-05, "loss": 0.0843, "step": 17018 }, { "epoch": 2.0181430096051227, "grad_norm": 0.8836350504547238, "learning_rate": 2.5847165263197833e-05, "loss": 0.1218, "step": 17019 }, { "epoch": 2.018261591367248, "grad_norm": 0.7979396013056426, "learning_rate": 2.5844766276727582e-05, "loss": 0.104, "step": 17020 }, { "epoch": 2.0183801731293727, "grad_norm": 0.7283250772029296, "learning_rate": 2.5842367282469626e-05, "loss": 0.1064, "step": 17021 }, { "epoch": 2.018498754891498, "grad_norm": 0.7416022670645755, "learning_rate": 2.5839968280446082e-05, "loss": 0.0947, "step": 17022 }, { "epoch": 2.0186173366536226, "grad_norm": 0.7418149138658839, "learning_rate": 2.5837569270679058e-05, "loss": 0.0916, "step": 17023 }, { "epoch": 2.018735918415748, "grad_norm": 0.7180702601576594, "learning_rate": 2.583517025319067e-05, "loss": 0.0911, "step": 17024 }, { "epoch": 2.0188545001778726, "grad_norm": 0.9536080717815969, "learning_rate": 2.5832771228003043e-05, "loss": 0.1259, "step": 17025 }, { "epoch": 2.018973081939998, "grad_norm": 1.2895852461760804, "learning_rate": 2.583037219513828e-05, "loss": 0.1497, "step": 17026 }, { "epoch": 2.0190916637021226, "grad_norm": 0.5890402150356825, "learning_rate": 2.5827973154618512e-05, "loss": 0.0622, "step": 17027 }, { "epoch": 2.0192102454642478, "grad_norm": 0.8736120384860733, "learning_rate": 2.582557410646584e-05, "loss": 0.1066, "step": 17028 }, { "epoch": 2.0193288272263725, "grad_norm": 0.6284803573145943, "learning_rate": 2.582317505070238e-05, "loss": 0.0836, "step": 17029 }, { "epoch": 2.0194474089884977, "grad_norm": 0.8646189838002291, "learning_rate": 2.5820775987350264e-05, "loss": 0.1192, "step": 17030 }, { "epoch": 2.0195659907506225, "grad_norm": 1.1344278692567753, "learning_rate": 2.5818376916431598e-05, "loss": 0.1298, "step": 17031 }, { "epoch": 2.0196845725127477, "grad_norm": 0.5802589463332644, "learning_rate": 2.5815977837968504e-05, "loss": 0.0871, "step": 17032 }, { "epoch": 2.0198031542748724, "grad_norm": 0.561298136400072, "learning_rate": 2.581357875198309e-05, "loss": 0.0801, "step": 17033 }, { "epoch": 2.0199217360369977, "grad_norm": 0.7154329905210174, "learning_rate": 2.5811179658497488e-05, "loss": 0.1078, "step": 17034 }, { "epoch": 2.0200403177991224, "grad_norm": 1.1173325290434155, "learning_rate": 2.580878055753379e-05, "loss": 0.1233, "step": 17035 }, { "epoch": 2.0201588995612476, "grad_norm": 0.7587198296802913, "learning_rate": 2.5806381449114137e-05, "loss": 0.1036, "step": 17036 }, { "epoch": 2.0202774813233724, "grad_norm": 0.7454429412774327, "learning_rate": 2.580398233326063e-05, "loss": 0.0895, "step": 17037 }, { "epoch": 2.0203960630854976, "grad_norm": 0.9363438058079177, "learning_rate": 2.58015832099954e-05, "loss": 0.1122, "step": 17038 }, { "epoch": 2.0205146448476223, "grad_norm": 0.6651623728141232, "learning_rate": 2.5799184079340544e-05, "loss": 0.1084, "step": 17039 }, { "epoch": 2.0206332266097475, "grad_norm": 0.6002753393589734, "learning_rate": 2.5796784941318197e-05, "loss": 0.0917, "step": 17040 }, { "epoch": 2.0207518083718723, "grad_norm": 0.981664567124827, "learning_rate": 2.5794385795950464e-05, "loss": 0.1069, "step": 17041 }, { "epoch": 2.0208703901339975, "grad_norm": 0.6437059364630277, "learning_rate": 2.579198664325948e-05, "loss": 0.0824, "step": 17042 }, { "epoch": 2.0209889718961223, "grad_norm": 0.8412214987496069, "learning_rate": 2.5789587483267342e-05, "loss": 0.1041, "step": 17043 }, { "epoch": 2.0211075536582475, "grad_norm": 0.7276524318429696, "learning_rate": 2.5787188315996175e-05, "loss": 0.0888, "step": 17044 }, { "epoch": 2.021226135420372, "grad_norm": 0.695936480006316, "learning_rate": 2.5784789141468103e-05, "loss": 0.1058, "step": 17045 }, { "epoch": 2.0213447171824974, "grad_norm": 0.5626316256599394, "learning_rate": 2.578238995970523e-05, "loss": 0.0762, "step": 17046 }, { "epoch": 2.021463298944622, "grad_norm": 0.6230322931026746, "learning_rate": 2.5779990770729695e-05, "loss": 0.0755, "step": 17047 }, { "epoch": 2.0215818807067474, "grad_norm": 0.6651746926003953, "learning_rate": 2.5777591574563587e-05, "loss": 0.0838, "step": 17048 }, { "epoch": 2.021700462468872, "grad_norm": 0.6452315594034443, "learning_rate": 2.577519237122904e-05, "loss": 0.0989, "step": 17049 }, { "epoch": 2.0218190442309973, "grad_norm": 0.9349372324677323, "learning_rate": 2.577279316074817e-05, "loss": 0.0991, "step": 17050 }, { "epoch": 2.021937625993122, "grad_norm": 0.8716240620074638, "learning_rate": 2.5770393943143107e-05, "loss": 0.1086, "step": 17051 }, { "epoch": 2.0220562077552473, "grad_norm": 0.8973891395558506, "learning_rate": 2.576799471843594e-05, "loss": 0.0961, "step": 17052 }, { "epoch": 2.022174789517372, "grad_norm": 0.7805505614285784, "learning_rate": 2.5765595486648813e-05, "loss": 0.0946, "step": 17053 }, { "epoch": 2.0222933712794973, "grad_norm": 0.8853996509020174, "learning_rate": 2.5763196247803832e-05, "loss": 0.1239, "step": 17054 }, { "epoch": 2.022411953041622, "grad_norm": 0.8497478953595783, "learning_rate": 2.576079700192312e-05, "loss": 0.1131, "step": 17055 }, { "epoch": 2.0225305348037472, "grad_norm": 0.7257950660912152, "learning_rate": 2.575839774902879e-05, "loss": 0.0909, "step": 17056 }, { "epoch": 2.022649116565872, "grad_norm": 0.6268970624268199, "learning_rate": 2.5755998489142967e-05, "loss": 0.0958, "step": 17057 }, { "epoch": 2.022767698327997, "grad_norm": 0.9753271143104818, "learning_rate": 2.5753599222287762e-05, "loss": 0.1125, "step": 17058 }, { "epoch": 2.022886280090122, "grad_norm": 0.7515901901542646, "learning_rate": 2.5751199948485287e-05, "loss": 0.0867, "step": 17059 }, { "epoch": 2.023004861852247, "grad_norm": 0.9194005373700264, "learning_rate": 2.574880066775768e-05, "loss": 0.0927, "step": 17060 }, { "epoch": 2.023123443614372, "grad_norm": 0.6945759128399535, "learning_rate": 2.5746401380127046e-05, "loss": 0.0592, "step": 17061 }, { "epoch": 2.023242025376497, "grad_norm": 0.7696287877206168, "learning_rate": 2.574400208561551e-05, "loss": 0.1148, "step": 17062 }, { "epoch": 2.023360607138622, "grad_norm": 0.7532940315124429, "learning_rate": 2.5741602784245183e-05, "loss": 0.11, "step": 17063 }, { "epoch": 2.023479188900747, "grad_norm": 0.6937599794131871, "learning_rate": 2.5739203476038192e-05, "loss": 0.073, "step": 17064 }, { "epoch": 2.023597770662872, "grad_norm": 0.7791104663188599, "learning_rate": 2.573680416101665e-05, "loss": 0.1074, "step": 17065 }, { "epoch": 2.023716352424997, "grad_norm": 0.9057167020333378, "learning_rate": 2.573440483920268e-05, "loss": 0.1016, "step": 17066 }, { "epoch": 2.0238349341871222, "grad_norm": 0.6316059368218953, "learning_rate": 2.573200551061839e-05, "loss": 0.0809, "step": 17067 }, { "epoch": 2.023953515949247, "grad_norm": 0.7822805528735209, "learning_rate": 2.5729606175285914e-05, "loss": 0.0861, "step": 17068 }, { "epoch": 2.024072097711372, "grad_norm": 0.74660995038701, "learning_rate": 2.5727206833227357e-05, "loss": 0.1041, "step": 17069 }, { "epoch": 2.024190679473497, "grad_norm": 0.8356690181588445, "learning_rate": 2.5724807484464845e-05, "loss": 0.1059, "step": 17070 }, { "epoch": 2.024309261235622, "grad_norm": 0.5339107080348244, "learning_rate": 2.5722408129020504e-05, "loss": 0.0587, "step": 17071 }, { "epoch": 2.024427842997747, "grad_norm": 0.9804231617734867, "learning_rate": 2.5720008766916437e-05, "loss": 0.1427, "step": 17072 }, { "epoch": 2.024546424759872, "grad_norm": 0.9604653151741845, "learning_rate": 2.571760939817478e-05, "loss": 0.1233, "step": 17073 }, { "epoch": 2.024665006521997, "grad_norm": 0.6884276691468444, "learning_rate": 2.5715210022817643e-05, "loss": 0.1042, "step": 17074 }, { "epoch": 2.024783588284122, "grad_norm": 0.9060697744579413, "learning_rate": 2.5712810640867147e-05, "loss": 0.1141, "step": 17075 }, { "epoch": 2.024902170046247, "grad_norm": 0.7057465923283026, "learning_rate": 2.5710411252345407e-05, "loss": 0.0938, "step": 17076 }, { "epoch": 2.025020751808372, "grad_norm": 0.6883731034488817, "learning_rate": 2.570801185727455e-05, "loss": 0.0869, "step": 17077 }, { "epoch": 2.025139333570497, "grad_norm": 1.186062895007446, "learning_rate": 2.570561245567669e-05, "loss": 0.163, "step": 17078 }, { "epoch": 2.025257915332622, "grad_norm": 1.1543371666190576, "learning_rate": 2.5703213047573948e-05, "loss": 0.1467, "step": 17079 }, { "epoch": 2.0253764970947468, "grad_norm": 0.6572502179547971, "learning_rate": 2.570081363298844e-05, "loss": 0.083, "step": 17080 }, { "epoch": 2.025495078856872, "grad_norm": 0.7886250622203809, "learning_rate": 2.56984142119423e-05, "loss": 0.0935, "step": 17081 }, { "epoch": 2.0256136606189967, "grad_norm": 0.994791917373475, "learning_rate": 2.5696014784457634e-05, "loss": 0.1332, "step": 17082 }, { "epoch": 2.025732242381122, "grad_norm": 0.5648207338701364, "learning_rate": 2.5693615350556564e-05, "loss": 0.0603, "step": 17083 }, { "epoch": 2.0258508241432467, "grad_norm": 0.7229275810734652, "learning_rate": 2.5691215910261206e-05, "loss": 0.0928, "step": 17084 }, { "epoch": 2.025969405905372, "grad_norm": 0.8276013725490831, "learning_rate": 2.5688816463593696e-05, "loss": 0.1175, "step": 17085 }, { "epoch": 2.0260879876674966, "grad_norm": 0.8444169687651416, "learning_rate": 2.5686417010576136e-05, "loss": 0.1238, "step": 17086 }, { "epoch": 2.026206569429622, "grad_norm": 0.7475774983235246, "learning_rate": 2.5684017551230648e-05, "loss": 0.0842, "step": 17087 }, { "epoch": 2.0263251511917466, "grad_norm": 0.5182330941711295, "learning_rate": 2.5681618085579368e-05, "loss": 0.0663, "step": 17088 }, { "epoch": 2.026443732953872, "grad_norm": 0.6347464389758741, "learning_rate": 2.5679218613644395e-05, "loss": 0.1043, "step": 17089 }, { "epoch": 2.0265623147159966, "grad_norm": 0.7578994837101011, "learning_rate": 2.567681913544786e-05, "loss": 0.0842, "step": 17090 }, { "epoch": 2.0266808964781218, "grad_norm": 0.6320586104307965, "learning_rate": 2.567441965101189e-05, "loss": 0.0746, "step": 17091 }, { "epoch": 2.0267994782402465, "grad_norm": 0.8724461601409338, "learning_rate": 2.5672020160358596e-05, "loss": 0.1172, "step": 17092 }, { "epoch": 2.0269180600023717, "grad_norm": 1.2592429030621068, "learning_rate": 2.5669620663510097e-05, "loss": 0.1544, "step": 17093 }, { "epoch": 2.0270366417644965, "grad_norm": 0.7668679154467282, "learning_rate": 2.566722116048852e-05, "loss": 0.1044, "step": 17094 }, { "epoch": 2.0271552235266217, "grad_norm": 0.559205659210123, "learning_rate": 2.5664821651315972e-05, "loss": 0.0798, "step": 17095 }, { "epoch": 2.0272738052887465, "grad_norm": 0.7220715382530938, "learning_rate": 2.5662422136014596e-05, "loss": 0.1012, "step": 17096 }, { "epoch": 2.0273923870508717, "grad_norm": 0.7657363291980417, "learning_rate": 2.56600226146065e-05, "loss": 0.0993, "step": 17097 }, { "epoch": 2.0275109688129964, "grad_norm": 0.6784183079101309, "learning_rate": 2.56576230871138e-05, "loss": 0.0932, "step": 17098 }, { "epoch": 2.0276295505751216, "grad_norm": 0.6030045868708649, "learning_rate": 2.565522355355862e-05, "loss": 0.0652, "step": 17099 }, { "epoch": 2.0277481323372464, "grad_norm": 0.9313354223738955, "learning_rate": 2.5652824013963082e-05, "loss": 0.1054, "step": 17100 }, { "epoch": 2.0278667140993716, "grad_norm": 0.7352117462217475, "learning_rate": 2.565042446834931e-05, "loss": 0.0764, "step": 17101 }, { "epoch": 2.0279852958614963, "grad_norm": 1.0323174354291909, "learning_rate": 2.5648024916739417e-05, "loss": 0.1216, "step": 17102 }, { "epoch": 2.0281038776236215, "grad_norm": 0.967034565508246, "learning_rate": 2.5645625359155538e-05, "loss": 0.1168, "step": 17103 }, { "epoch": 2.0282224593857463, "grad_norm": 0.6200292497675817, "learning_rate": 2.564322579561978e-05, "loss": 0.0936, "step": 17104 }, { "epoch": 2.0283410411478715, "grad_norm": 0.9083567889595318, "learning_rate": 2.5640826226154275e-05, "loss": 0.1175, "step": 17105 }, { "epoch": 2.0284596229099963, "grad_norm": 0.5955925553708699, "learning_rate": 2.563842665078113e-05, "loss": 0.0917, "step": 17106 }, { "epoch": 2.0285782046721215, "grad_norm": 0.9420046345779765, "learning_rate": 2.563602706952248e-05, "loss": 0.1374, "step": 17107 }, { "epoch": 2.0286967864342462, "grad_norm": 0.8942214771122347, "learning_rate": 2.563362748240043e-05, "loss": 0.1136, "step": 17108 }, { "epoch": 2.0288153681963714, "grad_norm": 0.7427830062636183, "learning_rate": 2.563122788943712e-05, "loss": 0.0828, "step": 17109 }, { "epoch": 2.028933949958496, "grad_norm": 0.8469970320479016, "learning_rate": 2.562882829065466e-05, "loss": 0.0907, "step": 17110 }, { "epoch": 2.0290525317206214, "grad_norm": 0.7356860877597104, "learning_rate": 2.5626428686075175e-05, "loss": 0.1006, "step": 17111 }, { "epoch": 2.029171113482746, "grad_norm": 0.6072532689891982, "learning_rate": 2.5624029075720785e-05, "loss": 0.0976, "step": 17112 }, { "epoch": 2.0292896952448713, "grad_norm": 0.8215295268604407, "learning_rate": 2.5621629459613618e-05, "loss": 0.0934, "step": 17113 }, { "epoch": 2.029408277006996, "grad_norm": 1.0828270828400735, "learning_rate": 2.561922983777578e-05, "loss": 0.1445, "step": 17114 }, { "epoch": 2.0295268587691213, "grad_norm": 0.7043023402986945, "learning_rate": 2.5616830210229407e-05, "loss": 0.1019, "step": 17115 }, { "epoch": 2.0296454405312465, "grad_norm": 0.6252632262102591, "learning_rate": 2.5614430576996623e-05, "loss": 0.0991, "step": 17116 }, { "epoch": 2.0297640222933713, "grad_norm": 0.5759798820493216, "learning_rate": 2.5612030938099536e-05, "loss": 0.0827, "step": 17117 }, { "epoch": 2.0298826040554965, "grad_norm": 0.7912524568474645, "learning_rate": 2.5609631293560275e-05, "loss": 0.1095, "step": 17118 }, { "epoch": 2.0300011858176212, "grad_norm": 0.6802165957015239, "learning_rate": 2.5607231643400957e-05, "loss": 0.1006, "step": 17119 }, { "epoch": 2.0301197675797464, "grad_norm": 0.8040302901074956, "learning_rate": 2.5604831987643714e-05, "loss": 0.1141, "step": 17120 }, { "epoch": 2.030238349341871, "grad_norm": 0.8420660064069628, "learning_rate": 2.5602432326310656e-05, "loss": 0.1258, "step": 17121 }, { "epoch": 2.0303569311039964, "grad_norm": 0.5430076816338275, "learning_rate": 2.5600032659423918e-05, "loss": 0.0672, "step": 17122 }, { "epoch": 2.030475512866121, "grad_norm": 0.803751424029278, "learning_rate": 2.5597632987005604e-05, "loss": 0.0996, "step": 17123 }, { "epoch": 2.0305940946282464, "grad_norm": 0.9154898645706518, "learning_rate": 2.559523330907786e-05, "loss": 0.1164, "step": 17124 }, { "epoch": 2.030712676390371, "grad_norm": 0.8258304849405087, "learning_rate": 2.559283362566278e-05, "loss": 0.1213, "step": 17125 }, { "epoch": 2.0308312581524963, "grad_norm": 0.8546657257625709, "learning_rate": 2.5590433936782514e-05, "loss": 0.1054, "step": 17126 }, { "epoch": 2.030949839914621, "grad_norm": 0.7237655126132667, "learning_rate": 2.5588034242459162e-05, "loss": 0.0914, "step": 17127 }, { "epoch": 2.0310684216767463, "grad_norm": 0.9373188261096604, "learning_rate": 2.558563454271486e-05, "loss": 0.1359, "step": 17128 }, { "epoch": 2.031187003438871, "grad_norm": 0.7212723887970613, "learning_rate": 2.5583234837571724e-05, "loss": 0.0868, "step": 17129 }, { "epoch": 2.0313055852009962, "grad_norm": 0.697681672936011, "learning_rate": 2.558083512705188e-05, "loss": 0.0935, "step": 17130 }, { "epoch": 2.031424166963121, "grad_norm": 0.856077545768736, "learning_rate": 2.5578435411177448e-05, "loss": 0.1098, "step": 17131 }, { "epoch": 2.031542748725246, "grad_norm": 0.711742310593575, "learning_rate": 2.5576035689970545e-05, "loss": 0.0902, "step": 17132 }, { "epoch": 2.031661330487371, "grad_norm": 0.7216678140190265, "learning_rate": 2.5573635963453314e-05, "loss": 0.1119, "step": 17133 }, { "epoch": 2.031779912249496, "grad_norm": 0.6582327566660553, "learning_rate": 2.557123623164785e-05, "loss": 0.0823, "step": 17134 }, { "epoch": 2.031898494011621, "grad_norm": 0.7436828718539309, "learning_rate": 2.5568836494576294e-05, "loss": 0.0794, "step": 17135 }, { "epoch": 2.032017075773746, "grad_norm": 0.45305465666756656, "learning_rate": 2.556643675226076e-05, "loss": 0.0664, "step": 17136 }, { "epoch": 2.032135657535871, "grad_norm": 0.8837176322523292, "learning_rate": 2.556403700472338e-05, "loss": 0.1107, "step": 17137 }, { "epoch": 2.032254239297996, "grad_norm": 0.8724355849660478, "learning_rate": 2.5561637251986258e-05, "loss": 0.1027, "step": 17138 }, { "epoch": 2.032372821060121, "grad_norm": 1.0413564499154007, "learning_rate": 2.5559237494071535e-05, "loss": 0.1266, "step": 17139 }, { "epoch": 2.032491402822246, "grad_norm": 0.7421550628781671, "learning_rate": 2.5556837731001326e-05, "loss": 0.0967, "step": 17140 }, { "epoch": 2.032609984584371, "grad_norm": 0.7064835562595498, "learning_rate": 2.555443796279777e-05, "loss": 0.0809, "step": 17141 }, { "epoch": 2.032728566346496, "grad_norm": 1.0000569459316584, "learning_rate": 2.5552038189482957e-05, "loss": 0.1331, "step": 17142 }, { "epoch": 2.0328471481086208, "grad_norm": 0.6163324208097111, "learning_rate": 2.5549638411079042e-05, "loss": 0.0924, "step": 17143 }, { "epoch": 2.032965729870746, "grad_norm": 0.560162115620009, "learning_rate": 2.5547238627608126e-05, "loss": 0.0656, "step": 17144 }, { "epoch": 2.0330843116328707, "grad_norm": 0.8187552082806973, "learning_rate": 2.5544838839092346e-05, "loss": 0.1132, "step": 17145 }, { "epoch": 2.033202893394996, "grad_norm": 0.8143407419419377, "learning_rate": 2.5542439045553823e-05, "loss": 0.099, "step": 17146 }, { "epoch": 2.0333214751571207, "grad_norm": 0.9975604018190043, "learning_rate": 2.5540039247014668e-05, "loss": 0.1116, "step": 17147 }, { "epoch": 2.033440056919246, "grad_norm": 0.89708968692309, "learning_rate": 2.5537639443497023e-05, "loss": 0.1135, "step": 17148 }, { "epoch": 2.0335586386813707, "grad_norm": 0.7374388256173335, "learning_rate": 2.5535239635022995e-05, "loss": 0.0895, "step": 17149 }, { "epoch": 2.033677220443496, "grad_norm": 0.5945262275342809, "learning_rate": 2.553283982161472e-05, "loss": 0.0959, "step": 17150 }, { "epoch": 2.0337958022056206, "grad_norm": 0.5949568784428877, "learning_rate": 2.5530440003294302e-05, "loss": 0.1014, "step": 17151 }, { "epoch": 2.033914383967746, "grad_norm": 1.2070718924110628, "learning_rate": 2.5528040180083897e-05, "loss": 0.1002, "step": 17152 }, { "epoch": 2.0340329657298706, "grad_norm": 0.5481130166178332, "learning_rate": 2.5525640352005593e-05, "loss": 0.0851, "step": 17153 }, { "epoch": 2.034151547491996, "grad_norm": 0.6650548282519411, "learning_rate": 2.552324051908154e-05, "loss": 0.0921, "step": 17154 }, { "epoch": 2.0342701292541205, "grad_norm": 0.6579508545418454, "learning_rate": 2.5520840681333846e-05, "loss": 0.0668, "step": 17155 }, { "epoch": 2.0343887110162457, "grad_norm": 0.7726965330373178, "learning_rate": 2.551844083878464e-05, "loss": 0.108, "step": 17156 }, { "epoch": 2.0345072927783705, "grad_norm": 0.5769942489225839, "learning_rate": 2.5516040991456046e-05, "loss": 0.0715, "step": 17157 }, { "epoch": 2.0346258745404957, "grad_norm": 0.7019123335956361, "learning_rate": 2.5513641139370182e-05, "loss": 0.0989, "step": 17158 }, { "epoch": 2.0347444563026205, "grad_norm": 0.899457040598042, "learning_rate": 2.551124128254918e-05, "loss": 0.1282, "step": 17159 }, { "epoch": 2.0348630380647457, "grad_norm": 0.6136690650020217, "learning_rate": 2.5508841421015163e-05, "loss": 0.0731, "step": 17160 }, { "epoch": 2.0349816198268704, "grad_norm": 0.7111649250780857, "learning_rate": 2.550644155479025e-05, "loss": 0.0895, "step": 17161 }, { "epoch": 2.0351002015889956, "grad_norm": 0.9837016002678934, "learning_rate": 2.5504041683896567e-05, "loss": 0.1444, "step": 17162 }, { "epoch": 2.0352187833511204, "grad_norm": 0.8094281431542377, "learning_rate": 2.5501641808356243e-05, "loss": 0.1238, "step": 17163 }, { "epoch": 2.0353373651132456, "grad_norm": 0.8933273521801496, "learning_rate": 2.5499241928191386e-05, "loss": 0.1082, "step": 17164 }, { "epoch": 2.035455946875371, "grad_norm": 0.8627677110838211, "learning_rate": 2.549684204342414e-05, "loss": 0.0961, "step": 17165 }, { "epoch": 2.0355745286374956, "grad_norm": 0.937447163237741, "learning_rate": 2.5494442154076614e-05, "loss": 0.1265, "step": 17166 }, { "epoch": 2.0356931103996208, "grad_norm": 1.1245233445450844, "learning_rate": 2.5492042260170944e-05, "loss": 0.173, "step": 17167 }, { "epoch": 2.0358116921617455, "grad_norm": 1.2233203299505409, "learning_rate": 2.5489642361729244e-05, "loss": 0.1115, "step": 17168 }, { "epoch": 2.0359302739238707, "grad_norm": 0.6846928329742754, "learning_rate": 2.5487242458773634e-05, "loss": 0.1007, "step": 17169 }, { "epoch": 2.0360488556859955, "grad_norm": 0.77680738697427, "learning_rate": 2.5484842551326256e-05, "loss": 0.1348, "step": 17170 }, { "epoch": 2.0361674374481207, "grad_norm": 0.7328886391236876, "learning_rate": 2.548244263940922e-05, "loss": 0.116, "step": 17171 }, { "epoch": 2.0362860192102454, "grad_norm": 0.4123414097727911, "learning_rate": 2.5480042723044656e-05, "loss": 0.0618, "step": 17172 }, { "epoch": 2.0364046009723706, "grad_norm": 0.9044851453408387, "learning_rate": 2.5477642802254686e-05, "loss": 0.106, "step": 17173 }, { "epoch": 2.0365231827344954, "grad_norm": 1.0090451154892517, "learning_rate": 2.547524287706144e-05, "loss": 0.1077, "step": 17174 }, { "epoch": 2.0366417644966206, "grad_norm": 0.8476140649731685, "learning_rate": 2.547284294748703e-05, "loss": 0.0804, "step": 17175 }, { "epoch": 2.0367603462587454, "grad_norm": 1.2644333767774454, "learning_rate": 2.5470443013553598e-05, "loss": 0.1589, "step": 17176 }, { "epoch": 2.0368789280208706, "grad_norm": 0.7963339222909953, "learning_rate": 2.5468043075283243e-05, "loss": 0.0838, "step": 17177 }, { "epoch": 2.0369975097829953, "grad_norm": 0.7740753392343336, "learning_rate": 2.5465643132698113e-05, "loss": 0.1304, "step": 17178 }, { "epoch": 2.0371160915451205, "grad_norm": 0.574792918004117, "learning_rate": 2.5463243185820324e-05, "loss": 0.0595, "step": 17179 }, { "epoch": 2.0372346733072453, "grad_norm": 0.7161064856029704, "learning_rate": 2.5460843234672006e-05, "loss": 0.0688, "step": 17180 }, { "epoch": 2.0373532550693705, "grad_norm": 0.6910893052919906, "learning_rate": 2.5458443279275268e-05, "loss": 0.0979, "step": 17181 }, { "epoch": 2.0374718368314952, "grad_norm": 0.8132331231290478, "learning_rate": 2.5456043319652256e-05, "loss": 0.1155, "step": 17182 }, { "epoch": 2.0375904185936204, "grad_norm": 0.7534945017637467, "learning_rate": 2.545364335582508e-05, "loss": 0.0905, "step": 17183 }, { "epoch": 2.037709000355745, "grad_norm": 0.7840895937700697, "learning_rate": 2.545124338781587e-05, "loss": 0.0935, "step": 17184 }, { "epoch": 2.0378275821178704, "grad_norm": 0.5410301749022228, "learning_rate": 2.5448843415646745e-05, "loss": 0.0644, "step": 17185 }, { "epoch": 2.037946163879995, "grad_norm": 0.7203645657491574, "learning_rate": 2.544644343933984e-05, "loss": 0.0733, "step": 17186 }, { "epoch": 2.0380647456421204, "grad_norm": 1.4618114902460237, "learning_rate": 2.5444043458917267e-05, "loss": 0.0759, "step": 17187 }, { "epoch": 2.038183327404245, "grad_norm": 0.5680300174097448, "learning_rate": 2.5441643474401156e-05, "loss": 0.083, "step": 17188 }, { "epoch": 2.0383019091663703, "grad_norm": 0.73499607337744, "learning_rate": 2.5439243485813636e-05, "loss": 0.0937, "step": 17189 }, { "epoch": 2.038420490928495, "grad_norm": 0.6183150148114197, "learning_rate": 2.543684349317683e-05, "loss": 0.1038, "step": 17190 }, { "epoch": 2.0385390726906203, "grad_norm": 0.7717527362636167, "learning_rate": 2.543444349651287e-05, "loss": 0.095, "step": 17191 }, { "epoch": 2.038657654452745, "grad_norm": 0.6274099893015204, "learning_rate": 2.5432043495843866e-05, "loss": 0.0859, "step": 17192 }, { "epoch": 2.0387762362148703, "grad_norm": 0.5591399586915927, "learning_rate": 2.5429643491191955e-05, "loss": 0.0826, "step": 17193 }, { "epoch": 2.038894817976995, "grad_norm": 0.7108724385944604, "learning_rate": 2.5427243482579255e-05, "loss": 0.0962, "step": 17194 }, { "epoch": 2.03901339973912, "grad_norm": 0.738325371324134, "learning_rate": 2.54248434700279e-05, "loss": 0.1019, "step": 17195 }, { "epoch": 2.039131981501245, "grad_norm": 0.7696069687778948, "learning_rate": 2.542244345356e-05, "loss": 0.1049, "step": 17196 }, { "epoch": 2.03925056326337, "grad_norm": 0.9183024538949882, "learning_rate": 2.54200434331977e-05, "loss": 0.1367, "step": 17197 }, { "epoch": 2.039369145025495, "grad_norm": 0.8919507929824677, "learning_rate": 2.5417643408963106e-05, "loss": 0.1224, "step": 17198 }, { "epoch": 2.03948772678762, "grad_norm": 0.8155695047315147, "learning_rate": 2.541524338087835e-05, "loss": 0.1043, "step": 17199 }, { "epoch": 2.039606308549745, "grad_norm": 0.9296601033692349, "learning_rate": 2.5412843348965566e-05, "loss": 0.1354, "step": 17200 }, { "epoch": 2.03972489031187, "grad_norm": 0.6131784579344348, "learning_rate": 2.541044331324687e-05, "loss": 0.0731, "step": 17201 }, { "epoch": 2.039843472073995, "grad_norm": 0.5760572333107017, "learning_rate": 2.540804327374439e-05, "loss": 0.0745, "step": 17202 }, { "epoch": 2.03996205383612, "grad_norm": 0.8100182908553717, "learning_rate": 2.5405643230480247e-05, "loss": 0.1027, "step": 17203 }, { "epoch": 2.040080635598245, "grad_norm": 0.9540105050339577, "learning_rate": 2.540324318347658e-05, "loss": 0.1067, "step": 17204 }, { "epoch": 2.04019921736037, "grad_norm": 0.5488115776568724, "learning_rate": 2.5400843132755498e-05, "loss": 0.0654, "step": 17205 }, { "epoch": 2.040317799122495, "grad_norm": 0.7577541249916874, "learning_rate": 2.539844307833914e-05, "loss": 0.1181, "step": 17206 }, { "epoch": 2.04043638088462, "grad_norm": 0.7088967797834963, "learning_rate": 2.539604302024962e-05, "loss": 0.0877, "step": 17207 }, { "epoch": 2.0405549626467447, "grad_norm": 1.05132792706795, "learning_rate": 2.539364295850907e-05, "loss": 0.1641, "step": 17208 }, { "epoch": 2.04067354440887, "grad_norm": 0.8111291263289154, "learning_rate": 2.5391242893139617e-05, "loss": 0.1215, "step": 17209 }, { "epoch": 2.0407921261709947, "grad_norm": 0.8870372342542985, "learning_rate": 2.5388842824163384e-05, "loss": 0.0896, "step": 17210 }, { "epoch": 2.04091070793312, "grad_norm": 0.8114259758363742, "learning_rate": 2.538644275160249e-05, "loss": 0.1219, "step": 17211 }, { "epoch": 2.0410292896952447, "grad_norm": 0.7929628907671117, "learning_rate": 2.538404267547908e-05, "loss": 0.0932, "step": 17212 }, { "epoch": 2.04114787145737, "grad_norm": 0.8667705238107806, "learning_rate": 2.538164259581526e-05, "loss": 0.1275, "step": 17213 }, { "epoch": 2.0412664532194946, "grad_norm": 0.8673871174423586, "learning_rate": 2.5379242512633167e-05, "loss": 0.095, "step": 17214 }, { "epoch": 2.04138503498162, "grad_norm": 0.9381284563251776, "learning_rate": 2.5376842425954918e-05, "loss": 0.1235, "step": 17215 }, { "epoch": 2.0415036167437446, "grad_norm": 0.8355597687483332, "learning_rate": 2.5374442335802645e-05, "loss": 0.0951, "step": 17216 }, { "epoch": 2.04162219850587, "grad_norm": 0.8024581956754345, "learning_rate": 2.537204224219848e-05, "loss": 0.0981, "step": 17217 }, { "epoch": 2.041740780267995, "grad_norm": 0.5734878581488076, "learning_rate": 2.536964214516453e-05, "loss": 0.1069, "step": 17218 }, { "epoch": 2.0418593620301198, "grad_norm": 0.4953034237645289, "learning_rate": 2.5367242044722938e-05, "loss": 0.0751, "step": 17219 }, { "epoch": 2.041977943792245, "grad_norm": 0.7903399546548958, "learning_rate": 2.5364841940895823e-05, "loss": 0.0878, "step": 17220 }, { "epoch": 2.0420965255543697, "grad_norm": 0.9503575303070972, "learning_rate": 2.536244183370532e-05, "loss": 0.1178, "step": 17221 }, { "epoch": 2.042215107316495, "grad_norm": 0.6148463090628069, "learning_rate": 2.536004172317354e-05, "loss": 0.0849, "step": 17222 }, { "epoch": 2.0423336890786197, "grad_norm": 0.8389076791776786, "learning_rate": 2.5357641609322625e-05, "loss": 0.0946, "step": 17223 }, { "epoch": 2.042452270840745, "grad_norm": 0.8170796702119986, "learning_rate": 2.5355241492174687e-05, "loss": 0.1244, "step": 17224 }, { "epoch": 2.0425708526028696, "grad_norm": 0.5144558116630039, "learning_rate": 2.5352841371751855e-05, "loss": 0.062, "step": 17225 }, { "epoch": 2.042689434364995, "grad_norm": 0.8269906688445805, "learning_rate": 2.535044124807626e-05, "loss": 0.135, "step": 17226 }, { "epoch": 2.0428080161271196, "grad_norm": 0.6930781588355727, "learning_rate": 2.5348041121170036e-05, "loss": 0.0858, "step": 17227 }, { "epoch": 2.042926597889245, "grad_norm": 0.8007216634349194, "learning_rate": 2.534564099105529e-05, "loss": 0.0996, "step": 17228 }, { "epoch": 2.0430451796513696, "grad_norm": 0.8381035860951902, "learning_rate": 2.534324085775416e-05, "loss": 0.104, "step": 17229 }, { "epoch": 2.0431637614134948, "grad_norm": 0.9335938603043002, "learning_rate": 2.5340840721288767e-05, "loss": 0.1302, "step": 17230 }, { "epoch": 2.0432823431756195, "grad_norm": 0.8966432668567896, "learning_rate": 2.533844058168124e-05, "loss": 0.1387, "step": 17231 }, { "epoch": 2.0434009249377447, "grad_norm": 0.7738285089363771, "learning_rate": 2.5336040438953712e-05, "loss": 0.0996, "step": 17232 }, { "epoch": 2.0435195066998695, "grad_norm": 0.9371198833730369, "learning_rate": 2.53336402931283e-05, "loss": 0.1135, "step": 17233 }, { "epoch": 2.0436380884619947, "grad_norm": 0.7201626254657149, "learning_rate": 2.5331240144227138e-05, "loss": 0.0974, "step": 17234 }, { "epoch": 2.0437566702241194, "grad_norm": 0.7445293250278847, "learning_rate": 2.5328839992272342e-05, "loss": 0.0987, "step": 17235 }, { "epoch": 2.0438752519862446, "grad_norm": 0.8162464115155144, "learning_rate": 2.5326439837286053e-05, "loss": 0.088, "step": 17236 }, { "epoch": 2.0439938337483694, "grad_norm": 0.9822702970230561, "learning_rate": 2.5324039679290378e-05, "loss": 0.1235, "step": 17237 }, { "epoch": 2.0441124155104946, "grad_norm": 0.8237309809111716, "learning_rate": 2.532163951830746e-05, "loss": 0.087, "step": 17238 }, { "epoch": 2.0442309972726194, "grad_norm": 0.710659450243611, "learning_rate": 2.5319239354359415e-05, "loss": 0.0978, "step": 17239 }, { "epoch": 2.0443495790347446, "grad_norm": 0.8297034296333, "learning_rate": 2.5316839187468383e-05, "loss": 0.0965, "step": 17240 }, { "epoch": 2.0444681607968693, "grad_norm": 0.6933196632210565, "learning_rate": 2.5314439017656476e-05, "loss": 0.0879, "step": 17241 }, { "epoch": 2.0445867425589945, "grad_norm": 0.6467023829003546, "learning_rate": 2.5312038844945836e-05, "loss": 0.0741, "step": 17242 }, { "epoch": 2.0447053243211193, "grad_norm": 0.7794495041392944, "learning_rate": 2.530963866935857e-05, "loss": 0.1094, "step": 17243 }, { "epoch": 2.0448239060832445, "grad_norm": 0.5531254766616353, "learning_rate": 2.5307238490916818e-05, "loss": 0.0704, "step": 17244 }, { "epoch": 2.0449424878453693, "grad_norm": 0.9129993805151213, "learning_rate": 2.530483830964271e-05, "loss": 0.113, "step": 17245 }, { "epoch": 2.0450610696074945, "grad_norm": 0.5672802099551186, "learning_rate": 2.5302438125558354e-05, "loss": 0.0807, "step": 17246 }, { "epoch": 2.045179651369619, "grad_norm": 0.6827076468992048, "learning_rate": 2.53000379386859e-05, "loss": 0.0801, "step": 17247 }, { "epoch": 2.0452982331317444, "grad_norm": 0.8603059305043104, "learning_rate": 2.529763774904746e-05, "loss": 0.0987, "step": 17248 }, { "epoch": 2.045416814893869, "grad_norm": 1.1044416377059272, "learning_rate": 2.5295237556665163e-05, "loss": 0.1361, "step": 17249 }, { "epoch": 2.0455353966559944, "grad_norm": 0.5844603037224718, "learning_rate": 2.529283736156114e-05, "loss": 0.0604, "step": 17250 }, { "epoch": 2.045653978418119, "grad_norm": 0.7444316748625215, "learning_rate": 2.529043716375752e-05, "loss": 0.0861, "step": 17251 }, { "epoch": 2.0457725601802443, "grad_norm": 1.0653554613999525, "learning_rate": 2.528803696327642e-05, "loss": 0.147, "step": 17252 }, { "epoch": 2.045891141942369, "grad_norm": 0.6378941383447395, "learning_rate": 2.5285636760139973e-05, "loss": 0.0681, "step": 17253 }, { "epoch": 2.0460097237044943, "grad_norm": 0.6628789154445944, "learning_rate": 2.5283236554370308e-05, "loss": 0.0919, "step": 17254 }, { "epoch": 2.046128305466619, "grad_norm": 0.6387138604432548, "learning_rate": 2.528083634598955e-05, "loss": 0.086, "step": 17255 }, { "epoch": 2.0462468872287443, "grad_norm": 0.9159851150955914, "learning_rate": 2.5278436135019824e-05, "loss": 0.1419, "step": 17256 }, { "epoch": 2.046365468990869, "grad_norm": 0.9577667548978641, "learning_rate": 2.5276035921483253e-05, "loss": 0.1441, "step": 17257 }, { "epoch": 2.046484050752994, "grad_norm": 0.8545859831779087, "learning_rate": 2.527363570540197e-05, "loss": 0.1128, "step": 17258 }, { "epoch": 2.046602632515119, "grad_norm": 0.9779277245345986, "learning_rate": 2.5271235486798107e-05, "loss": 0.1311, "step": 17259 }, { "epoch": 2.046721214277244, "grad_norm": 0.6590918317056579, "learning_rate": 2.5268835265693785e-05, "loss": 0.1172, "step": 17260 }, { "epoch": 2.046839796039369, "grad_norm": 0.7050818995517045, "learning_rate": 2.5266435042111132e-05, "loss": 0.0845, "step": 17261 }, { "epoch": 2.046958377801494, "grad_norm": 0.6686723449738298, "learning_rate": 2.5264034816072274e-05, "loss": 0.1, "step": 17262 }, { "epoch": 2.047076959563619, "grad_norm": 0.8261229084007741, "learning_rate": 2.526163458759933e-05, "loss": 0.1193, "step": 17263 }, { "epoch": 2.047195541325744, "grad_norm": 0.8283100946640986, "learning_rate": 2.525923435671445e-05, "loss": 0.1139, "step": 17264 }, { "epoch": 2.047314123087869, "grad_norm": 0.4235627386627182, "learning_rate": 2.525683412343974e-05, "loss": 0.0619, "step": 17265 }, { "epoch": 2.047432704849994, "grad_norm": 0.6684450782990227, "learning_rate": 2.5254433887797336e-05, "loss": 0.1073, "step": 17266 }, { "epoch": 2.0475512866121193, "grad_norm": 0.8844674247488972, "learning_rate": 2.525203364980936e-05, "loss": 0.1068, "step": 17267 }, { "epoch": 2.047669868374244, "grad_norm": 0.6548932855823383, "learning_rate": 2.5249633409497946e-05, "loss": 0.0659, "step": 17268 }, { "epoch": 2.0477884501363692, "grad_norm": 0.806759899845544, "learning_rate": 2.5247233166885214e-05, "loss": 0.1016, "step": 17269 }, { "epoch": 2.047907031898494, "grad_norm": 0.6849290243785915, "learning_rate": 2.52448329219933e-05, "loss": 0.1067, "step": 17270 }, { "epoch": 2.048025613660619, "grad_norm": 0.6678225180566302, "learning_rate": 2.5242432674844324e-05, "loss": 0.0833, "step": 17271 }, { "epoch": 2.048144195422744, "grad_norm": 0.6709001309126991, "learning_rate": 2.5240032425460414e-05, "loss": 0.0975, "step": 17272 }, { "epoch": 2.048262777184869, "grad_norm": 0.9820974719667647, "learning_rate": 2.5237632173863705e-05, "loss": 0.1282, "step": 17273 }, { "epoch": 2.048381358946994, "grad_norm": 0.8551823750784705, "learning_rate": 2.523523192007632e-05, "loss": 0.1002, "step": 17274 }, { "epoch": 2.048499940709119, "grad_norm": 0.8527961322160159, "learning_rate": 2.5232831664120382e-05, "loss": 0.1173, "step": 17275 }, { "epoch": 2.048618522471244, "grad_norm": 0.5650993200792589, "learning_rate": 2.523043140601802e-05, "loss": 0.089, "step": 17276 }, { "epoch": 2.048737104233369, "grad_norm": 0.8539906279720711, "learning_rate": 2.522803114579137e-05, "loss": 0.1489, "step": 17277 }, { "epoch": 2.048855685995494, "grad_norm": 0.9337977110155599, "learning_rate": 2.5225630883462543e-05, "loss": 0.122, "step": 17278 }, { "epoch": 2.048974267757619, "grad_norm": 1.232632667251937, "learning_rate": 2.522323061905368e-05, "loss": 0.1568, "step": 17279 }, { "epoch": 2.049092849519744, "grad_norm": 0.6523927614928772, "learning_rate": 2.52208303525869e-05, "loss": 0.1005, "step": 17280 }, { "epoch": 2.049211431281869, "grad_norm": 0.5378886319855706, "learning_rate": 2.5218430084084345e-05, "loss": 0.0697, "step": 17281 }, { "epoch": 2.0493300130439938, "grad_norm": 0.6918385525423768, "learning_rate": 2.5216029813568122e-05, "loss": 0.0782, "step": 17282 }, { "epoch": 2.049448594806119, "grad_norm": 0.7226585950906734, "learning_rate": 2.5213629541060375e-05, "loss": 0.0772, "step": 17283 }, { "epoch": 2.0495671765682437, "grad_norm": 0.5853777654080909, "learning_rate": 2.5211229266583226e-05, "loss": 0.0806, "step": 17284 }, { "epoch": 2.049685758330369, "grad_norm": 0.801103574635759, "learning_rate": 2.52088289901588e-05, "loss": 0.0991, "step": 17285 }, { "epoch": 2.0498043400924937, "grad_norm": 0.9565901047513836, "learning_rate": 2.5206428711809226e-05, "loss": 0.1492, "step": 17286 }, { "epoch": 2.049922921854619, "grad_norm": 0.5594351624436538, "learning_rate": 2.520402843155663e-05, "loss": 0.0613, "step": 17287 }, { "epoch": 2.0500415036167436, "grad_norm": 0.5615061941742623, "learning_rate": 2.5201628149423147e-05, "loss": 0.081, "step": 17288 }, { "epoch": 2.050160085378869, "grad_norm": 0.49190214833464163, "learning_rate": 2.519922786543089e-05, "loss": 0.0778, "step": 17289 }, { "epoch": 2.0502786671409936, "grad_norm": 0.5361735252613351, "learning_rate": 2.5196827579602013e-05, "loss": 0.0759, "step": 17290 }, { "epoch": 2.050397248903119, "grad_norm": 0.8431625119314979, "learning_rate": 2.5194427291958617e-05, "loss": 0.1066, "step": 17291 }, { "epoch": 2.0505158306652436, "grad_norm": 0.814619379733209, "learning_rate": 2.5192027002522843e-05, "loss": 0.1017, "step": 17292 }, { "epoch": 2.0506344124273688, "grad_norm": 1.0009646459090147, "learning_rate": 2.518962671131681e-05, "loss": 0.1482, "step": 17293 }, { "epoch": 2.0507529941894935, "grad_norm": 0.6488660960195891, "learning_rate": 2.5187226418362664e-05, "loss": 0.073, "step": 17294 }, { "epoch": 2.0508715759516187, "grad_norm": 0.859331075777397, "learning_rate": 2.518482612368251e-05, "loss": 0.12, "step": 17295 }, { "epoch": 2.0509901577137435, "grad_norm": 0.7769438307464144, "learning_rate": 2.518242582729849e-05, "loss": 0.089, "step": 17296 }, { "epoch": 2.0511087394758687, "grad_norm": 1.1180904894558377, "learning_rate": 2.518002552923272e-05, "loss": 0.1472, "step": 17297 }, { "epoch": 2.0512273212379935, "grad_norm": 0.6975673398570656, "learning_rate": 2.517762522950734e-05, "loss": 0.0917, "step": 17298 }, { "epoch": 2.0513459030001187, "grad_norm": 0.5700265467605818, "learning_rate": 2.5175224928144468e-05, "loss": 0.0911, "step": 17299 }, { "epoch": 2.0514644847622434, "grad_norm": 0.9473984891778904, "learning_rate": 2.5172824625166246e-05, "loss": 0.1152, "step": 17300 }, { "epoch": 2.0515830665243686, "grad_norm": 0.7007925952210723, "learning_rate": 2.517042432059479e-05, "loss": 0.0752, "step": 17301 }, { "epoch": 2.0517016482864934, "grad_norm": 0.7857191563515793, "learning_rate": 2.5168024014452224e-05, "loss": 0.1068, "step": 17302 }, { "epoch": 2.0518202300486186, "grad_norm": 0.9585678909029525, "learning_rate": 2.5165623706760694e-05, "loss": 0.148, "step": 17303 }, { "epoch": 2.0519388118107433, "grad_norm": 0.5379678442702583, "learning_rate": 2.5163223397542306e-05, "loss": 0.0696, "step": 17304 }, { "epoch": 2.0520573935728685, "grad_norm": 0.6434673449976782, "learning_rate": 2.5160823086819208e-05, "loss": 0.0801, "step": 17305 }, { "epoch": 2.0521759753349933, "grad_norm": 0.8145734542412243, "learning_rate": 2.5158422774613517e-05, "loss": 0.1067, "step": 17306 }, { "epoch": 2.0522945570971185, "grad_norm": 1.0703879973038493, "learning_rate": 2.5156022460947355e-05, "loss": 0.155, "step": 17307 }, { "epoch": 2.0524131388592433, "grad_norm": 0.9127719219159255, "learning_rate": 2.5153622145842857e-05, "loss": 0.1136, "step": 17308 }, { "epoch": 2.0525317206213685, "grad_norm": 0.6317510982479302, "learning_rate": 2.5151221829322157e-05, "loss": 0.0778, "step": 17309 }, { "epoch": 2.052650302383493, "grad_norm": 0.7562078681946065, "learning_rate": 2.5148821511407377e-05, "loss": 0.0926, "step": 17310 }, { "epoch": 2.0527688841456184, "grad_norm": 0.7701297182479242, "learning_rate": 2.5146421192120646e-05, "loss": 0.0974, "step": 17311 }, { "epoch": 2.052887465907743, "grad_norm": 0.8802155301391068, "learning_rate": 2.5144020871484088e-05, "loss": 0.1247, "step": 17312 }, { "epoch": 2.0530060476698684, "grad_norm": 0.6551511174288259, "learning_rate": 2.514162054951984e-05, "loss": 0.106, "step": 17313 }, { "epoch": 2.053124629431993, "grad_norm": 0.6006635753676851, "learning_rate": 2.5139220226250015e-05, "loss": 0.0859, "step": 17314 }, { "epoch": 2.0532432111941183, "grad_norm": 0.8559248001461366, "learning_rate": 2.5136819901696755e-05, "loss": 0.114, "step": 17315 }, { "epoch": 2.0533617929562435, "grad_norm": 0.9144379690630021, "learning_rate": 2.5134419575882183e-05, "loss": 0.1159, "step": 17316 }, { "epoch": 2.0534803747183683, "grad_norm": 0.4681361720810994, "learning_rate": 2.5132019248828425e-05, "loss": 0.07, "step": 17317 }, { "epoch": 2.0535989564804935, "grad_norm": 0.6647163999155792, "learning_rate": 2.5129618920557613e-05, "loss": 0.0977, "step": 17318 }, { "epoch": 2.0537175382426183, "grad_norm": 0.7878900383569605, "learning_rate": 2.512721859109187e-05, "loss": 0.0962, "step": 17319 }, { "epoch": 2.0538361200047435, "grad_norm": 0.8051633033110276, "learning_rate": 2.512481826045334e-05, "loss": 0.1328, "step": 17320 }, { "epoch": 2.0539547017668682, "grad_norm": 0.6162299887870892, "learning_rate": 2.5122417928664126e-05, "loss": 0.0628, "step": 17321 }, { "epoch": 2.0540732835289934, "grad_norm": 0.664237532245905, "learning_rate": 2.512001759574638e-05, "loss": 0.087, "step": 17322 }, { "epoch": 2.054191865291118, "grad_norm": 0.5538695362114293, "learning_rate": 2.5117617261722208e-05, "loss": 0.0685, "step": 17323 }, { "epoch": 2.0543104470532434, "grad_norm": 0.8698840115791621, "learning_rate": 2.511521692661376e-05, "loss": 0.1145, "step": 17324 }, { "epoch": 2.054429028815368, "grad_norm": 0.8154775192280642, "learning_rate": 2.5112816590443145e-05, "loss": 0.0933, "step": 17325 }, { "epoch": 2.0545476105774934, "grad_norm": 0.479542029380308, "learning_rate": 2.5110416253232504e-05, "loss": 0.0683, "step": 17326 }, { "epoch": 2.054666192339618, "grad_norm": 0.49458804676511914, "learning_rate": 2.510801591500395e-05, "loss": 0.05, "step": 17327 }, { "epoch": 2.0547847741017433, "grad_norm": 0.6743539508358545, "learning_rate": 2.5105615575779628e-05, "loss": 0.1075, "step": 17328 }, { "epoch": 2.054903355863868, "grad_norm": 0.6242414086413426, "learning_rate": 2.5103215235581663e-05, "loss": 0.0853, "step": 17329 }, { "epoch": 2.0550219376259933, "grad_norm": 0.8067809867257045, "learning_rate": 2.5100814894432173e-05, "loss": 0.1129, "step": 17330 }, { "epoch": 2.055140519388118, "grad_norm": 0.7586488835444153, "learning_rate": 2.5098414552353304e-05, "loss": 0.1026, "step": 17331 }, { "epoch": 2.0552591011502432, "grad_norm": 0.6696189680738625, "learning_rate": 2.5096014209367168e-05, "loss": 0.0817, "step": 17332 }, { "epoch": 2.055377682912368, "grad_norm": 0.7791020247650454, "learning_rate": 2.5093613865495903e-05, "loss": 0.1002, "step": 17333 }, { "epoch": 2.055496264674493, "grad_norm": 0.9544249166240728, "learning_rate": 2.5091213520761626e-05, "loss": 0.1087, "step": 17334 }, { "epoch": 2.055614846436618, "grad_norm": 0.7384603064975516, "learning_rate": 2.5088813175186476e-05, "loss": 0.0881, "step": 17335 }, { "epoch": 2.055733428198743, "grad_norm": 0.5349458299250663, "learning_rate": 2.5086412828792578e-05, "loss": 0.087, "step": 17336 }, { "epoch": 2.055852009960868, "grad_norm": 0.7139839381947889, "learning_rate": 2.5084012481602055e-05, "loss": 0.0806, "step": 17337 }, { "epoch": 2.055970591722993, "grad_norm": 0.741966354295771, "learning_rate": 2.508161213363704e-05, "loss": 0.082, "step": 17338 }, { "epoch": 2.056089173485118, "grad_norm": 1.0294325898716359, "learning_rate": 2.5079211784919664e-05, "loss": 0.1051, "step": 17339 }, { "epoch": 2.056207755247243, "grad_norm": 0.5012361050374275, "learning_rate": 2.5076811435472053e-05, "loss": 0.0673, "step": 17340 }, { "epoch": 2.056326337009368, "grad_norm": 0.4966263252115782, "learning_rate": 2.5074411085316342e-05, "loss": 0.0714, "step": 17341 }, { "epoch": 2.056444918771493, "grad_norm": 0.6145999541600821, "learning_rate": 2.507201073447464e-05, "loss": 0.0899, "step": 17342 }, { "epoch": 2.056563500533618, "grad_norm": 0.7172226531210161, "learning_rate": 2.5069610382969093e-05, "loss": 0.1026, "step": 17343 }, { "epoch": 2.056682082295743, "grad_norm": 0.9305722033819583, "learning_rate": 2.5067210030821818e-05, "loss": 0.1323, "step": 17344 }, { "epoch": 2.0568006640578678, "grad_norm": 0.7016495056442461, "learning_rate": 2.5064809678054955e-05, "loss": 0.0896, "step": 17345 }, { "epoch": 2.056919245819993, "grad_norm": 0.49300254050396214, "learning_rate": 2.5062409324690626e-05, "loss": 0.0665, "step": 17346 }, { "epoch": 2.0570378275821177, "grad_norm": 0.6162774312940101, "learning_rate": 2.5060008970750958e-05, "loss": 0.0809, "step": 17347 }, { "epoch": 2.057156409344243, "grad_norm": 0.6707944989800667, "learning_rate": 2.5057608616258076e-05, "loss": 0.085, "step": 17348 }, { "epoch": 2.0572749911063677, "grad_norm": 0.6836190295525338, "learning_rate": 2.505520826123412e-05, "loss": 0.0957, "step": 17349 }, { "epoch": 2.057393572868493, "grad_norm": 0.6130067806482371, "learning_rate": 2.5052807905701215e-05, "loss": 0.0777, "step": 17350 }, { "epoch": 2.0575121546306177, "grad_norm": 0.620184341828502, "learning_rate": 2.5050407549681475e-05, "loss": 0.0844, "step": 17351 }, { "epoch": 2.057630736392743, "grad_norm": 0.6615369907875109, "learning_rate": 2.504800719319705e-05, "loss": 0.0933, "step": 17352 }, { "epoch": 2.0577493181548676, "grad_norm": 0.9509091231322642, "learning_rate": 2.504560683627005e-05, "loss": 0.1069, "step": 17353 }, { "epoch": 2.057867899916993, "grad_norm": 0.7606201870355929, "learning_rate": 2.5043206478922614e-05, "loss": 0.0948, "step": 17354 }, { "epoch": 2.0579864816791176, "grad_norm": 0.7926225459572829, "learning_rate": 2.5040806121176868e-05, "loss": 0.0984, "step": 17355 }, { "epoch": 2.0581050634412428, "grad_norm": 0.694434994328793, "learning_rate": 2.503840576305494e-05, "loss": 0.081, "step": 17356 }, { "epoch": 2.0582236452033675, "grad_norm": 0.9263722873944029, "learning_rate": 2.5036005404578954e-05, "loss": 0.1568, "step": 17357 }, { "epoch": 2.0583422269654927, "grad_norm": 0.7824805916775245, "learning_rate": 2.5033605045771046e-05, "loss": 0.0773, "step": 17358 }, { "epoch": 2.0584608087276175, "grad_norm": 0.9976794330936313, "learning_rate": 2.5031204686653336e-05, "loss": 0.0978, "step": 17359 }, { "epoch": 2.0585793904897427, "grad_norm": 0.8639758336376184, "learning_rate": 2.5028804327247958e-05, "loss": 0.1177, "step": 17360 }, { "epoch": 2.0586979722518675, "grad_norm": 1.0204906541131034, "learning_rate": 2.5026403967577045e-05, "loss": 0.1393, "step": 17361 }, { "epoch": 2.0588165540139927, "grad_norm": 0.794047996862685, "learning_rate": 2.5024003607662715e-05, "loss": 0.097, "step": 17362 }, { "epoch": 2.0589351357761174, "grad_norm": 0.6230972933056133, "learning_rate": 2.5021603247527103e-05, "loss": 0.0773, "step": 17363 }, { "epoch": 2.0590537175382426, "grad_norm": 0.5779396167671657, "learning_rate": 2.5019202887192332e-05, "loss": 0.0679, "step": 17364 }, { "epoch": 2.059172299300368, "grad_norm": 0.7188071480834007, "learning_rate": 2.5016802526680543e-05, "loss": 0.081, "step": 17365 }, { "epoch": 2.0592908810624926, "grad_norm": 0.7175663178486109, "learning_rate": 2.501440216601385e-05, "loss": 0.0809, "step": 17366 }, { "epoch": 2.059409462824618, "grad_norm": 0.8167147709468672, "learning_rate": 2.501200180521438e-05, "loss": 0.0952, "step": 17367 }, { "epoch": 2.0595280445867425, "grad_norm": 0.5601203037887833, "learning_rate": 2.5009601444304276e-05, "loss": 0.0714, "step": 17368 }, { "epoch": 2.0596466263488677, "grad_norm": 0.710404668345531, "learning_rate": 2.500720108330566e-05, "loss": 0.0804, "step": 17369 }, { "epoch": 2.0597652081109925, "grad_norm": 1.1915573272871223, "learning_rate": 2.5004800722240652e-05, "loss": 0.1329, "step": 17370 }, { "epoch": 2.0598837898731177, "grad_norm": 0.9346327366477544, "learning_rate": 2.5002400361131396e-05, "loss": 0.1136, "step": 17371 }, { "epoch": 2.0600023716352425, "grad_norm": 0.9218079671991417, "learning_rate": 2.5e-05, "loss": 0.0922, "step": 17372 }, { "epoch": 2.0601209533973677, "grad_norm": 0.8079216936192907, "learning_rate": 2.4997599638868617e-05, "loss": 0.1009, "step": 17373 }, { "epoch": 2.0602395351594924, "grad_norm": 0.7058243040490104, "learning_rate": 2.4995199277759347e-05, "loss": 0.1105, "step": 17374 }, { "epoch": 2.0603581169216176, "grad_norm": 0.9013244553079822, "learning_rate": 2.4992798916694346e-05, "loss": 0.1285, "step": 17375 }, { "epoch": 2.0604766986837424, "grad_norm": 0.7544294568680402, "learning_rate": 2.4990398555695733e-05, "loss": 0.0955, "step": 17376 }, { "epoch": 2.0605952804458676, "grad_norm": 0.9910509765308024, "learning_rate": 2.498799819478562e-05, "loss": 0.1374, "step": 17377 }, { "epoch": 2.0607138622079924, "grad_norm": 0.9116811334241266, "learning_rate": 2.4985597833986158e-05, "loss": 0.125, "step": 17378 }, { "epoch": 2.0608324439701176, "grad_norm": 0.9496698813014962, "learning_rate": 2.498319747331946e-05, "loss": 0.1116, "step": 17379 }, { "epoch": 2.0609510257322423, "grad_norm": 0.6033501593675298, "learning_rate": 2.498079711280767e-05, "loss": 0.0757, "step": 17380 }, { "epoch": 2.0610696074943675, "grad_norm": 0.9742039958132089, "learning_rate": 2.4978396752472903e-05, "loss": 0.1261, "step": 17381 }, { "epoch": 2.0611881892564923, "grad_norm": 0.7052985673542551, "learning_rate": 2.4975996392337288e-05, "loss": 0.0849, "step": 17382 }, { "epoch": 2.0613067710186175, "grad_norm": 0.6331210219547435, "learning_rate": 2.497359603242296e-05, "loss": 0.085, "step": 17383 }, { "epoch": 2.0614253527807422, "grad_norm": 0.7613102774878522, "learning_rate": 2.4971195672752048e-05, "loss": 0.08, "step": 17384 }, { "epoch": 2.0615439345428674, "grad_norm": 0.5814165903207715, "learning_rate": 2.4968795313346673e-05, "loss": 0.0821, "step": 17385 }, { "epoch": 2.061662516304992, "grad_norm": 0.7968028624284378, "learning_rate": 2.496639495422896e-05, "loss": 0.0857, "step": 17386 }, { "epoch": 2.0617810980671174, "grad_norm": 0.7280287603778376, "learning_rate": 2.4963994595421052e-05, "loss": 0.0871, "step": 17387 }, { "epoch": 2.061899679829242, "grad_norm": 0.7575432961342443, "learning_rate": 2.496159423694507e-05, "loss": 0.107, "step": 17388 }, { "epoch": 2.0620182615913674, "grad_norm": 0.7819633502139661, "learning_rate": 2.495919387882313e-05, "loss": 0.094, "step": 17389 }, { "epoch": 2.062136843353492, "grad_norm": 0.7573236215913882, "learning_rate": 2.495679352107739e-05, "loss": 0.1011, "step": 17390 }, { "epoch": 2.0622554251156173, "grad_norm": 0.6168926173444154, "learning_rate": 2.4954393163729955e-05, "loss": 0.0814, "step": 17391 }, { "epoch": 2.062374006877742, "grad_norm": 0.8145914435734596, "learning_rate": 2.495199280680296e-05, "loss": 0.1167, "step": 17392 }, { "epoch": 2.0624925886398673, "grad_norm": 0.5177892389260967, "learning_rate": 2.4949592450318525e-05, "loss": 0.0672, "step": 17393 }, { "epoch": 2.062611170401992, "grad_norm": 1.0768878183172357, "learning_rate": 2.494719209429879e-05, "loss": 0.1814, "step": 17394 }, { "epoch": 2.0627297521641172, "grad_norm": 0.832796037165638, "learning_rate": 2.4944791738765885e-05, "loss": 0.1148, "step": 17395 }, { "epoch": 2.062848333926242, "grad_norm": 0.8008599774313413, "learning_rate": 2.4942391383741926e-05, "loss": 0.1067, "step": 17396 }, { "epoch": 2.062966915688367, "grad_norm": 0.7117029905504332, "learning_rate": 2.4939991029249048e-05, "loss": 0.1067, "step": 17397 }, { "epoch": 2.063085497450492, "grad_norm": 0.9848679158363012, "learning_rate": 2.4937590675309376e-05, "loss": 0.1291, "step": 17398 }, { "epoch": 2.063204079212617, "grad_norm": 0.9473192993649446, "learning_rate": 2.493519032194505e-05, "loss": 0.125, "step": 17399 }, { "epoch": 2.063322660974742, "grad_norm": 0.9863792740506593, "learning_rate": 2.4932789969178188e-05, "loss": 0.1251, "step": 17400 }, { "epoch": 2.063441242736867, "grad_norm": 0.9422638928282245, "learning_rate": 2.4930389617030913e-05, "loss": 0.1061, "step": 17401 }, { "epoch": 2.063559824498992, "grad_norm": 0.6489765338447396, "learning_rate": 2.4927989265525367e-05, "loss": 0.1, "step": 17402 }, { "epoch": 2.063678406261117, "grad_norm": 0.7555565256337547, "learning_rate": 2.4925588914683674e-05, "loss": 0.0924, "step": 17403 }, { "epoch": 2.063796988023242, "grad_norm": 0.67499499680026, "learning_rate": 2.4923188564527946e-05, "loss": 0.0775, "step": 17404 }, { "epoch": 2.063915569785367, "grad_norm": 0.9226940720124173, "learning_rate": 2.4920788215080338e-05, "loss": 0.1205, "step": 17405 }, { "epoch": 2.064034151547492, "grad_norm": 0.815871920096329, "learning_rate": 2.4918387866362967e-05, "loss": 0.1041, "step": 17406 }, { "epoch": 2.064152733309617, "grad_norm": 1.0368687441616573, "learning_rate": 2.491598751839795e-05, "loss": 0.1386, "step": 17407 }, { "epoch": 2.0642713150717418, "grad_norm": 0.5477907425189817, "learning_rate": 2.491358717120743e-05, "loss": 0.064, "step": 17408 }, { "epoch": 2.064389896833867, "grad_norm": 0.8273362174699883, "learning_rate": 2.491118682481353e-05, "loss": 0.0981, "step": 17409 }, { "epoch": 2.0645084785959917, "grad_norm": 1.1202345412987194, "learning_rate": 2.490878647923838e-05, "loss": 0.1095, "step": 17410 }, { "epoch": 2.064627060358117, "grad_norm": 1.0357873266219841, "learning_rate": 2.490638613450411e-05, "loss": 0.1479, "step": 17411 }, { "epoch": 2.0647456421202417, "grad_norm": 1.0007571879469142, "learning_rate": 2.4903985790632835e-05, "loss": 0.1194, "step": 17412 }, { "epoch": 2.064864223882367, "grad_norm": 0.8617522199577885, "learning_rate": 2.49015854476467e-05, "loss": 0.108, "step": 17413 }, { "epoch": 2.064982805644492, "grad_norm": 0.5471503747883969, "learning_rate": 2.489918510556783e-05, "loss": 0.0795, "step": 17414 }, { "epoch": 2.065101387406617, "grad_norm": 0.5893531229275928, "learning_rate": 2.489678476441834e-05, "loss": 0.0681, "step": 17415 }, { "epoch": 2.0652199691687416, "grad_norm": 0.8561924053142149, "learning_rate": 2.4894384424220375e-05, "loss": 0.1375, "step": 17416 }, { "epoch": 2.065338550930867, "grad_norm": 0.6637277093717949, "learning_rate": 2.489198408499605e-05, "loss": 0.0928, "step": 17417 }, { "epoch": 2.065457132692992, "grad_norm": 0.825625850667534, "learning_rate": 2.488958374676751e-05, "loss": 0.0984, "step": 17418 }, { "epoch": 2.065575714455117, "grad_norm": 0.7856345974055992, "learning_rate": 2.4887183409556857e-05, "loss": 0.1423, "step": 17419 }, { "epoch": 2.065694296217242, "grad_norm": 0.8775316451716293, "learning_rate": 2.4884783073386246e-05, "loss": 0.1143, "step": 17420 }, { "epoch": 2.0658128779793667, "grad_norm": 1.2032394781552735, "learning_rate": 2.4882382738277794e-05, "loss": 0.1487, "step": 17421 }, { "epoch": 2.065931459741492, "grad_norm": 0.8158620796001086, "learning_rate": 2.4879982404253633e-05, "loss": 0.105, "step": 17422 }, { "epoch": 2.0660500415036167, "grad_norm": 0.7724698444628536, "learning_rate": 2.4877582071335873e-05, "loss": 0.1023, "step": 17423 }, { "epoch": 2.066168623265742, "grad_norm": 0.6189519374648508, "learning_rate": 2.4875181739546665e-05, "loss": 0.1072, "step": 17424 }, { "epoch": 2.0662872050278667, "grad_norm": 0.8364203404272675, "learning_rate": 2.487278140890813e-05, "loss": 0.1049, "step": 17425 }, { "epoch": 2.066405786789992, "grad_norm": 1.0718750502652417, "learning_rate": 2.487038107944239e-05, "loss": 0.1435, "step": 17426 }, { "epoch": 2.0665243685521166, "grad_norm": 0.591789614784563, "learning_rate": 2.4867980751171577e-05, "loss": 0.0706, "step": 17427 }, { "epoch": 2.066642950314242, "grad_norm": 0.8023197446764379, "learning_rate": 2.486558042411782e-05, "loss": 0.1025, "step": 17428 }, { "epoch": 2.0667615320763666, "grad_norm": 0.6470044403810371, "learning_rate": 2.486318009830325e-05, "loss": 0.1073, "step": 17429 }, { "epoch": 2.066880113838492, "grad_norm": 0.46605170592685263, "learning_rate": 2.4860779773749984e-05, "loss": 0.0694, "step": 17430 }, { "epoch": 2.0669986956006166, "grad_norm": 0.7506070100844899, "learning_rate": 2.4858379450480167e-05, "loss": 0.0769, "step": 17431 }, { "epoch": 2.0671172773627418, "grad_norm": 0.718483224984212, "learning_rate": 2.4855979128515914e-05, "loss": 0.0819, "step": 17432 }, { "epoch": 2.0672358591248665, "grad_norm": 0.5167825363655733, "learning_rate": 2.4853578807879363e-05, "loss": 0.0873, "step": 17433 }, { "epoch": 2.0673544408869917, "grad_norm": 0.693918161626258, "learning_rate": 2.4851178488592622e-05, "loss": 0.0942, "step": 17434 }, { "epoch": 2.0674730226491165, "grad_norm": 0.5991142259960728, "learning_rate": 2.4848778170677845e-05, "loss": 0.0946, "step": 17435 }, { "epoch": 2.0675916044112417, "grad_norm": 0.6589735329084676, "learning_rate": 2.4846377854157145e-05, "loss": 0.0955, "step": 17436 }, { "epoch": 2.0677101861733664, "grad_norm": 0.7673235348964575, "learning_rate": 2.484397753905265e-05, "loss": 0.1143, "step": 17437 }, { "epoch": 2.0678287679354916, "grad_norm": 0.5566386486894151, "learning_rate": 2.4841577225386492e-05, "loss": 0.0742, "step": 17438 }, { "epoch": 2.0679473496976164, "grad_norm": 1.006724832717608, "learning_rate": 2.4839176913180795e-05, "loss": 0.1075, "step": 17439 }, { "epoch": 2.0680659314597416, "grad_norm": 0.9521801682840229, "learning_rate": 2.4836776602457696e-05, "loss": 0.1407, "step": 17440 }, { "epoch": 2.0681845132218664, "grad_norm": 0.5503396239170611, "learning_rate": 2.4834376293239318e-05, "loss": 0.0752, "step": 17441 }, { "epoch": 2.0683030949839916, "grad_norm": 0.7356941114633565, "learning_rate": 2.4831975985547775e-05, "loss": 0.0992, "step": 17442 }, { "epoch": 2.0684216767461163, "grad_norm": 0.9029228739961515, "learning_rate": 2.4829575679405217e-05, "loss": 0.135, "step": 17443 }, { "epoch": 2.0685402585082415, "grad_norm": 0.6779259424724365, "learning_rate": 2.4827175374833763e-05, "loss": 0.0944, "step": 17444 }, { "epoch": 2.0686588402703663, "grad_norm": 0.7128935352287706, "learning_rate": 2.482477507185553e-05, "loss": 0.105, "step": 17445 }, { "epoch": 2.0687774220324915, "grad_norm": 0.7644405180561832, "learning_rate": 2.482237477049267e-05, "loss": 0.1005, "step": 17446 }, { "epoch": 2.0688960037946162, "grad_norm": 1.0111958693075733, "learning_rate": 2.4819974470767283e-05, "loss": 0.1292, "step": 17447 }, { "epoch": 2.0690145855567414, "grad_norm": 1.100901571546219, "learning_rate": 2.4817574172701524e-05, "loss": 0.1695, "step": 17448 }, { "epoch": 2.069133167318866, "grad_norm": 0.6627063151389971, "learning_rate": 2.4815173876317494e-05, "loss": 0.109, "step": 17449 }, { "epoch": 2.0692517490809914, "grad_norm": 0.6846387311082963, "learning_rate": 2.4812773581637345e-05, "loss": 0.0781, "step": 17450 }, { "epoch": 2.069370330843116, "grad_norm": 0.7456614756363709, "learning_rate": 2.4810373288683192e-05, "loss": 0.0884, "step": 17451 }, { "epoch": 2.0694889126052414, "grad_norm": 0.7755630180188738, "learning_rate": 2.480797299747717e-05, "loss": 0.1154, "step": 17452 }, { "epoch": 2.069607494367366, "grad_norm": 0.6800636511163054, "learning_rate": 2.4805572708041385e-05, "loss": 0.1016, "step": 17453 }, { "epoch": 2.0697260761294913, "grad_norm": 0.7674928930303733, "learning_rate": 2.4803172420397993e-05, "loss": 0.1169, "step": 17454 }, { "epoch": 2.069844657891616, "grad_norm": 0.7542241209138679, "learning_rate": 2.4800772134569112e-05, "loss": 0.0821, "step": 17455 }, { "epoch": 2.0699632396537413, "grad_norm": 0.7062613411967146, "learning_rate": 2.4798371850576862e-05, "loss": 0.0909, "step": 17456 }, { "epoch": 2.070081821415866, "grad_norm": 0.8002659449382707, "learning_rate": 2.4795971568443376e-05, "loss": 0.1174, "step": 17457 }, { "epoch": 2.0702004031779913, "grad_norm": 0.731067998027003, "learning_rate": 2.479357128819078e-05, "loss": 0.087, "step": 17458 }, { "epoch": 2.070318984940116, "grad_norm": 0.672228602592148, "learning_rate": 2.479117100984121e-05, "loss": 0.0971, "step": 17459 }, { "epoch": 2.070437566702241, "grad_norm": 1.1704731102944372, "learning_rate": 2.4788770733416776e-05, "loss": 0.1502, "step": 17460 }, { "epoch": 2.070556148464366, "grad_norm": 0.6870449740869414, "learning_rate": 2.478637045893963e-05, "loss": 0.0958, "step": 17461 }, { "epoch": 2.070674730226491, "grad_norm": 0.8634317731259946, "learning_rate": 2.4783970186431884e-05, "loss": 0.132, "step": 17462 }, { "epoch": 2.070793311988616, "grad_norm": 0.6619572377933647, "learning_rate": 2.478156991591567e-05, "loss": 0.1125, "step": 17463 }, { "epoch": 2.070911893750741, "grad_norm": 0.6098575742332693, "learning_rate": 2.47791696474131e-05, "loss": 0.0903, "step": 17464 }, { "epoch": 2.071030475512866, "grad_norm": 0.5977225952807884, "learning_rate": 2.477676938094633e-05, "loss": 0.0861, "step": 17465 }, { "epoch": 2.071149057274991, "grad_norm": 0.9308505459956217, "learning_rate": 2.4774369116537462e-05, "loss": 0.1169, "step": 17466 }, { "epoch": 2.0712676390371163, "grad_norm": 0.6537130849144013, "learning_rate": 2.4771968854208643e-05, "loss": 0.0859, "step": 17467 }, { "epoch": 2.071386220799241, "grad_norm": 0.7001951201599079, "learning_rate": 2.476956859398198e-05, "loss": 0.0782, "step": 17468 }, { "epoch": 2.0715048025613663, "grad_norm": 0.8151969136124458, "learning_rate": 2.4767168335879624e-05, "loss": 0.1302, "step": 17469 }, { "epoch": 2.071623384323491, "grad_norm": 0.5906058042568211, "learning_rate": 2.476476807992369e-05, "loss": 0.0727, "step": 17470 }, { "epoch": 2.0717419660856162, "grad_norm": 0.5502265845891462, "learning_rate": 2.4762367826136304e-05, "loss": 0.0765, "step": 17471 }, { "epoch": 2.071860547847741, "grad_norm": 0.6540609997372239, "learning_rate": 2.475996757453958e-05, "loss": 0.0833, "step": 17472 }, { "epoch": 2.071979129609866, "grad_norm": 0.6107237720493016, "learning_rate": 2.4757567325155682e-05, "loss": 0.0858, "step": 17473 }, { "epoch": 2.072097711371991, "grad_norm": 0.7826467514719663, "learning_rate": 2.475516707800671e-05, "loss": 0.115, "step": 17474 }, { "epoch": 2.072216293134116, "grad_norm": 0.834847133865399, "learning_rate": 2.4752766833114788e-05, "loss": 0.0989, "step": 17475 }, { "epoch": 2.072334874896241, "grad_norm": 0.9637651027812393, "learning_rate": 2.475036659050206e-05, "loss": 0.1282, "step": 17476 }, { "epoch": 2.072453456658366, "grad_norm": 0.824813591040381, "learning_rate": 2.4747966350190642e-05, "loss": 0.1007, "step": 17477 }, { "epoch": 2.072572038420491, "grad_norm": 0.8512803186351394, "learning_rate": 2.4745566112202673e-05, "loss": 0.1124, "step": 17478 }, { "epoch": 2.072690620182616, "grad_norm": 0.835513229090426, "learning_rate": 2.4743165876560263e-05, "loss": 0.1357, "step": 17479 }, { "epoch": 2.072809201944741, "grad_norm": 0.7230408011134642, "learning_rate": 2.4740765643285555e-05, "loss": 0.0845, "step": 17480 }, { "epoch": 2.072927783706866, "grad_norm": 0.5292730228554473, "learning_rate": 2.473836541240067e-05, "loss": 0.068, "step": 17481 }, { "epoch": 2.073046365468991, "grad_norm": 0.851011873802425, "learning_rate": 2.473596518392774e-05, "loss": 0.1234, "step": 17482 }, { "epoch": 2.073164947231116, "grad_norm": 0.6998948592881266, "learning_rate": 2.473356495788887e-05, "loss": 0.1116, "step": 17483 }, { "epoch": 2.0732835289932408, "grad_norm": 0.7823131013689462, "learning_rate": 2.473116473430622e-05, "loss": 0.0957, "step": 17484 }, { "epoch": 2.073402110755366, "grad_norm": 0.7262117156711655, "learning_rate": 2.47287645132019e-05, "loss": 0.1059, "step": 17485 }, { "epoch": 2.0735206925174907, "grad_norm": 0.7446288195084892, "learning_rate": 2.4726364294598034e-05, "loss": 0.0771, "step": 17486 }, { "epoch": 2.073639274279616, "grad_norm": 0.8686893097541203, "learning_rate": 2.472396407851675e-05, "loss": 0.1046, "step": 17487 }, { "epoch": 2.0737578560417407, "grad_norm": 0.9251466581273083, "learning_rate": 2.4721563864980182e-05, "loss": 0.0998, "step": 17488 }, { "epoch": 2.073876437803866, "grad_norm": 0.6214377853897842, "learning_rate": 2.471916365401046e-05, "loss": 0.0902, "step": 17489 }, { "epoch": 2.0739950195659906, "grad_norm": 0.6254542291000516, "learning_rate": 2.4716763445629694e-05, "loss": 0.0855, "step": 17490 }, { "epoch": 2.074113601328116, "grad_norm": 0.7523832362464642, "learning_rate": 2.471436323986003e-05, "loss": 0.0972, "step": 17491 }, { "epoch": 2.0742321830902406, "grad_norm": 0.8089277586336766, "learning_rate": 2.4711963036723586e-05, "loss": 0.1077, "step": 17492 }, { "epoch": 2.074350764852366, "grad_norm": 0.9745265771555571, "learning_rate": 2.470956283624249e-05, "loss": 0.1404, "step": 17493 }, { "epoch": 2.0744693466144906, "grad_norm": 0.523514167998119, "learning_rate": 2.4707162638438862e-05, "loss": 0.0707, "step": 17494 }, { "epoch": 2.0745879283766158, "grad_norm": 0.6697298216260636, "learning_rate": 2.4704762443334843e-05, "loss": 0.0992, "step": 17495 }, { "epoch": 2.0747065101387405, "grad_norm": 1.0885762035123567, "learning_rate": 2.4702362250952547e-05, "loss": 0.1761, "step": 17496 }, { "epoch": 2.0748250919008657, "grad_norm": 0.5550552897508173, "learning_rate": 2.469996206131411e-05, "loss": 0.0735, "step": 17497 }, { "epoch": 2.0749436736629905, "grad_norm": 0.763012749970266, "learning_rate": 2.4697561874441645e-05, "loss": 0.109, "step": 17498 }, { "epoch": 2.0750622554251157, "grad_norm": 0.7762135285534536, "learning_rate": 2.46951616903573e-05, "loss": 0.0876, "step": 17499 }, { "epoch": 2.0751808371872404, "grad_norm": 1.0242974179155813, "learning_rate": 2.469276150908319e-05, "loss": 0.1279, "step": 17500 }, { "epoch": 2.0752994189493656, "grad_norm": 0.6581940153226955, "learning_rate": 2.4690361330641432e-05, "loss": 0.0738, "step": 17501 }, { "epoch": 2.0754180007114904, "grad_norm": 0.8103622334349971, "learning_rate": 2.4687961155054173e-05, "loss": 0.1313, "step": 17502 }, { "epoch": 2.0755365824736156, "grad_norm": 0.5619542655968248, "learning_rate": 2.4685560982343527e-05, "loss": 0.075, "step": 17503 }, { "epoch": 2.0756551642357404, "grad_norm": 0.6337911668808532, "learning_rate": 2.4683160812531627e-05, "loss": 0.0765, "step": 17504 }, { "epoch": 2.0757737459978656, "grad_norm": 0.9845624363669623, "learning_rate": 2.4680760645640584e-05, "loss": 0.125, "step": 17505 }, { "epoch": 2.0758923277599903, "grad_norm": 0.9167232944212368, "learning_rate": 2.4678360481692548e-05, "loss": 0.1088, "step": 17506 }, { "epoch": 2.0760109095221155, "grad_norm": 0.7797191591109154, "learning_rate": 2.4675960320709628e-05, "loss": 0.1256, "step": 17507 }, { "epoch": 2.0761294912842403, "grad_norm": 0.5345357508808961, "learning_rate": 2.467356016271396e-05, "loss": 0.0727, "step": 17508 }, { "epoch": 2.0762480730463655, "grad_norm": 0.5948296596698751, "learning_rate": 2.467116000772766e-05, "loss": 0.0777, "step": 17509 }, { "epoch": 2.0763666548084903, "grad_norm": 0.6811774285130657, "learning_rate": 2.4668759855772868e-05, "loss": 0.0913, "step": 17510 }, { "epoch": 2.0764852365706155, "grad_norm": 0.6720119257941368, "learning_rate": 2.4666359706871704e-05, "loss": 0.0971, "step": 17511 }, { "epoch": 2.07660381833274, "grad_norm": 0.5958859309678184, "learning_rate": 2.46639595610463e-05, "loss": 0.0774, "step": 17512 }, { "epoch": 2.0767224000948654, "grad_norm": 0.716237341038888, "learning_rate": 2.4661559418318757e-05, "loss": 0.0852, "step": 17513 }, { "epoch": 2.07684098185699, "grad_norm": 1.0105586561385984, "learning_rate": 2.465915927871124e-05, "loss": 0.1247, "step": 17514 }, { "epoch": 2.0769595636191154, "grad_norm": 0.8492776749552114, "learning_rate": 2.4656759142245846e-05, "loss": 0.1133, "step": 17515 }, { "epoch": 2.0770781453812406, "grad_norm": 0.8272961600384933, "learning_rate": 2.4654359008944716e-05, "loss": 0.1258, "step": 17516 }, { "epoch": 2.0771967271433653, "grad_norm": 0.49471177109418485, "learning_rate": 2.465195887882997e-05, "loss": 0.0817, "step": 17517 }, { "epoch": 2.0773153089054905, "grad_norm": 1.1683998290003208, "learning_rate": 2.464955875192374e-05, "loss": 0.1547, "step": 17518 }, { "epoch": 2.0774338906676153, "grad_norm": 0.8682636509523496, "learning_rate": 2.464715862824815e-05, "loss": 0.1078, "step": 17519 }, { "epoch": 2.0775524724297405, "grad_norm": 0.7976375414808635, "learning_rate": 2.4644758507825315e-05, "loss": 0.1153, "step": 17520 }, { "epoch": 2.0776710541918653, "grad_norm": 0.6301286004191206, "learning_rate": 2.464235839067738e-05, "loss": 0.1054, "step": 17521 }, { "epoch": 2.0777896359539905, "grad_norm": 0.6273064868142915, "learning_rate": 2.4639958276826467e-05, "loss": 0.0907, "step": 17522 }, { "epoch": 2.0779082177161152, "grad_norm": 0.9805440735070937, "learning_rate": 2.463755816629469e-05, "loss": 0.1059, "step": 17523 }, { "epoch": 2.0780267994782404, "grad_norm": 1.1339877135164396, "learning_rate": 2.4635158059104176e-05, "loss": 0.1565, "step": 17524 }, { "epoch": 2.078145381240365, "grad_norm": 0.8687485916670098, "learning_rate": 2.4632757955277068e-05, "loss": 0.1053, "step": 17525 }, { "epoch": 2.0782639630024904, "grad_norm": 0.748006965290861, "learning_rate": 2.4630357854835472e-05, "loss": 0.1049, "step": 17526 }, { "epoch": 2.078382544764615, "grad_norm": 0.9377184127845389, "learning_rate": 2.4627957757801534e-05, "loss": 0.1417, "step": 17527 }, { "epoch": 2.0785011265267404, "grad_norm": 0.607561803405347, "learning_rate": 2.4625557664197354e-05, "loss": 0.0806, "step": 17528 }, { "epoch": 2.078619708288865, "grad_norm": 0.8719246470156837, "learning_rate": 2.4623157574045085e-05, "loss": 0.1199, "step": 17529 }, { "epoch": 2.0787382900509903, "grad_norm": 0.7114110731677584, "learning_rate": 2.4620757487366843e-05, "loss": 0.0866, "step": 17530 }, { "epoch": 2.078856871813115, "grad_norm": 0.7098816482982149, "learning_rate": 2.4618357404184743e-05, "loss": 0.1083, "step": 17531 }, { "epoch": 2.0789754535752403, "grad_norm": 0.5962859518727143, "learning_rate": 2.4615957324520926e-05, "loss": 0.0672, "step": 17532 }, { "epoch": 2.079094035337365, "grad_norm": 0.5557378634183437, "learning_rate": 2.461355724839751e-05, "loss": 0.0752, "step": 17533 }, { "epoch": 2.0792126170994902, "grad_norm": 0.7211975040859164, "learning_rate": 2.4611157175836628e-05, "loss": 0.0955, "step": 17534 }, { "epoch": 2.079331198861615, "grad_norm": 0.6506114814105282, "learning_rate": 2.4608757106860385e-05, "loss": 0.0882, "step": 17535 }, { "epoch": 2.07944978062374, "grad_norm": 0.7084767112120013, "learning_rate": 2.4606357041490934e-05, "loss": 0.0768, "step": 17536 }, { "epoch": 2.079568362385865, "grad_norm": 0.8177746122898744, "learning_rate": 2.4603956979750382e-05, "loss": 0.1166, "step": 17537 }, { "epoch": 2.07968694414799, "grad_norm": 0.6994291750083701, "learning_rate": 2.460155692166087e-05, "loss": 0.1, "step": 17538 }, { "epoch": 2.079805525910115, "grad_norm": 0.901683699258696, "learning_rate": 2.4599156867244498e-05, "loss": 0.1528, "step": 17539 }, { "epoch": 2.07992410767224, "grad_norm": 0.8341325783778095, "learning_rate": 2.4596756816523425e-05, "loss": 0.124, "step": 17540 }, { "epoch": 2.080042689434365, "grad_norm": 0.9309733265333537, "learning_rate": 2.4594356769519755e-05, "loss": 0.1228, "step": 17541 }, { "epoch": 2.08016127119649, "grad_norm": 0.8198339604041968, "learning_rate": 2.459195672625562e-05, "loss": 0.0907, "step": 17542 }, { "epoch": 2.080279852958615, "grad_norm": 0.8877427765828633, "learning_rate": 2.4589556686753133e-05, "loss": 0.1109, "step": 17543 }, { "epoch": 2.08039843472074, "grad_norm": 0.8152105320858147, "learning_rate": 2.458715665103444e-05, "loss": 0.076, "step": 17544 }, { "epoch": 2.080517016482865, "grad_norm": 0.728634593349909, "learning_rate": 2.4584756619121655e-05, "loss": 0.1062, "step": 17545 }, { "epoch": 2.08063559824499, "grad_norm": 1.0929498662265413, "learning_rate": 2.4582356591036897e-05, "loss": 0.1311, "step": 17546 }, { "epoch": 2.0807541800071148, "grad_norm": 0.5988253003652865, "learning_rate": 2.4579956566802307e-05, "loss": 0.092, "step": 17547 }, { "epoch": 2.08087276176924, "grad_norm": 0.6624654254923319, "learning_rate": 2.4577556546440002e-05, "loss": 0.092, "step": 17548 }, { "epoch": 2.0809913435313647, "grad_norm": 0.8083842956592264, "learning_rate": 2.457515652997211e-05, "loss": 0.1041, "step": 17549 }, { "epoch": 2.08110992529349, "grad_norm": 0.7751165072453795, "learning_rate": 2.4572756517420744e-05, "loss": 0.104, "step": 17550 }, { "epoch": 2.0812285070556147, "grad_norm": 0.7158893203522532, "learning_rate": 2.4570356508808047e-05, "loss": 0.1028, "step": 17551 }, { "epoch": 2.08134708881774, "grad_norm": 0.8784870868788169, "learning_rate": 2.456795650415614e-05, "loss": 0.1286, "step": 17552 }, { "epoch": 2.0814656705798646, "grad_norm": 0.8973327844649757, "learning_rate": 2.456555650348714e-05, "loss": 0.1206, "step": 17553 }, { "epoch": 2.08158425234199, "grad_norm": 0.9457314639356276, "learning_rate": 2.456315650682317e-05, "loss": 0.1191, "step": 17554 }, { "epoch": 2.0817028341041146, "grad_norm": 0.6646022752950235, "learning_rate": 2.4560756514186367e-05, "loss": 0.1201, "step": 17555 }, { "epoch": 2.08182141586624, "grad_norm": 1.0104121969263449, "learning_rate": 2.455835652559885e-05, "loss": 0.1462, "step": 17556 }, { "epoch": 2.0819399976283646, "grad_norm": 0.7189441623391012, "learning_rate": 2.4555956541082746e-05, "loss": 0.0987, "step": 17557 }, { "epoch": 2.0820585793904898, "grad_norm": 0.706220697546971, "learning_rate": 2.455355656066017e-05, "loss": 0.089, "step": 17558 }, { "epoch": 2.0821771611526145, "grad_norm": 0.776011133866015, "learning_rate": 2.4551156584353264e-05, "loss": 0.1209, "step": 17559 }, { "epoch": 2.0822957429147397, "grad_norm": 0.682417833366189, "learning_rate": 2.4548756612184142e-05, "loss": 0.1045, "step": 17560 }, { "epoch": 2.0824143246768645, "grad_norm": 0.7746598914605438, "learning_rate": 2.454635664417492e-05, "loss": 0.1372, "step": 17561 }, { "epoch": 2.0825329064389897, "grad_norm": 0.9844360354269304, "learning_rate": 2.454395668034775e-05, "loss": 0.1273, "step": 17562 }, { "epoch": 2.0826514882011145, "grad_norm": 0.639976117616033, "learning_rate": 2.4541556720724734e-05, "loss": 0.0872, "step": 17563 }, { "epoch": 2.0827700699632397, "grad_norm": 0.8419100723886338, "learning_rate": 2.4539156765328007e-05, "loss": 0.1303, "step": 17564 }, { "epoch": 2.082888651725365, "grad_norm": 0.7230804309098228, "learning_rate": 2.4536756814179675e-05, "loss": 0.1101, "step": 17565 }, { "epoch": 2.0830072334874896, "grad_norm": 1.0140153116664934, "learning_rate": 2.453435686730189e-05, "loss": 0.1764, "step": 17566 }, { "epoch": 2.083125815249615, "grad_norm": 0.7102831110964155, "learning_rate": 2.453195692471676e-05, "loss": 0.1067, "step": 17567 }, { "epoch": 2.0832443970117396, "grad_norm": 0.7996194201078745, "learning_rate": 2.4529556986446418e-05, "loss": 0.1293, "step": 17568 }, { "epoch": 2.083362978773865, "grad_norm": 0.8108775112488361, "learning_rate": 2.452715705251297e-05, "loss": 0.1309, "step": 17569 }, { "epoch": 2.0834815605359895, "grad_norm": 0.44894440894597726, "learning_rate": 2.4524757122938567e-05, "loss": 0.0538, "step": 17570 }, { "epoch": 2.0836001422981147, "grad_norm": 1.0092731107540156, "learning_rate": 2.452235719774532e-05, "loss": 0.1328, "step": 17571 }, { "epoch": 2.0837187240602395, "grad_norm": 0.76235051126539, "learning_rate": 2.4519957276955354e-05, "loss": 0.0952, "step": 17572 }, { "epoch": 2.0838373058223647, "grad_norm": 0.4905061957884288, "learning_rate": 2.4517557360590782e-05, "loss": 0.0672, "step": 17573 }, { "epoch": 2.0839558875844895, "grad_norm": 0.7663141212353435, "learning_rate": 2.451515744867375e-05, "loss": 0.1084, "step": 17574 }, { "epoch": 2.0840744693466147, "grad_norm": 0.9391908667103068, "learning_rate": 2.4512757541226368e-05, "loss": 0.1126, "step": 17575 }, { "epoch": 2.0841930511087394, "grad_norm": 1.0305139257393674, "learning_rate": 2.4510357638270765e-05, "loss": 0.1433, "step": 17576 }, { "epoch": 2.0843116328708646, "grad_norm": 0.6285902237507764, "learning_rate": 2.450795773982906e-05, "loss": 0.0741, "step": 17577 }, { "epoch": 2.0844302146329894, "grad_norm": 0.6943018202493522, "learning_rate": 2.4505557845923392e-05, "loss": 0.0779, "step": 17578 }, { "epoch": 2.0845487963951146, "grad_norm": 0.8117865140706843, "learning_rate": 2.450315795657587e-05, "loss": 0.1091, "step": 17579 }, { "epoch": 2.0846673781572393, "grad_norm": 0.9721600932731582, "learning_rate": 2.4500758071808613e-05, "loss": 0.1134, "step": 17580 }, { "epoch": 2.0847859599193646, "grad_norm": 0.5661264548447491, "learning_rate": 2.4498358191643763e-05, "loss": 0.0867, "step": 17581 }, { "epoch": 2.0849045416814893, "grad_norm": 0.8157213331216359, "learning_rate": 2.449595831610344e-05, "loss": 0.1255, "step": 17582 }, { "epoch": 2.0850231234436145, "grad_norm": 0.9796931462223702, "learning_rate": 2.449355844520976e-05, "loss": 0.152, "step": 17583 }, { "epoch": 2.0851417052057393, "grad_norm": 0.6666504916994459, "learning_rate": 2.449115857898484e-05, "loss": 0.1118, "step": 17584 }, { "epoch": 2.0852602869678645, "grad_norm": 0.5124568468240628, "learning_rate": 2.4488758717450825e-05, "loss": 0.0601, "step": 17585 }, { "epoch": 2.0853788687299892, "grad_norm": 0.6822491178229534, "learning_rate": 2.448635886062982e-05, "loss": 0.1087, "step": 17586 }, { "epoch": 2.0854974504921144, "grad_norm": 0.6910763818323861, "learning_rate": 2.448395900854396e-05, "loss": 0.1028, "step": 17587 }, { "epoch": 2.085616032254239, "grad_norm": 0.73425255902695, "learning_rate": 2.4481559161215365e-05, "loss": 0.0971, "step": 17588 }, { "epoch": 2.0857346140163644, "grad_norm": 0.9889233631623555, "learning_rate": 2.447915931866616e-05, "loss": 0.0901, "step": 17589 }, { "epoch": 2.085853195778489, "grad_norm": 0.8577216428977207, "learning_rate": 2.447675948091847e-05, "loss": 0.0895, "step": 17590 }, { "epoch": 2.0859717775406144, "grad_norm": 0.9107646601297731, "learning_rate": 2.4474359647994406e-05, "loss": 0.1075, "step": 17591 }, { "epoch": 2.086090359302739, "grad_norm": 0.8538064049803596, "learning_rate": 2.4471959819916112e-05, "loss": 0.1172, "step": 17592 }, { "epoch": 2.0862089410648643, "grad_norm": 0.6550809201450599, "learning_rate": 2.44695599967057e-05, "loss": 0.0876, "step": 17593 }, { "epoch": 2.086327522826989, "grad_norm": 0.8152713960218338, "learning_rate": 2.446716017838529e-05, "loss": 0.1126, "step": 17594 }, { "epoch": 2.0864461045891143, "grad_norm": 0.9861425749236238, "learning_rate": 2.446476036497701e-05, "loss": 0.1404, "step": 17595 }, { "epoch": 2.086564686351239, "grad_norm": 1.1338932156629615, "learning_rate": 2.446236055650298e-05, "loss": 0.1517, "step": 17596 }, { "epoch": 2.0866832681133642, "grad_norm": 0.9200931438714708, "learning_rate": 2.4459960752985338e-05, "loss": 0.1108, "step": 17597 }, { "epoch": 2.086801849875489, "grad_norm": 0.6969253686826722, "learning_rate": 2.445756095444619e-05, "loss": 0.0888, "step": 17598 }, { "epoch": 2.086920431637614, "grad_norm": 1.0325795429643554, "learning_rate": 2.4455161160907657e-05, "loss": 0.1233, "step": 17599 }, { "epoch": 2.087039013399739, "grad_norm": 0.6377420179786789, "learning_rate": 2.4452761372391876e-05, "loss": 0.1006, "step": 17600 }, { "epoch": 2.087157595161864, "grad_norm": 0.7711336253125705, "learning_rate": 2.445036158892097e-05, "loss": 0.0961, "step": 17601 }, { "epoch": 2.087276176923989, "grad_norm": 1.0033727868915332, "learning_rate": 2.4447961810517042e-05, "loss": 0.1009, "step": 17602 }, { "epoch": 2.087394758686114, "grad_norm": 0.9466353534957586, "learning_rate": 2.444556203720224e-05, "loss": 0.1295, "step": 17603 }, { "epoch": 2.087513340448239, "grad_norm": 0.6396654816264715, "learning_rate": 2.4443162268998676e-05, "loss": 0.0959, "step": 17604 }, { "epoch": 2.087631922210364, "grad_norm": 0.7717400642353882, "learning_rate": 2.444076250592847e-05, "loss": 0.0956, "step": 17605 }, { "epoch": 2.087750503972489, "grad_norm": 0.5085232405399873, "learning_rate": 2.4438362748013748e-05, "loss": 0.0859, "step": 17606 }, { "epoch": 2.087869085734614, "grad_norm": 0.787164704662243, "learning_rate": 2.443596299527663e-05, "loss": 0.1239, "step": 17607 }, { "epoch": 2.087987667496739, "grad_norm": 0.7226753304852985, "learning_rate": 2.4433563247739246e-05, "loss": 0.1049, "step": 17608 }, { "epoch": 2.088106249258864, "grad_norm": 0.9304430633718083, "learning_rate": 2.443116350542372e-05, "loss": 0.1356, "step": 17609 }, { "epoch": 2.0882248310209888, "grad_norm": 0.6855210883060936, "learning_rate": 2.4428763768352153e-05, "loss": 0.0931, "step": 17610 }, { "epoch": 2.088343412783114, "grad_norm": 0.8787914443213503, "learning_rate": 2.4426364036546695e-05, "loss": 0.0993, "step": 17611 }, { "epoch": 2.0884619945452387, "grad_norm": 0.7068518287368422, "learning_rate": 2.4423964310029458e-05, "loss": 0.1021, "step": 17612 }, { "epoch": 2.088580576307364, "grad_norm": 0.6504642396711622, "learning_rate": 2.442156458882256e-05, "loss": 0.0841, "step": 17613 }, { "epoch": 2.088699158069489, "grad_norm": 0.7433557604737919, "learning_rate": 2.4419164872948123e-05, "loss": 0.0924, "step": 17614 }, { "epoch": 2.088817739831614, "grad_norm": 0.709053783611437, "learning_rate": 2.4416765162428282e-05, "loss": 0.0837, "step": 17615 }, { "epoch": 2.0889363215937387, "grad_norm": 0.8398086788614837, "learning_rate": 2.4414365457285145e-05, "loss": 0.1131, "step": 17616 }, { "epoch": 2.089054903355864, "grad_norm": 0.5883471183797552, "learning_rate": 2.441196575754084e-05, "loss": 0.0918, "step": 17617 }, { "epoch": 2.089173485117989, "grad_norm": 0.7704184128060104, "learning_rate": 2.440956606321749e-05, "loss": 0.1151, "step": 17618 }, { "epoch": 2.089292066880114, "grad_norm": 0.8413856651734203, "learning_rate": 2.440716637433722e-05, "loss": 0.106, "step": 17619 }, { "epoch": 2.089410648642239, "grad_norm": 0.790596230558701, "learning_rate": 2.4404766690922154e-05, "loss": 0.1127, "step": 17620 }, { "epoch": 2.089529230404364, "grad_norm": 0.9671456935997226, "learning_rate": 2.4402367012994395e-05, "loss": 0.1066, "step": 17621 }, { "epoch": 2.089647812166489, "grad_norm": 0.7863601350384377, "learning_rate": 2.439996734057609e-05, "loss": 0.1147, "step": 17622 }, { "epoch": 2.0897663939286137, "grad_norm": 0.5609428646860544, "learning_rate": 2.4397567673689353e-05, "loss": 0.0748, "step": 17623 }, { "epoch": 2.089884975690739, "grad_norm": 0.9615468157565555, "learning_rate": 2.4395168012356295e-05, "loss": 0.1134, "step": 17624 }, { "epoch": 2.0900035574528637, "grad_norm": 0.7620471522326553, "learning_rate": 2.4392768356599045e-05, "loss": 0.1067, "step": 17625 }, { "epoch": 2.090122139214989, "grad_norm": 0.8240886647210064, "learning_rate": 2.439036870643973e-05, "loss": 0.1135, "step": 17626 }, { "epoch": 2.0902407209771137, "grad_norm": 0.7106948296162857, "learning_rate": 2.4387969061900473e-05, "loss": 0.101, "step": 17627 }, { "epoch": 2.090359302739239, "grad_norm": 0.7312884247017787, "learning_rate": 2.4385569423003386e-05, "loss": 0.1045, "step": 17628 }, { "epoch": 2.0904778845013636, "grad_norm": 0.4792017999600944, "learning_rate": 2.4383169789770592e-05, "loss": 0.0582, "step": 17629 }, { "epoch": 2.090596466263489, "grad_norm": 0.7092388227997387, "learning_rate": 2.438077016222422e-05, "loss": 0.1083, "step": 17630 }, { "epoch": 2.0907150480256136, "grad_norm": 0.7503088423747168, "learning_rate": 2.437837054038639e-05, "loss": 0.0917, "step": 17631 }, { "epoch": 2.090833629787739, "grad_norm": 0.8897422412715482, "learning_rate": 2.437597092427921e-05, "loss": 0.1282, "step": 17632 }, { "epoch": 2.0909522115498635, "grad_norm": 0.9277302510988272, "learning_rate": 2.4373571313924827e-05, "loss": 0.1002, "step": 17633 }, { "epoch": 2.0910707933119888, "grad_norm": 0.8077985333348756, "learning_rate": 2.437117170934535e-05, "loss": 0.1221, "step": 17634 }, { "epoch": 2.0911893750741135, "grad_norm": 1.229855344940687, "learning_rate": 2.436877211056289e-05, "loss": 0.1641, "step": 17635 }, { "epoch": 2.0913079568362387, "grad_norm": 0.9469704948747244, "learning_rate": 2.4366372517599572e-05, "loss": 0.1299, "step": 17636 }, { "epoch": 2.0914265385983635, "grad_norm": 1.0463558536333686, "learning_rate": 2.4363972930477527e-05, "loss": 0.1188, "step": 17637 }, { "epoch": 2.0915451203604887, "grad_norm": 0.7691373061105046, "learning_rate": 2.4361573349218877e-05, "loss": 0.0787, "step": 17638 }, { "epoch": 2.0916637021226134, "grad_norm": 0.609574976790093, "learning_rate": 2.4359173773845738e-05, "loss": 0.0814, "step": 17639 }, { "epoch": 2.0917822838847386, "grad_norm": 0.5768814457326507, "learning_rate": 2.435677420438022e-05, "loss": 0.0829, "step": 17640 }, { "epoch": 2.0919008656468634, "grad_norm": 0.7862685009967889, "learning_rate": 2.4354374640844468e-05, "loss": 0.0965, "step": 17641 }, { "epoch": 2.0920194474089886, "grad_norm": 0.7819893957648906, "learning_rate": 2.4351975083260586e-05, "loss": 0.0734, "step": 17642 }, { "epoch": 2.0921380291711134, "grad_norm": 0.9640960034349493, "learning_rate": 2.43495755316507e-05, "loss": 0.1122, "step": 17643 }, { "epoch": 2.0922566109332386, "grad_norm": 0.7052640202782057, "learning_rate": 2.434717598603692e-05, "loss": 0.0789, "step": 17644 }, { "epoch": 2.0923751926953633, "grad_norm": 0.9684501802268194, "learning_rate": 2.4344776446441385e-05, "loss": 0.106, "step": 17645 }, { "epoch": 2.0924937744574885, "grad_norm": 0.5948129398398712, "learning_rate": 2.4342376912886212e-05, "loss": 0.0912, "step": 17646 }, { "epoch": 2.0926123562196133, "grad_norm": 0.9755656220530663, "learning_rate": 2.4339977385393504e-05, "loss": 0.1321, "step": 17647 }, { "epoch": 2.0927309379817385, "grad_norm": 0.5399435344790019, "learning_rate": 2.4337577863985406e-05, "loss": 0.0838, "step": 17648 }, { "epoch": 2.0928495197438632, "grad_norm": 0.7913035294045291, "learning_rate": 2.433517834868403e-05, "loss": 0.1345, "step": 17649 }, { "epoch": 2.0929681015059884, "grad_norm": 0.8849693920752884, "learning_rate": 2.4332778839511493e-05, "loss": 0.1213, "step": 17650 }, { "epoch": 2.093086683268113, "grad_norm": 0.7564928031194272, "learning_rate": 2.4330379336489906e-05, "loss": 0.1037, "step": 17651 }, { "epoch": 2.0932052650302384, "grad_norm": 0.9379148021536517, "learning_rate": 2.4327979839641413e-05, "loss": 0.1338, "step": 17652 }, { "epoch": 2.093323846792363, "grad_norm": 0.7643884004661348, "learning_rate": 2.432558034898812e-05, "loss": 0.0951, "step": 17653 }, { "epoch": 2.0934424285544884, "grad_norm": 0.6303801348250948, "learning_rate": 2.4323180864552143e-05, "loss": 0.0919, "step": 17654 }, { "epoch": 2.093561010316613, "grad_norm": 0.8447747999939689, "learning_rate": 2.432078138635561e-05, "loss": 0.1166, "step": 17655 }, { "epoch": 2.0936795920787383, "grad_norm": 0.7994587036998436, "learning_rate": 2.4318381914420638e-05, "loss": 0.0979, "step": 17656 }, { "epoch": 2.093798173840863, "grad_norm": 0.7793360224265851, "learning_rate": 2.4315982448769355e-05, "loss": 0.1032, "step": 17657 }, { "epoch": 2.0939167556029883, "grad_norm": 0.8471945027335093, "learning_rate": 2.4313582989423876e-05, "loss": 0.1015, "step": 17658 }, { "epoch": 2.094035337365113, "grad_norm": 0.6599988661901656, "learning_rate": 2.431118353640631e-05, "loss": 0.077, "step": 17659 }, { "epoch": 2.0941539191272383, "grad_norm": 0.940169421718236, "learning_rate": 2.4308784089738797e-05, "loss": 0.1446, "step": 17660 }, { "epoch": 2.094272500889363, "grad_norm": 1.0324906038422277, "learning_rate": 2.430638464944345e-05, "loss": 0.1417, "step": 17661 }, { "epoch": 2.094391082651488, "grad_norm": 0.6776772147789699, "learning_rate": 2.430398521554237e-05, "loss": 0.0849, "step": 17662 }, { "epoch": 2.0945096644136134, "grad_norm": 0.5057363333814702, "learning_rate": 2.4301585788057703e-05, "loss": 0.0695, "step": 17663 }, { "epoch": 2.094628246175738, "grad_norm": 0.7274548497094527, "learning_rate": 2.429918636701156e-05, "loss": 0.0936, "step": 17664 }, { "epoch": 2.094746827937863, "grad_norm": 0.6623585599965763, "learning_rate": 2.4296786952426058e-05, "loss": 0.1082, "step": 17665 }, { "epoch": 2.094865409699988, "grad_norm": 0.7355729436029271, "learning_rate": 2.4294387544323315e-05, "loss": 0.0804, "step": 17666 }, { "epoch": 2.0949839914621133, "grad_norm": 0.7126942093966403, "learning_rate": 2.4291988142725452e-05, "loss": 0.0874, "step": 17667 }, { "epoch": 2.095102573224238, "grad_norm": 0.656426506460523, "learning_rate": 2.42895887476546e-05, "loss": 0.11, "step": 17668 }, { "epoch": 2.0952211549863633, "grad_norm": 0.7540731303594375, "learning_rate": 2.4287189359132866e-05, "loss": 0.1033, "step": 17669 }, { "epoch": 2.095339736748488, "grad_norm": 0.6316538331453777, "learning_rate": 2.428478997718236e-05, "loss": 0.0957, "step": 17670 }, { "epoch": 2.0954583185106133, "grad_norm": 0.5933356186523511, "learning_rate": 2.4282390601825226e-05, "loss": 0.0841, "step": 17671 }, { "epoch": 2.095576900272738, "grad_norm": 0.9463812725042277, "learning_rate": 2.427999123308357e-05, "loss": 0.131, "step": 17672 }, { "epoch": 2.095695482034863, "grad_norm": 0.5598758932119056, "learning_rate": 2.42775918709795e-05, "loss": 0.0764, "step": 17673 }, { "epoch": 2.095814063796988, "grad_norm": 1.0899660941760327, "learning_rate": 2.4275192515535157e-05, "loss": 0.1444, "step": 17674 }, { "epoch": 2.095932645559113, "grad_norm": 0.803957916565864, "learning_rate": 2.427279316677265e-05, "loss": 0.0992, "step": 17675 }, { "epoch": 2.096051227321238, "grad_norm": 0.5023693268536069, "learning_rate": 2.4270393824714098e-05, "loss": 0.0584, "step": 17676 }, { "epoch": 2.096169809083363, "grad_norm": 0.8173363570260191, "learning_rate": 2.426799448938161e-05, "loss": 0.1106, "step": 17677 }, { "epoch": 2.096288390845488, "grad_norm": 0.8118597190162863, "learning_rate": 2.4265595160797326e-05, "loss": 0.1144, "step": 17678 }, { "epoch": 2.096406972607613, "grad_norm": 0.673899532209695, "learning_rate": 2.4263195838983357e-05, "loss": 0.087, "step": 17679 }, { "epoch": 2.096525554369738, "grad_norm": 0.5679188514884848, "learning_rate": 2.4260796523961817e-05, "loss": 0.0689, "step": 17680 }, { "epoch": 2.096644136131863, "grad_norm": 0.8101429492097422, "learning_rate": 2.4258397215754816e-05, "loss": 0.1177, "step": 17681 }, { "epoch": 2.096762717893988, "grad_norm": 0.6217274515965322, "learning_rate": 2.4255997914384496e-05, "loss": 0.11, "step": 17682 }, { "epoch": 2.096881299656113, "grad_norm": 0.725234668879478, "learning_rate": 2.425359861987296e-05, "loss": 0.0879, "step": 17683 }, { "epoch": 2.096999881418238, "grad_norm": 0.8828961395564068, "learning_rate": 2.4251199332242324e-05, "loss": 0.098, "step": 17684 }, { "epoch": 2.097118463180363, "grad_norm": 0.822838990991526, "learning_rate": 2.4248800051514715e-05, "loss": 0.1135, "step": 17685 }, { "epoch": 2.0972370449424877, "grad_norm": 0.6420122210369841, "learning_rate": 2.4246400777712247e-05, "loss": 0.0962, "step": 17686 }, { "epoch": 2.097355626704613, "grad_norm": 0.7427072417549657, "learning_rate": 2.4244001510857042e-05, "loss": 0.1083, "step": 17687 }, { "epoch": 2.0974742084667377, "grad_norm": 0.9026830898643112, "learning_rate": 2.424160225097121e-05, "loss": 0.1075, "step": 17688 }, { "epoch": 2.097592790228863, "grad_norm": 0.5616716478218573, "learning_rate": 2.423920299807688e-05, "loss": 0.066, "step": 17689 }, { "epoch": 2.0977113719909877, "grad_norm": 0.6909623787362744, "learning_rate": 2.423680375219617e-05, "loss": 0.0942, "step": 17690 }, { "epoch": 2.097829953753113, "grad_norm": 1.105110785788971, "learning_rate": 2.4234404513351193e-05, "loss": 0.13, "step": 17691 }, { "epoch": 2.0979485355152376, "grad_norm": 0.7114622165563125, "learning_rate": 2.4232005281564056e-05, "loss": 0.0896, "step": 17692 }, { "epoch": 2.098067117277363, "grad_norm": 0.8710466610109995, "learning_rate": 2.4229606056856903e-05, "loss": 0.118, "step": 17693 }, { "epoch": 2.0981856990394876, "grad_norm": 0.6630048752358645, "learning_rate": 2.4227206839251835e-05, "loss": 0.0885, "step": 17694 }, { "epoch": 2.098304280801613, "grad_norm": 0.7039339312468885, "learning_rate": 2.4224807628770966e-05, "loss": 0.0941, "step": 17695 }, { "epoch": 2.0984228625637376, "grad_norm": 0.6497710909082565, "learning_rate": 2.422240842543642e-05, "loss": 0.0791, "step": 17696 }, { "epoch": 2.0985414443258628, "grad_norm": 0.6425150647218325, "learning_rate": 2.4220009229270314e-05, "loss": 0.101, "step": 17697 }, { "epoch": 2.0986600260879875, "grad_norm": 1.1405118261016693, "learning_rate": 2.421761004029477e-05, "loss": 0.1425, "step": 17698 }, { "epoch": 2.0987786078501127, "grad_norm": 1.0265088551832222, "learning_rate": 2.4215210858531906e-05, "loss": 0.144, "step": 17699 }, { "epoch": 2.0988971896122375, "grad_norm": 0.5761372645506845, "learning_rate": 2.4212811684003824e-05, "loss": 0.0715, "step": 17700 }, { "epoch": 2.0990157713743627, "grad_norm": 0.7844613410716006, "learning_rate": 2.4210412516732664e-05, "loss": 0.1275, "step": 17701 }, { "epoch": 2.0991343531364874, "grad_norm": 0.8224679819545181, "learning_rate": 2.420801335674053e-05, "loss": 0.0961, "step": 17702 }, { "epoch": 2.0992529348986126, "grad_norm": 1.0322039740802258, "learning_rate": 2.420561420404953e-05, "loss": 0.1547, "step": 17703 }, { "epoch": 2.0993715166607374, "grad_norm": 1.0556636487851627, "learning_rate": 2.420321505868181e-05, "loss": 0.1026, "step": 17704 }, { "epoch": 2.0994900984228626, "grad_norm": 0.9495438803878967, "learning_rate": 2.4200815920659458e-05, "loss": 0.1355, "step": 17705 }, { "epoch": 2.0996086801849874, "grad_norm": 0.4896309146710866, "learning_rate": 2.4198416790004613e-05, "loss": 0.0702, "step": 17706 }, { "epoch": 2.0997272619471126, "grad_norm": 0.7753817724988226, "learning_rate": 2.4196017666739372e-05, "loss": 0.1062, "step": 17707 }, { "epoch": 2.0998458437092373, "grad_norm": 0.7134151182700721, "learning_rate": 2.419361855088587e-05, "loss": 0.094, "step": 17708 }, { "epoch": 2.0999644254713625, "grad_norm": 0.764586588293776, "learning_rate": 2.4191219442466215e-05, "loss": 0.1168, "step": 17709 }, { "epoch": 2.1000830072334873, "grad_norm": 0.7634305650064045, "learning_rate": 2.4188820341502528e-05, "loss": 0.1187, "step": 17710 }, { "epoch": 2.1002015889956125, "grad_norm": 0.6006375721324791, "learning_rate": 2.4186421248016912e-05, "loss": 0.0535, "step": 17711 }, { "epoch": 2.1003201707577372, "grad_norm": 0.5843804994008797, "learning_rate": 2.4184022162031502e-05, "loss": 0.0782, "step": 17712 }, { "epoch": 2.1004387525198625, "grad_norm": 0.6280050420341007, "learning_rate": 2.4181623083568408e-05, "loss": 0.0858, "step": 17713 }, { "epoch": 2.100557334281987, "grad_norm": 0.9841011423399921, "learning_rate": 2.4179224012649742e-05, "loss": 0.1247, "step": 17714 }, { "epoch": 2.1006759160441124, "grad_norm": 0.895230359301384, "learning_rate": 2.4176824949297625e-05, "loss": 0.106, "step": 17715 }, { "epoch": 2.1007944978062376, "grad_norm": 0.6112718533517404, "learning_rate": 2.417442589353417e-05, "loss": 0.0845, "step": 17716 }, { "epoch": 2.1009130795683624, "grad_norm": 0.8897589530194006, "learning_rate": 2.41720268453815e-05, "loss": 0.1458, "step": 17717 }, { "epoch": 2.1010316613304876, "grad_norm": 0.8103495354626276, "learning_rate": 2.4169627804861718e-05, "loss": 0.1111, "step": 17718 }, { "epoch": 2.1011502430926123, "grad_norm": 1.0141944510279146, "learning_rate": 2.4167228771996963e-05, "loss": 0.1064, "step": 17719 }, { "epoch": 2.1012688248547375, "grad_norm": 0.9902496044941537, "learning_rate": 2.4164829746809334e-05, "loss": 0.1201, "step": 17720 }, { "epoch": 2.1013874066168623, "grad_norm": 0.714358630911429, "learning_rate": 2.4162430729320954e-05, "loss": 0.1092, "step": 17721 }, { "epoch": 2.1015059883789875, "grad_norm": 0.8795321749846019, "learning_rate": 2.416003171955392e-05, "loss": 0.1142, "step": 17722 }, { "epoch": 2.1016245701411123, "grad_norm": 0.6559569863929873, "learning_rate": 2.4157632717530376e-05, "loss": 0.0912, "step": 17723 }, { "epoch": 2.1017431519032375, "grad_norm": 0.9232786588239772, "learning_rate": 2.415523372327242e-05, "loss": 0.1106, "step": 17724 }, { "epoch": 2.101861733665362, "grad_norm": 0.7764534046428707, "learning_rate": 2.4152834736802183e-05, "loss": 0.1084, "step": 17725 }, { "epoch": 2.1019803154274874, "grad_norm": 0.6564453037413477, "learning_rate": 2.4150435758141753e-05, "loss": 0.1101, "step": 17726 }, { "epoch": 2.102098897189612, "grad_norm": 0.8713781453181823, "learning_rate": 2.414803678731328e-05, "loss": 0.091, "step": 17727 }, { "epoch": 2.1022174789517374, "grad_norm": 0.7758198426616747, "learning_rate": 2.414563782433886e-05, "loss": 0.1197, "step": 17728 }, { "epoch": 2.102336060713862, "grad_norm": 0.5355027713253647, "learning_rate": 2.4143238869240613e-05, "loss": 0.0732, "step": 17729 }, { "epoch": 2.1024546424759873, "grad_norm": 0.6464899771096846, "learning_rate": 2.4140839922040643e-05, "loss": 0.0857, "step": 17730 }, { "epoch": 2.102573224238112, "grad_norm": 0.6803607006983631, "learning_rate": 2.4138440982761085e-05, "loss": 0.0892, "step": 17731 }, { "epoch": 2.1026918060002373, "grad_norm": 1.0174868329402416, "learning_rate": 2.4136042051424043e-05, "loss": 0.1619, "step": 17732 }, { "epoch": 2.102810387762362, "grad_norm": 0.7332631413163705, "learning_rate": 2.4133643128051625e-05, "loss": 0.0899, "step": 17733 }, { "epoch": 2.1029289695244873, "grad_norm": 0.6066182471939238, "learning_rate": 2.4131244212665965e-05, "loss": 0.0782, "step": 17734 }, { "epoch": 2.103047551286612, "grad_norm": 0.6900735547932704, "learning_rate": 2.412884530528916e-05, "loss": 0.0866, "step": 17735 }, { "epoch": 2.1031661330487372, "grad_norm": 0.7094590637101457, "learning_rate": 2.412644640594334e-05, "loss": 0.0971, "step": 17736 }, { "epoch": 2.103284714810862, "grad_norm": 0.7562324873939084, "learning_rate": 2.4124047514650605e-05, "loss": 0.0988, "step": 17737 }, { "epoch": 2.103403296572987, "grad_norm": 0.8553330262311986, "learning_rate": 2.4121648631433088e-05, "loss": 0.1404, "step": 17738 }, { "epoch": 2.103521878335112, "grad_norm": 0.7982551859644568, "learning_rate": 2.411924975631289e-05, "loss": 0.0965, "step": 17739 }, { "epoch": 2.103640460097237, "grad_norm": 0.7902658656827232, "learning_rate": 2.4116850889312135e-05, "loss": 0.1056, "step": 17740 }, { "epoch": 2.103759041859362, "grad_norm": 0.6339656275267481, "learning_rate": 2.4114452030452918e-05, "loss": 0.0878, "step": 17741 }, { "epoch": 2.103877623621487, "grad_norm": 0.9634434942422359, "learning_rate": 2.4112053179757377e-05, "loss": 0.1187, "step": 17742 }, { "epoch": 2.103996205383612, "grad_norm": 0.5700181400267402, "learning_rate": 2.4109654337247616e-05, "loss": 0.0724, "step": 17743 }, { "epoch": 2.104114787145737, "grad_norm": 0.739067136967367, "learning_rate": 2.410725550294574e-05, "loss": 0.1165, "step": 17744 }, { "epoch": 2.104233368907862, "grad_norm": 0.6761849926806303, "learning_rate": 2.4104856676873884e-05, "loss": 0.0889, "step": 17745 }, { "epoch": 2.104351950669987, "grad_norm": 0.913632075367155, "learning_rate": 2.410245785905415e-05, "loss": 0.1075, "step": 17746 }, { "epoch": 2.104470532432112, "grad_norm": 0.8617547109181376, "learning_rate": 2.4100059049508657e-05, "loss": 0.1147, "step": 17747 }, { "epoch": 2.104589114194237, "grad_norm": 0.5983151200427411, "learning_rate": 2.4097660248259504e-05, "loss": 0.0943, "step": 17748 }, { "epoch": 2.1047076959563618, "grad_norm": 0.6052797747410538, "learning_rate": 2.409526145532883e-05, "loss": 0.0751, "step": 17749 }, { "epoch": 2.104826277718487, "grad_norm": 0.7319196233587663, "learning_rate": 2.4092862670738734e-05, "loss": 0.1074, "step": 17750 }, { "epoch": 2.1049448594806117, "grad_norm": 0.7967879666773058, "learning_rate": 2.4090463894511334e-05, "loss": 0.1053, "step": 17751 }, { "epoch": 2.105063441242737, "grad_norm": 0.8520511852485684, "learning_rate": 2.4088065126668727e-05, "loss": 0.0782, "step": 17752 }, { "epoch": 2.1051820230048617, "grad_norm": 0.6620147064784508, "learning_rate": 2.4085666367233058e-05, "loss": 0.0961, "step": 17753 }, { "epoch": 2.105300604766987, "grad_norm": 1.0810729059881963, "learning_rate": 2.4083267616226413e-05, "loss": 0.137, "step": 17754 }, { "epoch": 2.1054191865291116, "grad_norm": 1.1459368994425814, "learning_rate": 2.408086887367092e-05, "loss": 0.1566, "step": 17755 }, { "epoch": 2.105537768291237, "grad_norm": 0.689470151129963, "learning_rate": 2.4078470139588686e-05, "loss": 0.1001, "step": 17756 }, { "epoch": 2.1056563500533616, "grad_norm": 0.8174044487007872, "learning_rate": 2.4076071414001833e-05, "loss": 0.0878, "step": 17757 }, { "epoch": 2.105774931815487, "grad_norm": 0.8120325738384929, "learning_rate": 2.4073672696932468e-05, "loss": 0.1054, "step": 17758 }, { "epoch": 2.1058935135776116, "grad_norm": 0.564762649992703, "learning_rate": 2.4071273988402696e-05, "loss": 0.0732, "step": 17759 }, { "epoch": 2.1060120953397368, "grad_norm": 0.7006981982366348, "learning_rate": 2.406887528843465e-05, "loss": 0.108, "step": 17760 }, { "epoch": 2.1061306771018615, "grad_norm": 0.7524271781285821, "learning_rate": 2.406647659705043e-05, "loss": 0.1124, "step": 17761 }, { "epoch": 2.1062492588639867, "grad_norm": 0.5506039912980265, "learning_rate": 2.406407791427215e-05, "loss": 0.0895, "step": 17762 }, { "epoch": 2.1063678406261115, "grad_norm": 0.6240073246551237, "learning_rate": 2.4061679240121917e-05, "loss": 0.095, "step": 17763 }, { "epoch": 2.1064864223882367, "grad_norm": 0.6299683548672852, "learning_rate": 2.405928057462186e-05, "loss": 0.0943, "step": 17764 }, { "epoch": 2.106605004150362, "grad_norm": 0.7578312100084077, "learning_rate": 2.4056881917794078e-05, "loss": 0.1007, "step": 17765 }, { "epoch": 2.1067235859124867, "grad_norm": 0.9161755012684288, "learning_rate": 2.4054483269660692e-05, "loss": 0.1161, "step": 17766 }, { "epoch": 2.106842167674612, "grad_norm": 0.7163363375820996, "learning_rate": 2.4052084630243798e-05, "loss": 0.1202, "step": 17767 }, { "epoch": 2.1069607494367366, "grad_norm": 0.7294383753574907, "learning_rate": 2.4049685999565536e-05, "loss": 0.0956, "step": 17768 }, { "epoch": 2.107079331198862, "grad_norm": 0.8449570676369849, "learning_rate": 2.4047287377648e-05, "loss": 0.0901, "step": 17769 }, { "epoch": 2.1071979129609866, "grad_norm": 0.7827613058503096, "learning_rate": 2.4044888764513308e-05, "loss": 0.1193, "step": 17770 }, { "epoch": 2.1073164947231118, "grad_norm": 0.8530923936744655, "learning_rate": 2.404249016018356e-05, "loss": 0.105, "step": 17771 }, { "epoch": 2.1074350764852365, "grad_norm": 0.649209621160767, "learning_rate": 2.4040091564680893e-05, "loss": 0.0922, "step": 17772 }, { "epoch": 2.1075536582473617, "grad_norm": 0.6070922340893752, "learning_rate": 2.4037692978027392e-05, "loss": 0.0769, "step": 17773 }, { "epoch": 2.1076722400094865, "grad_norm": 0.6733284583004968, "learning_rate": 2.4035294400245186e-05, "loss": 0.0849, "step": 17774 }, { "epoch": 2.1077908217716117, "grad_norm": 0.9105189636820087, "learning_rate": 2.4032895831356385e-05, "loss": 0.1208, "step": 17775 }, { "epoch": 2.1079094035337365, "grad_norm": 0.49679875286117675, "learning_rate": 2.40304972713831e-05, "loss": 0.0565, "step": 17776 }, { "epoch": 2.1080279852958617, "grad_norm": 0.656112978245929, "learning_rate": 2.4028098720347443e-05, "loss": 0.0874, "step": 17777 }, { "epoch": 2.1081465670579864, "grad_norm": 0.5960129927328174, "learning_rate": 2.402570017827151e-05, "loss": 0.0665, "step": 17778 }, { "epoch": 2.1082651488201116, "grad_norm": 0.8050447560936875, "learning_rate": 2.402330164517744e-05, "loss": 0.0967, "step": 17779 }, { "epoch": 2.1083837305822364, "grad_norm": 0.7507278560063959, "learning_rate": 2.4020903121087328e-05, "loss": 0.1006, "step": 17780 }, { "epoch": 2.1085023123443616, "grad_norm": 0.949728181147529, "learning_rate": 2.4018504606023293e-05, "loss": 0.1231, "step": 17781 }, { "epoch": 2.1086208941064863, "grad_norm": 0.6076752731126464, "learning_rate": 2.401610610000743e-05, "loss": 0.0805, "step": 17782 }, { "epoch": 2.1087394758686115, "grad_norm": 0.6930679748124102, "learning_rate": 2.401370760306187e-05, "loss": 0.0955, "step": 17783 }, { "epoch": 2.1088580576307363, "grad_norm": 0.5573099705270429, "learning_rate": 2.4011309115208715e-05, "loss": 0.0776, "step": 17784 }, { "epoch": 2.1089766393928615, "grad_norm": 0.7150654906236121, "learning_rate": 2.400891063647008e-05, "loss": 0.0925, "step": 17785 }, { "epoch": 2.1090952211549863, "grad_norm": 0.8899017393344478, "learning_rate": 2.4006512166868063e-05, "loss": 0.1196, "step": 17786 }, { "epoch": 2.1092138029171115, "grad_norm": 0.6412620519044003, "learning_rate": 2.40041137064248e-05, "loss": 0.0839, "step": 17787 }, { "epoch": 2.1093323846792362, "grad_norm": 0.7191295068563591, "learning_rate": 2.4001715255162386e-05, "loss": 0.0738, "step": 17788 }, { "epoch": 2.1094509664413614, "grad_norm": 0.7765443185627715, "learning_rate": 2.399931681310292e-05, "loss": 0.0991, "step": 17789 }, { "epoch": 2.109569548203486, "grad_norm": 0.9567393272746687, "learning_rate": 2.3996918380268536e-05, "loss": 0.1305, "step": 17790 }, { "epoch": 2.1096881299656114, "grad_norm": 0.6842986500556484, "learning_rate": 2.399451995668134e-05, "loss": 0.095, "step": 17791 }, { "epoch": 2.109806711727736, "grad_norm": 0.7381934445125651, "learning_rate": 2.3992121542363434e-05, "loss": 0.0837, "step": 17792 }, { "epoch": 2.1099252934898614, "grad_norm": 0.8771190703715759, "learning_rate": 2.3989723137336917e-05, "loss": 0.0807, "step": 17793 }, { "epoch": 2.110043875251986, "grad_norm": 0.6530270771993045, "learning_rate": 2.3987324741623928e-05, "loss": 0.0899, "step": 17794 }, { "epoch": 2.1101624570141113, "grad_norm": 0.6525910681525993, "learning_rate": 2.3984926355246556e-05, "loss": 0.0788, "step": 17795 }, { "epoch": 2.110281038776236, "grad_norm": 0.8675742517335266, "learning_rate": 2.3982527978226926e-05, "loss": 0.1149, "step": 17796 }, { "epoch": 2.1103996205383613, "grad_norm": 0.754610113296398, "learning_rate": 2.398012961058713e-05, "loss": 0.0969, "step": 17797 }, { "epoch": 2.110518202300486, "grad_norm": 0.5747259637751972, "learning_rate": 2.3977731252349296e-05, "loss": 0.077, "step": 17798 }, { "epoch": 2.1106367840626112, "grad_norm": 0.5626709239382442, "learning_rate": 2.397533290353553e-05, "loss": 0.0737, "step": 17799 }, { "epoch": 2.110755365824736, "grad_norm": 0.8426468869969377, "learning_rate": 2.3972934564167933e-05, "loss": 0.1017, "step": 17800 }, { "epoch": 2.110873947586861, "grad_norm": 1.4873802594728036, "learning_rate": 2.3970536234268614e-05, "loss": 0.1275, "step": 17801 }, { "epoch": 2.110992529348986, "grad_norm": 0.6011422419712529, "learning_rate": 2.39681379138597e-05, "loss": 0.0853, "step": 17802 }, { "epoch": 2.111111111111111, "grad_norm": 0.7728765701838523, "learning_rate": 2.3965739602963277e-05, "loss": 0.1058, "step": 17803 }, { "epoch": 2.111229692873236, "grad_norm": 1.1056663562313997, "learning_rate": 2.3963341301601473e-05, "loss": 0.1427, "step": 17804 }, { "epoch": 2.111348274635361, "grad_norm": 0.6811363902703893, "learning_rate": 2.3960943009796384e-05, "loss": 0.0869, "step": 17805 }, { "epoch": 2.111466856397486, "grad_norm": 0.9640801532611685, "learning_rate": 2.395854472757014e-05, "loss": 0.1238, "step": 17806 }, { "epoch": 2.111585438159611, "grad_norm": 0.9107438050565287, "learning_rate": 2.395614645494483e-05, "loss": 0.1267, "step": 17807 }, { "epoch": 2.111704019921736, "grad_norm": 0.9701612775025429, "learning_rate": 2.3953748191942564e-05, "loss": 0.0983, "step": 17808 }, { "epoch": 2.111822601683861, "grad_norm": 0.8783902491264383, "learning_rate": 2.3951349938585462e-05, "loss": 0.1346, "step": 17809 }, { "epoch": 2.111941183445986, "grad_norm": 0.650268689490047, "learning_rate": 2.394895169489563e-05, "loss": 0.0828, "step": 17810 }, { "epoch": 2.112059765208111, "grad_norm": 0.7597270165914234, "learning_rate": 2.3946553460895177e-05, "loss": 0.0939, "step": 17811 }, { "epoch": 2.1121783469702358, "grad_norm": 0.6929831137685498, "learning_rate": 2.3944155236606196e-05, "loss": 0.1053, "step": 17812 }, { "epoch": 2.112296928732361, "grad_norm": 0.5949188911808995, "learning_rate": 2.394175702205082e-05, "loss": 0.084, "step": 17813 }, { "epoch": 2.112415510494486, "grad_norm": 0.7279869069527176, "learning_rate": 2.3939358817251142e-05, "loss": 0.0807, "step": 17814 }, { "epoch": 2.112534092256611, "grad_norm": 0.7574817326237568, "learning_rate": 2.3936960622229284e-05, "loss": 0.088, "step": 17815 }, { "epoch": 2.112652674018736, "grad_norm": 0.7849555715671664, "learning_rate": 2.3934562437007326e-05, "loss": 0.0952, "step": 17816 }, { "epoch": 2.112771255780861, "grad_norm": 0.804884572087379, "learning_rate": 2.3932164261607412e-05, "loss": 0.0908, "step": 17817 }, { "epoch": 2.112889837542986, "grad_norm": 0.7482047034339723, "learning_rate": 2.3929766096051635e-05, "loss": 0.0916, "step": 17818 }, { "epoch": 2.113008419305111, "grad_norm": 0.8927369943301499, "learning_rate": 2.3927367940362087e-05, "loss": 0.121, "step": 17819 }, { "epoch": 2.113127001067236, "grad_norm": 0.6726783278809668, "learning_rate": 2.3924969794560907e-05, "loss": 0.1036, "step": 17820 }, { "epoch": 2.113245582829361, "grad_norm": 0.8691615581385709, "learning_rate": 2.3922571658670185e-05, "loss": 0.1261, "step": 17821 }, { "epoch": 2.113364164591486, "grad_norm": 0.7641283096014578, "learning_rate": 2.3920173532712032e-05, "loss": 0.1046, "step": 17822 }, { "epoch": 2.1134827463536108, "grad_norm": 0.7431156182840443, "learning_rate": 2.391777541670854e-05, "loss": 0.1227, "step": 17823 }, { "epoch": 2.113601328115736, "grad_norm": 0.9504905934445264, "learning_rate": 2.3915377310681848e-05, "loss": 0.1015, "step": 17824 }, { "epoch": 2.1137199098778607, "grad_norm": 1.0267345234592098, "learning_rate": 2.3912979214654038e-05, "loss": 0.1461, "step": 17825 }, { "epoch": 2.113838491639986, "grad_norm": 0.5827311428096316, "learning_rate": 2.3910581128647234e-05, "loss": 0.0721, "step": 17826 }, { "epoch": 2.1139570734021107, "grad_norm": 0.8043094400104702, "learning_rate": 2.390818305268352e-05, "loss": 0.1129, "step": 17827 }, { "epoch": 2.114075655164236, "grad_norm": 0.6764780388616012, "learning_rate": 2.390578498678504e-05, "loss": 0.0878, "step": 17828 }, { "epoch": 2.1141942369263607, "grad_norm": 0.8778304084853604, "learning_rate": 2.3903386930973872e-05, "loss": 0.1199, "step": 17829 }, { "epoch": 2.114312818688486, "grad_norm": 0.774064690962849, "learning_rate": 2.3900988885272125e-05, "loss": 0.1026, "step": 17830 }, { "epoch": 2.1144314004506106, "grad_norm": 0.7959558239054422, "learning_rate": 2.3898590849701926e-05, "loss": 0.1072, "step": 17831 }, { "epoch": 2.114549982212736, "grad_norm": 0.7663234810541174, "learning_rate": 2.3896192824285364e-05, "loss": 0.1087, "step": 17832 }, { "epoch": 2.1146685639748606, "grad_norm": 1.1154225383463556, "learning_rate": 2.3893794809044545e-05, "loss": 0.126, "step": 17833 }, { "epoch": 2.114787145736986, "grad_norm": 0.5991087705834621, "learning_rate": 2.3891396804001585e-05, "loss": 0.0875, "step": 17834 }, { "epoch": 2.1149057274991105, "grad_norm": 1.0291719577881016, "learning_rate": 2.3888998809178583e-05, "loss": 0.1368, "step": 17835 }, { "epoch": 2.1150243092612357, "grad_norm": 0.7016637997683266, "learning_rate": 2.3886600824597656e-05, "loss": 0.078, "step": 17836 }, { "epoch": 2.1151428910233605, "grad_norm": 0.8377366007230417, "learning_rate": 2.3884202850280908e-05, "loss": 0.096, "step": 17837 }, { "epoch": 2.1152614727854857, "grad_norm": 0.9003806095364464, "learning_rate": 2.3881804886250424e-05, "loss": 0.1167, "step": 17838 }, { "epoch": 2.1153800545476105, "grad_norm": 0.6832788106182055, "learning_rate": 2.3879406932528343e-05, "loss": 0.0916, "step": 17839 }, { "epoch": 2.1154986363097357, "grad_norm": 0.7336494287862443, "learning_rate": 2.3877008989136755e-05, "loss": 0.0847, "step": 17840 }, { "epoch": 2.1156172180718604, "grad_norm": 0.6568314242449593, "learning_rate": 2.387461105609777e-05, "loss": 0.0975, "step": 17841 }, { "epoch": 2.1157357998339856, "grad_norm": 0.7543059831922114, "learning_rate": 2.3872213133433473e-05, "loss": 0.1173, "step": 17842 }, { "epoch": 2.1158543815961104, "grad_norm": 0.6734646176869874, "learning_rate": 2.3869815221166006e-05, "loss": 0.0792, "step": 17843 }, { "epoch": 2.1159729633582356, "grad_norm": 0.8542798479702134, "learning_rate": 2.3867417319317446e-05, "loss": 0.11, "step": 17844 }, { "epoch": 2.1160915451203604, "grad_norm": 0.7171418302867631, "learning_rate": 2.3865019427909913e-05, "loss": 0.085, "step": 17845 }, { "epoch": 2.1162101268824856, "grad_norm": 1.1179012240997335, "learning_rate": 2.3862621546965507e-05, "loss": 0.136, "step": 17846 }, { "epoch": 2.1163287086446103, "grad_norm": 0.7261333401535891, "learning_rate": 2.3860223676506345e-05, "loss": 0.1077, "step": 17847 }, { "epoch": 2.1164472904067355, "grad_norm": 0.7971453221368986, "learning_rate": 2.385782581655452e-05, "loss": 0.0886, "step": 17848 }, { "epoch": 2.1165658721688603, "grad_norm": 0.7218070388129837, "learning_rate": 2.385542796713213e-05, "loss": 0.0858, "step": 17849 }, { "epoch": 2.1166844539309855, "grad_norm": 0.8154031966444868, "learning_rate": 2.38530301282613e-05, "loss": 0.0945, "step": 17850 }, { "epoch": 2.1168030356931102, "grad_norm": 0.8123157591488545, "learning_rate": 2.3850632299964127e-05, "loss": 0.0997, "step": 17851 }, { "epoch": 2.1169216174552354, "grad_norm": 0.7283823367334491, "learning_rate": 2.3848234482262707e-05, "loss": 0.1031, "step": 17852 }, { "epoch": 2.11704019921736, "grad_norm": 0.7688741882437939, "learning_rate": 2.3845836675179157e-05, "loss": 0.1035, "step": 17853 }, { "epoch": 2.1171587809794854, "grad_norm": 0.7757209817474158, "learning_rate": 2.3843438878735574e-05, "loss": 0.0872, "step": 17854 }, { "epoch": 2.11727736274161, "grad_norm": 1.0053517108391412, "learning_rate": 2.3841041092954073e-05, "loss": 0.128, "step": 17855 }, { "epoch": 2.1173959445037354, "grad_norm": 0.5602785490077342, "learning_rate": 2.383864331785675e-05, "loss": 0.0681, "step": 17856 }, { "epoch": 2.11751452626586, "grad_norm": 0.6369077188256455, "learning_rate": 2.3836245553465704e-05, "loss": 0.0813, "step": 17857 }, { "epoch": 2.1176331080279853, "grad_norm": 1.0777930882531748, "learning_rate": 2.3833847799803055e-05, "loss": 0.0964, "step": 17858 }, { "epoch": 2.11775168979011, "grad_norm": 0.8869858106586543, "learning_rate": 2.38314500568909e-05, "loss": 0.1119, "step": 17859 }, { "epoch": 2.1178702715522353, "grad_norm": 0.7220445621097537, "learning_rate": 2.382905232475133e-05, "loss": 0.1118, "step": 17860 }, { "epoch": 2.11798885331436, "grad_norm": 0.7857301983006661, "learning_rate": 2.382665460340648e-05, "loss": 0.0915, "step": 17861 }, { "epoch": 2.1181074350764852, "grad_norm": 0.6964569165588829, "learning_rate": 2.3824256892878427e-05, "loss": 0.0991, "step": 17862 }, { "epoch": 2.1182260168386104, "grad_norm": 0.9674335713992094, "learning_rate": 2.3821859193189283e-05, "loss": 0.1091, "step": 17863 }, { "epoch": 2.118344598600735, "grad_norm": 0.7203533898037802, "learning_rate": 2.381946150436115e-05, "loss": 0.0895, "step": 17864 }, { "epoch": 2.11846318036286, "grad_norm": 0.9986072305664891, "learning_rate": 2.3817063826416136e-05, "loss": 0.1142, "step": 17865 }, { "epoch": 2.118581762124985, "grad_norm": 0.6821687776719181, "learning_rate": 2.381466615937635e-05, "loss": 0.0853, "step": 17866 }, { "epoch": 2.1187003438871104, "grad_norm": 0.7594488813347222, "learning_rate": 2.3812268503263882e-05, "loss": 0.0941, "step": 17867 }, { "epoch": 2.118818925649235, "grad_norm": 0.6745054914564077, "learning_rate": 2.3809870858100837e-05, "loss": 0.0906, "step": 17868 }, { "epoch": 2.1189375074113603, "grad_norm": 0.849617017878505, "learning_rate": 2.3807473223909333e-05, "loss": 0.1077, "step": 17869 }, { "epoch": 2.119056089173485, "grad_norm": 0.6570250190706467, "learning_rate": 2.380507560071146e-05, "loss": 0.0815, "step": 17870 }, { "epoch": 2.1191746709356103, "grad_norm": 0.6007178961558507, "learning_rate": 2.380267798852933e-05, "loss": 0.0816, "step": 17871 }, { "epoch": 2.119293252697735, "grad_norm": 0.8433353265681695, "learning_rate": 2.3800280387385025e-05, "loss": 0.1133, "step": 17872 }, { "epoch": 2.1194118344598603, "grad_norm": 0.8588940674706943, "learning_rate": 2.3797882797300677e-05, "loss": 0.1045, "step": 17873 }, { "epoch": 2.119530416221985, "grad_norm": 0.6562080936684265, "learning_rate": 2.379548521829837e-05, "loss": 0.0731, "step": 17874 }, { "epoch": 2.11964899798411, "grad_norm": 0.7884617964159583, "learning_rate": 2.379308765040021e-05, "loss": 0.1307, "step": 17875 }, { "epoch": 2.119767579746235, "grad_norm": 0.7615858302390826, "learning_rate": 2.3790690093628303e-05, "loss": 0.0924, "step": 17876 }, { "epoch": 2.11988616150836, "grad_norm": 0.910756655411388, "learning_rate": 2.378829254800476e-05, "loss": 0.1197, "step": 17877 }, { "epoch": 2.120004743270485, "grad_norm": 0.7193684266250825, "learning_rate": 2.378589501355167e-05, "loss": 0.1011, "step": 17878 }, { "epoch": 2.12012332503261, "grad_norm": 0.7481093992944469, "learning_rate": 2.378349749029113e-05, "loss": 0.1094, "step": 17879 }, { "epoch": 2.120241906794735, "grad_norm": 0.5664161023390765, "learning_rate": 2.3781099978245258e-05, "loss": 0.0658, "step": 17880 }, { "epoch": 2.12036048855686, "grad_norm": 0.6822564240447349, "learning_rate": 2.3778702477436154e-05, "loss": 0.0955, "step": 17881 }, { "epoch": 2.120479070318985, "grad_norm": 0.7159845270892833, "learning_rate": 2.377630498788591e-05, "loss": 0.0902, "step": 17882 }, { "epoch": 2.12059765208111, "grad_norm": 1.6363004739500218, "learning_rate": 2.377390750961663e-05, "loss": 0.1003, "step": 17883 }, { "epoch": 2.120716233843235, "grad_norm": 0.7508743339591013, "learning_rate": 2.3771510042650418e-05, "loss": 0.0952, "step": 17884 }, { "epoch": 2.12083481560536, "grad_norm": 0.7135938435593754, "learning_rate": 2.3769112587009386e-05, "loss": 0.0825, "step": 17885 }, { "epoch": 2.120953397367485, "grad_norm": 0.5581287429631879, "learning_rate": 2.3766715142715628e-05, "loss": 0.0768, "step": 17886 }, { "epoch": 2.12107197912961, "grad_norm": 0.7126186112110405, "learning_rate": 2.376431770979123e-05, "loss": 0.0877, "step": 17887 }, { "epoch": 2.1211905608917347, "grad_norm": 0.5887446013855328, "learning_rate": 2.376192028825832e-05, "loss": 0.0856, "step": 17888 }, { "epoch": 2.12130914265386, "grad_norm": 0.6599783319784512, "learning_rate": 2.3759522878138987e-05, "loss": 0.0813, "step": 17889 }, { "epoch": 2.1214277244159847, "grad_norm": 0.8098462461135668, "learning_rate": 2.375712547945532e-05, "loss": 0.1184, "step": 17890 }, { "epoch": 2.12154630617811, "grad_norm": 0.865996869142336, "learning_rate": 2.375472809222944e-05, "loss": 0.1065, "step": 17891 }, { "epoch": 2.1216648879402347, "grad_norm": 0.7620143848912132, "learning_rate": 2.3752330716483444e-05, "loss": 0.1223, "step": 17892 }, { "epoch": 2.12178346970236, "grad_norm": 0.7704642623591977, "learning_rate": 2.374993335223942e-05, "loss": 0.1101, "step": 17893 }, { "epoch": 2.1219020514644846, "grad_norm": 0.6848403174587552, "learning_rate": 2.3747535999519486e-05, "loss": 0.0997, "step": 17894 }, { "epoch": 2.12202063322661, "grad_norm": 0.5677025861701702, "learning_rate": 2.3745138658345724e-05, "loss": 0.066, "step": 17895 }, { "epoch": 2.1221392149887346, "grad_norm": 0.9629647904451156, "learning_rate": 2.3742741328740257e-05, "loss": 0.1569, "step": 17896 }, { "epoch": 2.12225779675086, "grad_norm": 0.7185129710325748, "learning_rate": 2.3740344010725173e-05, "loss": 0.0859, "step": 17897 }, { "epoch": 2.1223763785129846, "grad_norm": 1.0197775429680558, "learning_rate": 2.3737946704322557e-05, "loss": 0.1644, "step": 17898 }, { "epoch": 2.1224949602751098, "grad_norm": 0.705329123506078, "learning_rate": 2.3735549409554536e-05, "loss": 0.1002, "step": 17899 }, { "epoch": 2.1226135420372345, "grad_norm": 0.5138364659795601, "learning_rate": 2.3733152126443202e-05, "loss": 0.0703, "step": 17900 }, { "epoch": 2.1227321237993597, "grad_norm": 0.6311912081913832, "learning_rate": 2.3730754855010646e-05, "loss": 0.0683, "step": 17901 }, { "epoch": 2.1228507055614845, "grad_norm": 0.6396745038508417, "learning_rate": 2.372835759527897e-05, "loss": 0.0823, "step": 17902 }, { "epoch": 2.1229692873236097, "grad_norm": 0.6500550560464794, "learning_rate": 2.372596034727028e-05, "loss": 0.0904, "step": 17903 }, { "epoch": 2.1230878690857344, "grad_norm": 0.8524321668201746, "learning_rate": 2.372356311100668e-05, "loss": 0.1262, "step": 17904 }, { "epoch": 2.1232064508478596, "grad_norm": 0.9614391567077992, "learning_rate": 2.3721165886510253e-05, "loss": 0.1506, "step": 17905 }, { "epoch": 2.1233250326099844, "grad_norm": 0.7279463263710606, "learning_rate": 2.3718768673803115e-05, "loss": 0.1005, "step": 17906 }, { "epoch": 2.1234436143721096, "grad_norm": 0.7149919487680201, "learning_rate": 2.371637147290736e-05, "loss": 0.0795, "step": 17907 }, { "epoch": 2.1235621961342344, "grad_norm": 0.598201347048442, "learning_rate": 2.371397428384509e-05, "loss": 0.077, "step": 17908 }, { "epoch": 2.1236807778963596, "grad_norm": 1.0522016189169252, "learning_rate": 2.3711577106638384e-05, "loss": 0.1241, "step": 17909 }, { "epoch": 2.1237993596584843, "grad_norm": 0.9406466612416545, "learning_rate": 2.3709179941309374e-05, "loss": 0.1334, "step": 17910 }, { "epoch": 2.1239179414206095, "grad_norm": 0.5648109508057189, "learning_rate": 2.3706782787880136e-05, "loss": 0.0765, "step": 17911 }, { "epoch": 2.1240365231827343, "grad_norm": 0.5519386263388136, "learning_rate": 2.3704385646372772e-05, "loss": 0.0597, "step": 17912 }, { "epoch": 2.1241551049448595, "grad_norm": 0.7673184273601047, "learning_rate": 2.3701988516809382e-05, "loss": 0.0894, "step": 17913 }, { "epoch": 2.1242736867069842, "grad_norm": 0.7424059576452738, "learning_rate": 2.3699591399212067e-05, "loss": 0.114, "step": 17914 }, { "epoch": 2.1243922684691094, "grad_norm": 1.0996489305939712, "learning_rate": 2.3697194293602932e-05, "loss": 0.144, "step": 17915 }, { "epoch": 2.1245108502312346, "grad_norm": 0.8487199506375223, "learning_rate": 2.3694797200004053e-05, "loss": 0.1244, "step": 17916 }, { "epoch": 2.1246294319933594, "grad_norm": 0.5109911550126984, "learning_rate": 2.3692400118437558e-05, "loss": 0.0685, "step": 17917 }, { "epoch": 2.1247480137554846, "grad_norm": 0.6238089767423283, "learning_rate": 2.369000304892553e-05, "loss": 0.101, "step": 17918 }, { "epoch": 2.1248665955176094, "grad_norm": 0.8983865031878597, "learning_rate": 2.3687605991490066e-05, "loss": 0.1153, "step": 17919 }, { "epoch": 2.1249851772797346, "grad_norm": 0.7457922643400969, "learning_rate": 2.3685208946153254e-05, "loss": 0.0908, "step": 17920 }, { "epoch": 2.1251037590418593, "grad_norm": 1.1064800435150053, "learning_rate": 2.3682811912937216e-05, "loss": 0.1693, "step": 17921 }, { "epoch": 2.1252223408039845, "grad_norm": 0.818937815399172, "learning_rate": 2.3680414891864036e-05, "loss": 0.0959, "step": 17922 }, { "epoch": 2.1253409225661093, "grad_norm": 0.635415820200434, "learning_rate": 2.3678017882955807e-05, "loss": 0.0827, "step": 17923 }, { "epoch": 2.1254595043282345, "grad_norm": 0.7315028646011373, "learning_rate": 2.367562088623463e-05, "loss": 0.0966, "step": 17924 }, { "epoch": 2.1255780860903593, "grad_norm": 0.8212071829887027, "learning_rate": 2.3673223901722607e-05, "loss": 0.11, "step": 17925 }, { "epoch": 2.1256966678524845, "grad_norm": 0.9939855279184496, "learning_rate": 2.3670826929441838e-05, "loss": 0.1105, "step": 17926 }, { "epoch": 2.125815249614609, "grad_norm": 0.65075426924166, "learning_rate": 2.3668429969414417e-05, "loss": 0.105, "step": 17927 }, { "epoch": 2.1259338313767344, "grad_norm": 0.9565522567237696, "learning_rate": 2.3666033021662424e-05, "loss": 0.1291, "step": 17928 }, { "epoch": 2.126052413138859, "grad_norm": 0.8484159646406912, "learning_rate": 2.366363608620798e-05, "loss": 0.1014, "step": 17929 }, { "epoch": 2.1261709949009844, "grad_norm": 0.8887452471984288, "learning_rate": 2.3661239163073178e-05, "loss": 0.1022, "step": 17930 }, { "epoch": 2.126289576663109, "grad_norm": 0.8137404230117737, "learning_rate": 2.3658842252280096e-05, "loss": 0.0982, "step": 17931 }, { "epoch": 2.1264081584252343, "grad_norm": 0.7173937793087729, "learning_rate": 2.3656445353850852e-05, "loss": 0.0942, "step": 17932 }, { "epoch": 2.126526740187359, "grad_norm": 0.690157214057542, "learning_rate": 2.3654048467807534e-05, "loss": 0.0934, "step": 17933 }, { "epoch": 2.1266453219494843, "grad_norm": 0.882030278353602, "learning_rate": 2.365165159417224e-05, "loss": 0.119, "step": 17934 }, { "epoch": 2.126763903711609, "grad_norm": 0.7572209603852741, "learning_rate": 2.3649254732967054e-05, "loss": 0.1006, "step": 17935 }, { "epoch": 2.1268824854737343, "grad_norm": 0.6535837997732545, "learning_rate": 2.3646857884214097e-05, "loss": 0.0779, "step": 17936 }, { "epoch": 2.127001067235859, "grad_norm": 0.7266925742073694, "learning_rate": 2.3644461047935448e-05, "loss": 0.1019, "step": 17937 }, { "epoch": 2.1271196489979842, "grad_norm": 0.8041646614861662, "learning_rate": 2.3642064224153205e-05, "loss": 0.0993, "step": 17938 }, { "epoch": 2.127238230760109, "grad_norm": 0.8064137905588544, "learning_rate": 2.363966741288946e-05, "loss": 0.1184, "step": 17939 }, { "epoch": 2.127356812522234, "grad_norm": 0.8567733802826183, "learning_rate": 2.3637270614166317e-05, "loss": 0.1115, "step": 17940 }, { "epoch": 2.127475394284359, "grad_norm": 0.8030423582508885, "learning_rate": 2.3634873828005872e-05, "loss": 0.1137, "step": 17941 }, { "epoch": 2.127593976046484, "grad_norm": 0.8405577757415722, "learning_rate": 2.3632477054430207e-05, "loss": 0.1207, "step": 17942 }, { "epoch": 2.127712557808609, "grad_norm": 0.5808405231914165, "learning_rate": 2.363008029346143e-05, "loss": 0.0774, "step": 17943 }, { "epoch": 2.127831139570734, "grad_norm": 0.6695355889903517, "learning_rate": 2.362768354512163e-05, "loss": 0.0841, "step": 17944 }, { "epoch": 2.127949721332859, "grad_norm": 2.1794959808835808, "learning_rate": 2.3625286809432915e-05, "loss": 0.1505, "step": 17945 }, { "epoch": 2.128068303094984, "grad_norm": 0.6918775036607938, "learning_rate": 2.3622890086417357e-05, "loss": 0.1014, "step": 17946 }, { "epoch": 2.128186884857109, "grad_norm": 0.6931507101542764, "learning_rate": 2.362049337609707e-05, "loss": 0.098, "step": 17947 }, { "epoch": 2.128305466619234, "grad_norm": 0.9732830820519827, "learning_rate": 2.361809667849415e-05, "loss": 0.1299, "step": 17948 }, { "epoch": 2.128424048381359, "grad_norm": 0.7738195800671515, "learning_rate": 2.361569999363068e-05, "loss": 0.0868, "step": 17949 }, { "epoch": 2.128542630143484, "grad_norm": 0.9156763357034299, "learning_rate": 2.3613303321528744e-05, "loss": 0.1398, "step": 17950 }, { "epoch": 2.1286612119056088, "grad_norm": 0.7769478455894022, "learning_rate": 2.3610906662210468e-05, "loss": 0.1057, "step": 17951 }, { "epoch": 2.128779793667734, "grad_norm": 0.7314998546887915, "learning_rate": 2.3608510015697926e-05, "loss": 0.091, "step": 17952 }, { "epoch": 2.1288983754298587, "grad_norm": 0.929189689029976, "learning_rate": 2.360611338201321e-05, "loss": 0.1517, "step": 17953 }, { "epoch": 2.129016957191984, "grad_norm": 0.8416267406235514, "learning_rate": 2.3603716761178422e-05, "loss": 0.1257, "step": 17954 }, { "epoch": 2.1291355389541087, "grad_norm": 0.5313698852381401, "learning_rate": 2.360132015321565e-05, "loss": 0.055, "step": 17955 }, { "epoch": 2.129254120716234, "grad_norm": 0.6489855520999889, "learning_rate": 2.3598923558147e-05, "loss": 0.0924, "step": 17956 }, { "epoch": 2.1293727024783586, "grad_norm": 0.9645600636905274, "learning_rate": 2.3596526975994555e-05, "loss": 0.1255, "step": 17957 }, { "epoch": 2.129491284240484, "grad_norm": 0.7505318286402709, "learning_rate": 2.3594130406780397e-05, "loss": 0.1029, "step": 17958 }, { "epoch": 2.1296098660026086, "grad_norm": 0.776437155397659, "learning_rate": 2.3591733850526647e-05, "loss": 0.0983, "step": 17959 }, { "epoch": 2.129728447764734, "grad_norm": 0.616023850659311, "learning_rate": 2.358933730725538e-05, "loss": 0.0831, "step": 17960 }, { "epoch": 2.129847029526859, "grad_norm": 0.7128132904446529, "learning_rate": 2.3586940776988685e-05, "loss": 0.0952, "step": 17961 }, { "epoch": 2.1299656112889838, "grad_norm": 0.9777230223252029, "learning_rate": 2.3584544259748672e-05, "loss": 0.0972, "step": 17962 }, { "epoch": 2.1300841930511085, "grad_norm": 0.7560065532091526, "learning_rate": 2.3582147755557422e-05, "loss": 0.092, "step": 17963 }, { "epoch": 2.1302027748132337, "grad_norm": 0.6113105681588883, "learning_rate": 2.3579751264437035e-05, "loss": 0.0765, "step": 17964 }, { "epoch": 2.130321356575359, "grad_norm": 0.7285933518414346, "learning_rate": 2.357735478640959e-05, "loss": 0.1046, "step": 17965 }, { "epoch": 2.1304399383374837, "grad_norm": 0.7796491007600945, "learning_rate": 2.3574958321497202e-05, "loss": 0.0943, "step": 17966 }, { "epoch": 2.130558520099609, "grad_norm": 0.8515913640850464, "learning_rate": 2.3572561869721946e-05, "loss": 0.1023, "step": 17967 }, { "epoch": 2.1306771018617336, "grad_norm": 0.7416116674353541, "learning_rate": 2.3570165431105924e-05, "loss": 0.1123, "step": 17968 }, { "epoch": 2.130795683623859, "grad_norm": 0.7089334420762649, "learning_rate": 2.3567769005671208e-05, "loss": 0.1013, "step": 17969 }, { "epoch": 2.1309142653859836, "grad_norm": 0.5332351112684102, "learning_rate": 2.356537259343992e-05, "loss": 0.0721, "step": 17970 }, { "epoch": 2.131032847148109, "grad_norm": 0.6229097554779223, "learning_rate": 2.3562976194434132e-05, "loss": 0.0855, "step": 17971 }, { "epoch": 2.1311514289102336, "grad_norm": 0.85019901906268, "learning_rate": 2.3560579808675943e-05, "loss": 0.1048, "step": 17972 }, { "epoch": 2.1312700106723588, "grad_norm": 0.7237192884265834, "learning_rate": 2.355818343618744e-05, "loss": 0.1084, "step": 17973 }, { "epoch": 2.1313885924344835, "grad_norm": 1.310789594927696, "learning_rate": 2.3555787076990714e-05, "loss": 0.1256, "step": 17974 }, { "epoch": 2.1315071741966087, "grad_norm": 0.7189161756262489, "learning_rate": 2.3553390731107872e-05, "loss": 0.0841, "step": 17975 }, { "epoch": 2.1316257559587335, "grad_norm": 0.7904979370066046, "learning_rate": 2.3550994398560978e-05, "loss": 0.1135, "step": 17976 }, { "epoch": 2.1317443377208587, "grad_norm": 0.8374737107257041, "learning_rate": 2.3548598079372152e-05, "loss": 0.0875, "step": 17977 }, { "epoch": 2.1318629194829835, "grad_norm": 1.041560735994387, "learning_rate": 2.354620177356347e-05, "loss": 0.1418, "step": 17978 }, { "epoch": 2.1319815012451087, "grad_norm": 0.650547995376575, "learning_rate": 2.354380548115703e-05, "loss": 0.1067, "step": 17979 }, { "epoch": 2.1321000830072334, "grad_norm": 0.6291266139420923, "learning_rate": 2.35414092021749e-05, "loss": 0.0943, "step": 17980 }, { "epoch": 2.1322186647693586, "grad_norm": 0.8001775550464129, "learning_rate": 2.3539012936639203e-05, "loss": 0.1197, "step": 17981 }, { "epoch": 2.1323372465314834, "grad_norm": 0.7459076322198885, "learning_rate": 2.353661668457201e-05, "loss": 0.1127, "step": 17982 }, { "epoch": 2.1324558282936086, "grad_norm": 0.6126482173539698, "learning_rate": 2.353422044599542e-05, "loss": 0.0685, "step": 17983 }, { "epoch": 2.1325744100557333, "grad_norm": 0.520303660516012, "learning_rate": 2.353182422093151e-05, "loss": 0.0803, "step": 17984 }, { "epoch": 2.1326929918178585, "grad_norm": 0.969541353150509, "learning_rate": 2.352942800940239e-05, "loss": 0.1411, "step": 17985 }, { "epoch": 2.1328115735799833, "grad_norm": 0.6226639599073154, "learning_rate": 2.352703181143014e-05, "loss": 0.0767, "step": 17986 }, { "epoch": 2.1329301553421085, "grad_norm": 1.111531910988383, "learning_rate": 2.352463562703684e-05, "loss": 0.1597, "step": 17987 }, { "epoch": 2.1330487371042333, "grad_norm": 0.6898020862188439, "learning_rate": 2.3522239456244603e-05, "loss": 0.1002, "step": 17988 }, { "epoch": 2.1331673188663585, "grad_norm": 0.9035760661420732, "learning_rate": 2.3519843299075508e-05, "loss": 0.1013, "step": 17989 }, { "epoch": 2.1332859006284832, "grad_norm": 0.8177121380055178, "learning_rate": 2.3517447155551642e-05, "loss": 0.1188, "step": 17990 }, { "epoch": 2.1334044823906084, "grad_norm": 0.6883478418987434, "learning_rate": 2.351505102569508e-05, "loss": 0.1062, "step": 17991 }, { "epoch": 2.133523064152733, "grad_norm": 0.8619965867122521, "learning_rate": 2.3512654909527943e-05, "loss": 0.1064, "step": 17992 }, { "epoch": 2.1336416459148584, "grad_norm": 0.8933626276913544, "learning_rate": 2.3510258807072294e-05, "loss": 0.115, "step": 17993 }, { "epoch": 2.133760227676983, "grad_norm": 0.6714201189075433, "learning_rate": 2.3507862718350237e-05, "loss": 0.0846, "step": 17994 }, { "epoch": 2.1338788094391083, "grad_norm": 1.1744051070120978, "learning_rate": 2.350546664338385e-05, "loss": 0.1466, "step": 17995 }, { "epoch": 2.133997391201233, "grad_norm": 0.7642441401724089, "learning_rate": 2.3503070582195235e-05, "loss": 0.1487, "step": 17996 }, { "epoch": 2.1341159729633583, "grad_norm": 0.7529487793584818, "learning_rate": 2.3500674534806475e-05, "loss": 0.1013, "step": 17997 }, { "epoch": 2.134234554725483, "grad_norm": 0.637900511054051, "learning_rate": 2.3498278501239653e-05, "loss": 0.0829, "step": 17998 }, { "epoch": 2.1343531364876083, "grad_norm": 0.6729323309704297, "learning_rate": 2.3495882481516855e-05, "loss": 0.0946, "step": 17999 }, { "epoch": 2.134471718249733, "grad_norm": 0.6712618665477792, "learning_rate": 2.3493486475660187e-05, "loss": 0.0957, "step": 18000 }, { "epoch": 2.1345903000118582, "grad_norm": 0.7157267011133359, "learning_rate": 2.3491090483691726e-05, "loss": 0.0885, "step": 18001 }, { "epoch": 2.134708881773983, "grad_norm": 0.8037486863320477, "learning_rate": 2.3488694505633548e-05, "loss": 0.1017, "step": 18002 }, { "epoch": 2.134827463536108, "grad_norm": 0.5924802316743294, "learning_rate": 2.3486298541507764e-05, "loss": 0.0734, "step": 18003 }, { "epoch": 2.134946045298233, "grad_norm": 1.0066599779909167, "learning_rate": 2.3483902591336448e-05, "loss": 0.132, "step": 18004 }, { "epoch": 2.135064627060358, "grad_norm": 0.9566497703070097, "learning_rate": 2.3481506655141697e-05, "loss": 0.1722, "step": 18005 }, { "epoch": 2.135183208822483, "grad_norm": 0.6279313542518444, "learning_rate": 2.347911073294558e-05, "loss": 0.0869, "step": 18006 }, { "epoch": 2.135301790584608, "grad_norm": 0.8070261913629467, "learning_rate": 2.3476714824770208e-05, "loss": 0.121, "step": 18007 }, { "epoch": 2.135420372346733, "grad_norm": 0.7292099898304665, "learning_rate": 2.3474318930637656e-05, "loss": 0.1111, "step": 18008 }, { "epoch": 2.135538954108858, "grad_norm": 0.6116872377730583, "learning_rate": 2.3471923050570013e-05, "loss": 0.0647, "step": 18009 }, { "epoch": 2.135657535870983, "grad_norm": 0.7905847848814176, "learning_rate": 2.3469527184589357e-05, "loss": 0.1084, "step": 18010 }, { "epoch": 2.135776117633108, "grad_norm": 0.5529919037535964, "learning_rate": 2.3467131332717793e-05, "loss": 0.0677, "step": 18011 }, { "epoch": 2.135894699395233, "grad_norm": 0.6439759243633505, "learning_rate": 2.3464735494977392e-05, "loss": 0.0927, "step": 18012 }, { "epoch": 2.136013281157358, "grad_norm": 0.8702695590386543, "learning_rate": 2.3462339671390255e-05, "loss": 0.0895, "step": 18013 }, { "epoch": 2.136131862919483, "grad_norm": 0.7412421377280523, "learning_rate": 2.3459943861978447e-05, "loss": 0.1066, "step": 18014 }, { "epoch": 2.136250444681608, "grad_norm": 0.777798178681783, "learning_rate": 2.345754806676408e-05, "loss": 0.1057, "step": 18015 }, { "epoch": 2.1363690264437327, "grad_norm": 0.9237364942404307, "learning_rate": 2.345515228576923e-05, "loss": 0.1342, "step": 18016 }, { "epoch": 2.136487608205858, "grad_norm": 0.6106865786879563, "learning_rate": 2.3452756519015965e-05, "loss": 0.0837, "step": 18017 }, { "epoch": 2.136606189967983, "grad_norm": 1.0693416307912376, "learning_rate": 2.3450360766526405e-05, "loss": 0.1498, "step": 18018 }, { "epoch": 2.136724771730108, "grad_norm": 0.8146444400841573, "learning_rate": 2.3447965028322617e-05, "loss": 0.1066, "step": 18019 }, { "epoch": 2.136843353492233, "grad_norm": 0.6368735070380152, "learning_rate": 2.3445569304426687e-05, "loss": 0.0834, "step": 18020 }, { "epoch": 2.136961935254358, "grad_norm": 1.0317235321642375, "learning_rate": 2.344317359486069e-05, "loss": 0.139, "step": 18021 }, { "epoch": 2.137080517016483, "grad_norm": 0.7518010711989375, "learning_rate": 2.3440777899646736e-05, "loss": 0.1381, "step": 18022 }, { "epoch": 2.137199098778608, "grad_norm": 0.7584581441437331, "learning_rate": 2.3438382218806892e-05, "loss": 0.1088, "step": 18023 }, { "epoch": 2.137317680540733, "grad_norm": 0.5213458827296431, "learning_rate": 2.3435986552363255e-05, "loss": 0.0727, "step": 18024 }, { "epoch": 2.1374362623028578, "grad_norm": 0.9111548483046347, "learning_rate": 2.343359090033789e-05, "loss": 0.1081, "step": 18025 }, { "epoch": 2.137554844064983, "grad_norm": 0.7391686080405271, "learning_rate": 2.343119526275291e-05, "loss": 0.119, "step": 18026 }, { "epoch": 2.1376734258271077, "grad_norm": 0.5957188262381526, "learning_rate": 2.3428799639630383e-05, "loss": 0.0877, "step": 18027 }, { "epoch": 2.137792007589233, "grad_norm": 0.6083177768801656, "learning_rate": 2.3426404030992398e-05, "loss": 0.0793, "step": 18028 }, { "epoch": 2.1379105893513577, "grad_norm": 0.6342198883171318, "learning_rate": 2.342400843686103e-05, "loss": 0.0893, "step": 18029 }, { "epoch": 2.138029171113483, "grad_norm": 0.599692922688417, "learning_rate": 2.342161285725838e-05, "loss": 0.0851, "step": 18030 }, { "epoch": 2.1381477528756077, "grad_norm": 0.6498707769072296, "learning_rate": 2.3419217292206523e-05, "loss": 0.1022, "step": 18031 }, { "epoch": 2.138266334637733, "grad_norm": 0.5250549935838396, "learning_rate": 2.3416821741727534e-05, "loss": 0.0675, "step": 18032 }, { "epoch": 2.1383849163998576, "grad_norm": 0.5661442777501516, "learning_rate": 2.3414426205843516e-05, "loss": 0.0796, "step": 18033 }, { "epoch": 2.138503498161983, "grad_norm": 0.8912426879759916, "learning_rate": 2.3412030684576545e-05, "loss": 0.1303, "step": 18034 }, { "epoch": 2.1386220799241076, "grad_norm": 0.5285611167927607, "learning_rate": 2.340963517794871e-05, "loss": 0.0803, "step": 18035 }, { "epoch": 2.138740661686233, "grad_norm": 0.7971832071440378, "learning_rate": 2.340723968598207e-05, "loss": 0.097, "step": 18036 }, { "epoch": 2.1388592434483575, "grad_norm": 0.7886784610044397, "learning_rate": 2.340484420869874e-05, "loss": 0.1118, "step": 18037 }, { "epoch": 2.1389778252104827, "grad_norm": 0.8325417157712541, "learning_rate": 2.3402448746120792e-05, "loss": 0.0998, "step": 18038 }, { "epoch": 2.1390964069726075, "grad_norm": 0.8673012561978783, "learning_rate": 2.3400053298270304e-05, "loss": 0.1455, "step": 18039 }, { "epoch": 2.1392149887347327, "grad_norm": 0.5721425723167158, "learning_rate": 2.3397657865169356e-05, "loss": 0.0842, "step": 18040 }, { "epoch": 2.1393335704968575, "grad_norm": 0.7189776576445861, "learning_rate": 2.3395262446840046e-05, "loss": 0.0978, "step": 18041 }, { "epoch": 2.1394521522589827, "grad_norm": 0.7867094070803604, "learning_rate": 2.339286704330444e-05, "loss": 0.0909, "step": 18042 }, { "epoch": 2.1395707340211074, "grad_norm": 0.7332408463141352, "learning_rate": 2.3390471654584642e-05, "loss": 0.0835, "step": 18043 }, { "epoch": 2.1396893157832326, "grad_norm": 1.3659974291512087, "learning_rate": 2.3388076280702705e-05, "loss": 0.1838, "step": 18044 }, { "epoch": 2.1398078975453574, "grad_norm": 0.6900796712354379, "learning_rate": 2.3385680921680742e-05, "loss": 0.0716, "step": 18045 }, { "epoch": 2.1399264793074826, "grad_norm": 0.6047944990875459, "learning_rate": 2.338328557754082e-05, "loss": 0.0822, "step": 18046 }, { "epoch": 2.1400450610696073, "grad_norm": 0.5733761154197873, "learning_rate": 2.3380890248305013e-05, "loss": 0.0928, "step": 18047 }, { "epoch": 2.1401636428317325, "grad_norm": 0.4893372739568274, "learning_rate": 2.337849493399542e-05, "loss": 0.0707, "step": 18048 }, { "epoch": 2.1402822245938573, "grad_norm": 0.6264341399466726, "learning_rate": 2.337609963463412e-05, "loss": 0.0828, "step": 18049 }, { "epoch": 2.1404008063559825, "grad_norm": 0.940528791794186, "learning_rate": 2.3373704350243184e-05, "loss": 0.1354, "step": 18050 }, { "epoch": 2.1405193881181073, "grad_norm": 0.7472558180252735, "learning_rate": 2.3371309080844693e-05, "loss": 0.114, "step": 18051 }, { "epoch": 2.1406379698802325, "grad_norm": 0.9147306334718411, "learning_rate": 2.3368913826460744e-05, "loss": 0.1285, "step": 18052 }, { "epoch": 2.1407565516423572, "grad_norm": 0.8213501537537489, "learning_rate": 2.3366518587113406e-05, "loss": 0.116, "step": 18053 }, { "epoch": 2.1408751334044824, "grad_norm": 0.5972004334156824, "learning_rate": 2.3364123362824766e-05, "loss": 0.0865, "step": 18054 }, { "epoch": 2.140993715166607, "grad_norm": 0.7123993315244432, "learning_rate": 2.3361728153616893e-05, "loss": 0.0991, "step": 18055 }, { "epoch": 2.1411122969287324, "grad_norm": 0.5830591414604122, "learning_rate": 2.3359332959511888e-05, "loss": 0.0747, "step": 18056 }, { "epoch": 2.141230878690857, "grad_norm": 0.6714233130580632, "learning_rate": 2.3356937780531823e-05, "loss": 0.0816, "step": 18057 }, { "epoch": 2.1413494604529824, "grad_norm": 0.8444375103388471, "learning_rate": 2.3354542616698776e-05, "loss": 0.0985, "step": 18058 }, { "epoch": 2.141468042215107, "grad_norm": 0.597477490866996, "learning_rate": 2.335214746803482e-05, "loss": 0.0768, "step": 18059 }, { "epoch": 2.1415866239772323, "grad_norm": 0.6547117945798402, "learning_rate": 2.3349752334562046e-05, "loss": 0.0739, "step": 18060 }, { "epoch": 2.141705205739357, "grad_norm": 0.8075592259822908, "learning_rate": 2.3347357216302533e-05, "loss": 0.1068, "step": 18061 }, { "epoch": 2.1418237875014823, "grad_norm": 0.7554655506309944, "learning_rate": 2.3344962113278358e-05, "loss": 0.0961, "step": 18062 }, { "epoch": 2.1419423692636075, "grad_norm": 0.8016755043386448, "learning_rate": 2.3342567025511605e-05, "loss": 0.1092, "step": 18063 }, { "epoch": 2.1420609510257322, "grad_norm": 1.015132411498408, "learning_rate": 2.3340171953024356e-05, "loss": 0.1177, "step": 18064 }, { "epoch": 2.142179532787857, "grad_norm": 0.6061933401261164, "learning_rate": 2.3337776895838687e-05, "loss": 0.0887, "step": 18065 }, { "epoch": 2.142298114549982, "grad_norm": 0.7898899825485469, "learning_rate": 2.333538185397666e-05, "loss": 0.0934, "step": 18066 }, { "epoch": 2.1424166963121074, "grad_norm": 0.7577674260773044, "learning_rate": 2.333298682746039e-05, "loss": 0.0918, "step": 18067 }, { "epoch": 2.142535278074232, "grad_norm": 0.9888382287923655, "learning_rate": 2.3330591816311935e-05, "loss": 0.1434, "step": 18068 }, { "epoch": 2.1426538598363574, "grad_norm": 0.7023257267084432, "learning_rate": 2.3328196820553373e-05, "loss": 0.079, "step": 18069 }, { "epoch": 2.142772441598482, "grad_norm": 0.8175957307719977, "learning_rate": 2.3325801840206775e-05, "loss": 0.0889, "step": 18070 }, { "epoch": 2.1428910233606073, "grad_norm": 1.0274324968056765, "learning_rate": 2.3323406875294246e-05, "loss": 0.1313, "step": 18071 }, { "epoch": 2.143009605122732, "grad_norm": 0.7463302568746567, "learning_rate": 2.3321011925837843e-05, "loss": 0.106, "step": 18072 }, { "epoch": 2.1431281868848573, "grad_norm": 0.8828530244476475, "learning_rate": 2.331861699185965e-05, "loss": 0.1477, "step": 18073 }, { "epoch": 2.143246768646982, "grad_norm": 0.9532853768012814, "learning_rate": 2.3316222073381746e-05, "loss": 0.1392, "step": 18074 }, { "epoch": 2.1433653504091073, "grad_norm": 1.1508237242233292, "learning_rate": 2.3313827170426218e-05, "loss": 0.1747, "step": 18075 }, { "epoch": 2.143483932171232, "grad_norm": 0.9052430644114066, "learning_rate": 2.3311432283015134e-05, "loss": 0.0929, "step": 18076 }, { "epoch": 2.143602513933357, "grad_norm": 0.8316858898207852, "learning_rate": 2.3309037411170563e-05, "loss": 0.1027, "step": 18077 }, { "epoch": 2.143721095695482, "grad_norm": 0.9304349672916578, "learning_rate": 2.3306642554914604e-05, "loss": 0.1432, "step": 18078 }, { "epoch": 2.143839677457607, "grad_norm": 1.089490464534018, "learning_rate": 2.3304247714269323e-05, "loss": 0.0961, "step": 18079 }, { "epoch": 2.143958259219732, "grad_norm": 0.6502530677779542, "learning_rate": 2.33018528892568e-05, "loss": 0.0824, "step": 18080 }, { "epoch": 2.144076840981857, "grad_norm": 0.8596455476877474, "learning_rate": 2.32994580798991e-05, "loss": 0.1074, "step": 18081 }, { "epoch": 2.144195422743982, "grad_norm": 0.6108617024143107, "learning_rate": 2.329706328621832e-05, "loss": 0.0741, "step": 18082 }, { "epoch": 2.144314004506107, "grad_norm": 0.8077240730508112, "learning_rate": 2.3294668508236524e-05, "loss": 0.1306, "step": 18083 }, { "epoch": 2.144432586268232, "grad_norm": 0.7494475487936177, "learning_rate": 2.32922737459758e-05, "loss": 0.0888, "step": 18084 }, { "epoch": 2.144551168030357, "grad_norm": 0.8099613123334597, "learning_rate": 2.3289878999458205e-05, "loss": 0.093, "step": 18085 }, { "epoch": 2.144669749792482, "grad_norm": 0.7005478689144264, "learning_rate": 2.328748426870584e-05, "loss": 0.12, "step": 18086 }, { "epoch": 2.144788331554607, "grad_norm": 0.810246500005227, "learning_rate": 2.3285089553740774e-05, "loss": 0.0904, "step": 18087 }, { "epoch": 2.144906913316732, "grad_norm": 1.4308581752294731, "learning_rate": 2.3282694854585063e-05, "loss": 0.1587, "step": 18088 }, { "epoch": 2.145025495078857, "grad_norm": 0.6971963964331938, "learning_rate": 2.3280300171260814e-05, "loss": 0.0844, "step": 18089 }, { "epoch": 2.1451440768409817, "grad_norm": 0.5584215659312805, "learning_rate": 2.3277905503790087e-05, "loss": 0.0776, "step": 18090 }, { "epoch": 2.145262658603107, "grad_norm": 0.9591231617530674, "learning_rate": 2.3275510852194954e-05, "loss": 0.1376, "step": 18091 }, { "epoch": 2.1453812403652317, "grad_norm": 0.8188823046807663, "learning_rate": 2.3273116216497498e-05, "loss": 0.1275, "step": 18092 }, { "epoch": 2.145499822127357, "grad_norm": 0.730830718115018, "learning_rate": 2.327072159671979e-05, "loss": 0.103, "step": 18093 }, { "epoch": 2.1456184038894817, "grad_norm": 0.9227901666081288, "learning_rate": 2.3268326992883916e-05, "loss": 0.1177, "step": 18094 }, { "epoch": 2.145736985651607, "grad_norm": 0.6474206183261723, "learning_rate": 2.3265932405011942e-05, "loss": 0.0869, "step": 18095 }, { "epoch": 2.1458555674137316, "grad_norm": 0.912011367935835, "learning_rate": 2.3263537833125933e-05, "loss": 0.1216, "step": 18096 }, { "epoch": 2.145974149175857, "grad_norm": 0.965707705707891, "learning_rate": 2.326114327724799e-05, "loss": 0.0973, "step": 18097 }, { "epoch": 2.1460927309379816, "grad_norm": 1.2442381816799404, "learning_rate": 2.325874873740017e-05, "loss": 0.1152, "step": 18098 }, { "epoch": 2.146211312700107, "grad_norm": 0.9894280903397933, "learning_rate": 2.3256354213604552e-05, "loss": 0.1453, "step": 18099 }, { "epoch": 2.1463298944622315, "grad_norm": 0.7078210763688166, "learning_rate": 2.3253959705883203e-05, "loss": 0.1028, "step": 18100 }, { "epoch": 2.1464484762243567, "grad_norm": 0.8743458849957061, "learning_rate": 2.325156521425821e-05, "loss": 0.119, "step": 18101 }, { "epoch": 2.1465670579864815, "grad_norm": 0.8211973595833604, "learning_rate": 2.3249170738751642e-05, "loss": 0.098, "step": 18102 }, { "epoch": 2.1466856397486067, "grad_norm": 0.5701343810452626, "learning_rate": 2.3246776279385568e-05, "loss": 0.0685, "step": 18103 }, { "epoch": 2.1468042215107315, "grad_norm": 0.49077395329758944, "learning_rate": 2.3244381836182068e-05, "loss": 0.07, "step": 18104 }, { "epoch": 2.1469228032728567, "grad_norm": 0.6856528907769682, "learning_rate": 2.3241987409163222e-05, "loss": 0.0912, "step": 18105 }, { "epoch": 2.1470413850349814, "grad_norm": 0.6731806465364306, "learning_rate": 2.3239592998351092e-05, "loss": 0.0816, "step": 18106 }, { "epoch": 2.1471599667971066, "grad_norm": 0.9041999764862111, "learning_rate": 2.3237198603767747e-05, "loss": 0.1359, "step": 18107 }, { "epoch": 2.1472785485592314, "grad_norm": 0.6715426001344005, "learning_rate": 2.3234804225435282e-05, "loss": 0.0835, "step": 18108 }, { "epoch": 2.1473971303213566, "grad_norm": 0.4019711711116074, "learning_rate": 2.3232409863375756e-05, "loss": 0.0627, "step": 18109 }, { "epoch": 2.1475157120834814, "grad_norm": 0.6717644933305216, "learning_rate": 2.323001551761124e-05, "loss": 0.0908, "step": 18110 }, { "epoch": 2.1476342938456066, "grad_norm": 0.6852354148582213, "learning_rate": 2.3227621188163807e-05, "loss": 0.0975, "step": 18111 }, { "epoch": 2.1477528756077318, "grad_norm": 0.6076907563551447, "learning_rate": 2.3225226875055535e-05, "loss": 0.0937, "step": 18112 }, { "epoch": 2.1478714573698565, "grad_norm": 0.6103847015376641, "learning_rate": 2.32228325783085e-05, "loss": 0.0946, "step": 18113 }, { "epoch": 2.1479900391319813, "grad_norm": 0.6414170256554548, "learning_rate": 2.322043829794477e-05, "loss": 0.0854, "step": 18114 }, { "epoch": 2.1481086208941065, "grad_norm": 0.8497505343088174, "learning_rate": 2.32180440339864e-05, "loss": 0.1215, "step": 18115 }, { "epoch": 2.1482272026562317, "grad_norm": 0.7877573202653888, "learning_rate": 2.3215649786455498e-05, "loss": 0.1275, "step": 18116 }, { "epoch": 2.1483457844183564, "grad_norm": 0.6801568465767223, "learning_rate": 2.3213255555374113e-05, "loss": 0.0793, "step": 18117 }, { "epoch": 2.1484643661804816, "grad_norm": 0.7955775847791441, "learning_rate": 2.321086134076431e-05, "loss": 0.1221, "step": 18118 }, { "epoch": 2.1485829479426064, "grad_norm": 0.64617084693515, "learning_rate": 2.3208467142648185e-05, "loss": 0.0895, "step": 18119 }, { "epoch": 2.1487015297047316, "grad_norm": 0.673220362222063, "learning_rate": 2.320607296104779e-05, "loss": 0.0853, "step": 18120 }, { "epoch": 2.1488201114668564, "grad_norm": 0.7276050932085466, "learning_rate": 2.3203678795985205e-05, "loss": 0.1144, "step": 18121 }, { "epoch": 2.1489386932289816, "grad_norm": 0.7485867798028866, "learning_rate": 2.3201284647482492e-05, "loss": 0.1071, "step": 18122 }, { "epoch": 2.1490572749911063, "grad_norm": 0.988604949364844, "learning_rate": 2.3198890515561733e-05, "loss": 0.1082, "step": 18123 }, { "epoch": 2.1491758567532315, "grad_norm": 0.7639925713735145, "learning_rate": 2.3196496400245e-05, "loss": 0.0803, "step": 18124 }, { "epoch": 2.1492944385153563, "grad_norm": 0.9519986941569366, "learning_rate": 2.3194102301554355e-05, "loss": 0.1442, "step": 18125 }, { "epoch": 2.1494130202774815, "grad_norm": 0.5932752594665959, "learning_rate": 2.3191708219511867e-05, "loss": 0.0761, "step": 18126 }, { "epoch": 2.1495316020396062, "grad_norm": 0.5227746733073987, "learning_rate": 2.3189314154139617e-05, "loss": 0.0616, "step": 18127 }, { "epoch": 2.1496501838017315, "grad_norm": 0.6815486317430457, "learning_rate": 2.3186920105459672e-05, "loss": 0.1018, "step": 18128 }, { "epoch": 2.149768765563856, "grad_norm": 0.6575320457562552, "learning_rate": 2.31845260734941e-05, "loss": 0.0871, "step": 18129 }, { "epoch": 2.1498873473259814, "grad_norm": 0.9138209980568756, "learning_rate": 2.318213205826496e-05, "loss": 0.1271, "step": 18130 }, { "epoch": 2.150005929088106, "grad_norm": 0.8297299890809935, "learning_rate": 2.3179738059794344e-05, "loss": 0.1002, "step": 18131 }, { "epoch": 2.1501245108502314, "grad_norm": 0.8180974031582056, "learning_rate": 2.3177344078104308e-05, "loss": 0.1201, "step": 18132 }, { "epoch": 2.150243092612356, "grad_norm": 0.6370147294690185, "learning_rate": 2.317495011321692e-05, "loss": 0.0972, "step": 18133 }, { "epoch": 2.1503616743744813, "grad_norm": 0.631388197838621, "learning_rate": 2.317255616515426e-05, "loss": 0.0914, "step": 18134 }, { "epoch": 2.150480256136606, "grad_norm": 0.6340892292528569, "learning_rate": 2.3170162233938394e-05, "loss": 0.0831, "step": 18135 }, { "epoch": 2.1505988378987313, "grad_norm": 0.6328260781577894, "learning_rate": 2.3167768319591387e-05, "loss": 0.0809, "step": 18136 }, { "epoch": 2.150717419660856, "grad_norm": 0.8528781529900971, "learning_rate": 2.31653744221353e-05, "loss": 0.0807, "step": 18137 }, { "epoch": 2.1508360014229813, "grad_norm": 0.8794962657783777, "learning_rate": 2.3162980541592223e-05, "loss": 0.1358, "step": 18138 }, { "epoch": 2.150954583185106, "grad_norm": 0.5774211025513339, "learning_rate": 2.3160586677984207e-05, "loss": 0.0684, "step": 18139 }, { "epoch": 2.151073164947231, "grad_norm": 0.6738072930774789, "learning_rate": 2.3158192831333327e-05, "loss": 0.0787, "step": 18140 }, { "epoch": 2.151191746709356, "grad_norm": 0.6485456332558643, "learning_rate": 2.3155799001661648e-05, "loss": 0.0773, "step": 18141 }, { "epoch": 2.151310328471481, "grad_norm": 0.7391262890084076, "learning_rate": 2.315340518899124e-05, "loss": 0.0846, "step": 18142 }, { "epoch": 2.151428910233606, "grad_norm": 0.9779840652770714, "learning_rate": 2.315101139334418e-05, "loss": 0.1284, "step": 18143 }, { "epoch": 2.151547491995731, "grad_norm": 0.7025012195214992, "learning_rate": 2.3148617614742524e-05, "loss": 0.0958, "step": 18144 }, { "epoch": 2.151666073757856, "grad_norm": 0.9971336498004546, "learning_rate": 2.3146223853208334e-05, "loss": 0.1327, "step": 18145 }, { "epoch": 2.151784655519981, "grad_norm": 0.5671534828665069, "learning_rate": 2.3143830108763698e-05, "loss": 0.0798, "step": 18146 }, { "epoch": 2.151903237282106, "grad_norm": 0.8716908413898118, "learning_rate": 2.3141436381430673e-05, "loss": 0.1022, "step": 18147 }, { "epoch": 2.152021819044231, "grad_norm": 0.46130784405367475, "learning_rate": 2.313904267123131e-05, "loss": 0.0727, "step": 18148 }, { "epoch": 2.152140400806356, "grad_norm": 0.6351613759543203, "learning_rate": 2.3136648978187708e-05, "loss": 0.0765, "step": 18149 }, { "epoch": 2.152258982568481, "grad_norm": 0.7888419177928546, "learning_rate": 2.3134255302321915e-05, "loss": 0.1222, "step": 18150 }, { "epoch": 2.152377564330606, "grad_norm": 0.6741672998405454, "learning_rate": 2.3131861643655998e-05, "loss": 0.0783, "step": 18151 }, { "epoch": 2.152496146092731, "grad_norm": 0.8988194486721663, "learning_rate": 2.312946800221202e-05, "loss": 0.1362, "step": 18152 }, { "epoch": 2.1526147278548557, "grad_norm": 1.3386723568240328, "learning_rate": 2.3127074378012058e-05, "loss": 0.1307, "step": 18153 }, { "epoch": 2.152733309616981, "grad_norm": 0.6641771760248938, "learning_rate": 2.3124680771078176e-05, "loss": 0.0939, "step": 18154 }, { "epoch": 2.1528518913791057, "grad_norm": 0.8418614838862601, "learning_rate": 2.312228718143244e-05, "loss": 0.1174, "step": 18155 }, { "epoch": 2.152970473141231, "grad_norm": 1.0587160378128124, "learning_rate": 2.31198936090969e-05, "loss": 0.137, "step": 18156 }, { "epoch": 2.1530890549033557, "grad_norm": 0.7854008753925822, "learning_rate": 2.3117500054093645e-05, "loss": 0.1021, "step": 18157 }, { "epoch": 2.153207636665481, "grad_norm": 0.7781030683517973, "learning_rate": 2.3115106516444738e-05, "loss": 0.123, "step": 18158 }, { "epoch": 2.1533262184276056, "grad_norm": 0.8259606510106414, "learning_rate": 2.311271299617222e-05, "loss": 0.0962, "step": 18159 }, { "epoch": 2.153444800189731, "grad_norm": 0.736209319071738, "learning_rate": 2.311031949329819e-05, "loss": 0.1014, "step": 18160 }, { "epoch": 2.153563381951856, "grad_norm": 0.6972753163739709, "learning_rate": 2.310792600784469e-05, "loss": 0.0962, "step": 18161 }, { "epoch": 2.153681963713981, "grad_norm": 0.7402370264979539, "learning_rate": 2.310553253983379e-05, "loss": 0.1091, "step": 18162 }, { "epoch": 2.1538005454761056, "grad_norm": 0.9732503963958173, "learning_rate": 2.3103139089287558e-05, "loss": 0.1533, "step": 18163 }, { "epoch": 2.1539191272382308, "grad_norm": 0.8138120912286035, "learning_rate": 2.3100745656228056e-05, "loss": 0.0912, "step": 18164 }, { "epoch": 2.154037709000356, "grad_norm": 0.7029214470128695, "learning_rate": 2.309835224067736e-05, "loss": 0.1042, "step": 18165 }, { "epoch": 2.1541562907624807, "grad_norm": 0.7728986354093755, "learning_rate": 2.309595884265752e-05, "loss": 0.1125, "step": 18166 }, { "epoch": 2.154274872524606, "grad_norm": 1.0640671295149697, "learning_rate": 2.3093565462190595e-05, "loss": 0.1431, "step": 18167 }, { "epoch": 2.1543934542867307, "grad_norm": 0.5772411816749196, "learning_rate": 2.3091172099298665e-05, "loss": 0.0656, "step": 18168 }, { "epoch": 2.154512036048856, "grad_norm": 0.6896418390996412, "learning_rate": 2.308877875400379e-05, "loss": 0.077, "step": 18169 }, { "epoch": 2.1546306178109806, "grad_norm": 0.7808587379063265, "learning_rate": 2.3086385426328032e-05, "loss": 0.0943, "step": 18170 }, { "epoch": 2.154749199573106, "grad_norm": 0.8667898192465658, "learning_rate": 2.308399211629345e-05, "loss": 0.1355, "step": 18171 }, { "epoch": 2.1548677813352306, "grad_norm": 0.6953066606399103, "learning_rate": 2.3081598823922108e-05, "loss": 0.1084, "step": 18172 }, { "epoch": 2.154986363097356, "grad_norm": 0.8821050999682643, "learning_rate": 2.307920554923608e-05, "loss": 0.1098, "step": 18173 }, { "epoch": 2.1551049448594806, "grad_norm": 0.6392239423028726, "learning_rate": 2.307681229225741e-05, "loss": 0.0772, "step": 18174 }, { "epoch": 2.1552235266216058, "grad_norm": 0.9246335450249412, "learning_rate": 2.3074419053008185e-05, "loss": 0.1416, "step": 18175 }, { "epoch": 2.1553421083837305, "grad_norm": 0.8330116734809548, "learning_rate": 2.3072025831510452e-05, "loss": 0.1143, "step": 18176 }, { "epoch": 2.1554606901458557, "grad_norm": 0.718101012555311, "learning_rate": 2.306963262778628e-05, "loss": 0.0796, "step": 18177 }, { "epoch": 2.1555792719079805, "grad_norm": 0.7573833062131254, "learning_rate": 2.3067239441857713e-05, "loss": 0.1117, "step": 18178 }, { "epoch": 2.1556978536701057, "grad_norm": 0.8199884217452375, "learning_rate": 2.3064846273746844e-05, "loss": 0.1119, "step": 18179 }, { "epoch": 2.1558164354322304, "grad_norm": 0.8031634785029379, "learning_rate": 2.3062453123475712e-05, "loss": 0.1374, "step": 18180 }, { "epoch": 2.1559350171943557, "grad_norm": 0.8377048822939261, "learning_rate": 2.3060059991066388e-05, "loss": 0.1273, "step": 18181 }, { "epoch": 2.1560535989564804, "grad_norm": 0.6836474477348978, "learning_rate": 2.3057666876540927e-05, "loss": 0.0996, "step": 18182 }, { "epoch": 2.1561721807186056, "grad_norm": 0.5191959010832761, "learning_rate": 2.30552737799214e-05, "loss": 0.0669, "step": 18183 }, { "epoch": 2.1562907624807304, "grad_norm": 0.5220909854671199, "learning_rate": 2.305288070122987e-05, "loss": 0.0853, "step": 18184 }, { "epoch": 2.1564093442428556, "grad_norm": 0.5908015739492222, "learning_rate": 2.3050487640488388e-05, "loss": 0.0842, "step": 18185 }, { "epoch": 2.1565279260049803, "grad_norm": 0.8052188534461072, "learning_rate": 2.304809459771901e-05, "loss": 0.1144, "step": 18186 }, { "epoch": 2.1566465077671055, "grad_norm": 0.6946834553344882, "learning_rate": 2.3045701572943817e-05, "loss": 0.0962, "step": 18187 }, { "epoch": 2.1567650895292303, "grad_norm": 0.4703431196004679, "learning_rate": 2.304330856618486e-05, "loss": 0.0668, "step": 18188 }, { "epoch": 2.1568836712913555, "grad_norm": 0.8822679208091541, "learning_rate": 2.3040915577464183e-05, "loss": 0.1261, "step": 18189 }, { "epoch": 2.1570022530534803, "grad_norm": 0.5484524810777485, "learning_rate": 2.303852260680388e-05, "loss": 0.0804, "step": 18190 }, { "epoch": 2.1571208348156055, "grad_norm": 0.6806465226347443, "learning_rate": 2.3036129654225982e-05, "loss": 0.0848, "step": 18191 }, { "epoch": 2.15723941657773, "grad_norm": 0.7515767610424349, "learning_rate": 2.303373671975257e-05, "loss": 0.107, "step": 18192 }, { "epoch": 2.1573579983398554, "grad_norm": 0.8765387331930589, "learning_rate": 2.303134380340568e-05, "loss": 0.1297, "step": 18193 }, { "epoch": 2.15747658010198, "grad_norm": 0.7120820531707, "learning_rate": 2.30289509052074e-05, "loss": 0.1031, "step": 18194 }, { "epoch": 2.1575951618641054, "grad_norm": 0.8313599065207818, "learning_rate": 2.3026558025179775e-05, "loss": 0.0992, "step": 18195 }, { "epoch": 2.15771374362623, "grad_norm": 1.0520444854702482, "learning_rate": 2.3024165163344862e-05, "loss": 0.1458, "step": 18196 }, { "epoch": 2.1578323253883553, "grad_norm": 0.6369963534844891, "learning_rate": 2.3021772319724716e-05, "loss": 0.0992, "step": 18197 }, { "epoch": 2.15795090715048, "grad_norm": 1.2679832026652074, "learning_rate": 2.3019379494341412e-05, "loss": 0.1128, "step": 18198 }, { "epoch": 2.1580694889126053, "grad_norm": 0.87661997533867, "learning_rate": 2.3016986687217002e-05, "loss": 0.0942, "step": 18199 }, { "epoch": 2.15818807067473, "grad_norm": 0.9937268469813308, "learning_rate": 2.3014593898373534e-05, "loss": 0.1095, "step": 18200 }, { "epoch": 2.1583066524368553, "grad_norm": 0.708482344464271, "learning_rate": 2.301220112783308e-05, "loss": 0.0863, "step": 18201 }, { "epoch": 2.15842523419898, "grad_norm": 0.949234488936392, "learning_rate": 2.3009808375617692e-05, "loss": 0.1126, "step": 18202 }, { "epoch": 2.1585438159611052, "grad_norm": 0.5821814492676864, "learning_rate": 2.3007415641749437e-05, "loss": 0.0796, "step": 18203 }, { "epoch": 2.15866239772323, "grad_norm": 0.5739562806168383, "learning_rate": 2.3005022926250353e-05, "loss": 0.0838, "step": 18204 }, { "epoch": 2.158780979485355, "grad_norm": 0.6339446001154138, "learning_rate": 2.3002630229142525e-05, "loss": 0.0876, "step": 18205 }, { "epoch": 2.15889956124748, "grad_norm": 0.5748548478942015, "learning_rate": 2.3000237550447995e-05, "loss": 0.0659, "step": 18206 }, { "epoch": 2.159018143009605, "grad_norm": 0.7259764121124648, "learning_rate": 2.299784489018882e-05, "loss": 0.1092, "step": 18207 }, { "epoch": 2.15913672477173, "grad_norm": 0.7915851391422019, "learning_rate": 2.2995452248387054e-05, "loss": 0.1002, "step": 18208 }, { "epoch": 2.159255306533855, "grad_norm": 0.6583993269000132, "learning_rate": 2.2993059625064768e-05, "loss": 0.0858, "step": 18209 }, { "epoch": 2.15937388829598, "grad_norm": 0.5447839740172392, "learning_rate": 2.2990667020244013e-05, "loss": 0.0768, "step": 18210 }, { "epoch": 2.159492470058105, "grad_norm": 0.8844225903989047, "learning_rate": 2.2988274433946838e-05, "loss": 0.0922, "step": 18211 }, { "epoch": 2.15961105182023, "grad_norm": 0.9061308563486061, "learning_rate": 2.2985881866195307e-05, "loss": 0.1221, "step": 18212 }, { "epoch": 2.159729633582355, "grad_norm": 0.5727541154692406, "learning_rate": 2.2983489317011473e-05, "loss": 0.0721, "step": 18213 }, { "epoch": 2.1598482153444802, "grad_norm": 0.779447172881068, "learning_rate": 2.29810967864174e-05, "loss": 0.1152, "step": 18214 }, { "epoch": 2.159966797106605, "grad_norm": 0.7560684136078012, "learning_rate": 2.297870427443514e-05, "loss": 0.1104, "step": 18215 }, { "epoch": 2.1600853788687298, "grad_norm": 0.8064054902796883, "learning_rate": 2.2976311781086735e-05, "loss": 0.1076, "step": 18216 }, { "epoch": 2.160203960630855, "grad_norm": 0.8791936436689805, "learning_rate": 2.297391930639427e-05, "loss": 0.0933, "step": 18217 }, { "epoch": 2.16032254239298, "grad_norm": 0.9778196724452377, "learning_rate": 2.297152685037978e-05, "loss": 0.1386, "step": 18218 }, { "epoch": 2.160441124155105, "grad_norm": 1.1474200461570863, "learning_rate": 2.2969134413065313e-05, "loss": 0.1442, "step": 18219 }, { "epoch": 2.16055970591723, "grad_norm": 0.6206208781347465, "learning_rate": 2.296674199447295e-05, "loss": 0.0854, "step": 18220 }, { "epoch": 2.160678287679355, "grad_norm": 0.7846071374243185, "learning_rate": 2.2964349594624726e-05, "loss": 0.0843, "step": 18221 }, { "epoch": 2.16079686944148, "grad_norm": 0.5699595273521295, "learning_rate": 2.296195721354271e-05, "loss": 0.0751, "step": 18222 }, { "epoch": 2.160915451203605, "grad_norm": 2.6637669414925957, "learning_rate": 2.2959564851248936e-05, "loss": 0.1343, "step": 18223 }, { "epoch": 2.16103403296573, "grad_norm": 0.5510407334097618, "learning_rate": 2.2957172507765482e-05, "loss": 0.0759, "step": 18224 }, { "epoch": 2.161152614727855, "grad_norm": 1.104142927478328, "learning_rate": 2.2954780183114392e-05, "loss": 0.1436, "step": 18225 }, { "epoch": 2.16127119648998, "grad_norm": 1.1549746325924886, "learning_rate": 2.2952387877317724e-05, "loss": 0.104, "step": 18226 }, { "epoch": 2.1613897782521048, "grad_norm": 0.9521459397836233, "learning_rate": 2.2949995590397518e-05, "loss": 0.1217, "step": 18227 }, { "epoch": 2.16150836001423, "grad_norm": 1.1029606918239991, "learning_rate": 2.2947603322375846e-05, "loss": 0.1699, "step": 18228 }, { "epoch": 2.1616269417763547, "grad_norm": 0.7516645562852932, "learning_rate": 2.294521107327476e-05, "loss": 0.1009, "step": 18229 }, { "epoch": 2.16174552353848, "grad_norm": 0.5804239178780992, "learning_rate": 2.2942818843116297e-05, "loss": 0.0647, "step": 18230 }, { "epoch": 2.1618641053006047, "grad_norm": 0.6407003116576069, "learning_rate": 2.2940426631922523e-05, "loss": 0.1037, "step": 18231 }, { "epoch": 2.16198268706273, "grad_norm": 0.6636363062692989, "learning_rate": 2.293803443971549e-05, "loss": 0.0875, "step": 18232 }, { "epoch": 2.1621012688248546, "grad_norm": 0.7118411591031854, "learning_rate": 2.2935642266517256e-05, "loss": 0.1005, "step": 18233 }, { "epoch": 2.16221985058698, "grad_norm": 0.8723383342226, "learning_rate": 2.293325011234986e-05, "loss": 0.1181, "step": 18234 }, { "epoch": 2.1623384323491046, "grad_norm": 0.9404470103079484, "learning_rate": 2.2930857977235372e-05, "loss": 0.1239, "step": 18235 }, { "epoch": 2.16245701411123, "grad_norm": 0.7508830457331587, "learning_rate": 2.292846586119584e-05, "loss": 0.0903, "step": 18236 }, { "epoch": 2.1625755958733546, "grad_norm": 0.7981798034775671, "learning_rate": 2.292607376425331e-05, "loss": 0.1048, "step": 18237 }, { "epoch": 2.1626941776354798, "grad_norm": 0.6357129292179071, "learning_rate": 2.2923681686429825e-05, "loss": 0.1041, "step": 18238 }, { "epoch": 2.1628127593976045, "grad_norm": 0.9339322282307543, "learning_rate": 2.292128962774746e-05, "loss": 0.1286, "step": 18239 }, { "epoch": 2.1629313411597297, "grad_norm": 0.5900079585744792, "learning_rate": 2.291889758822825e-05, "loss": 0.0777, "step": 18240 }, { "epoch": 2.1630499229218545, "grad_norm": 0.6057186965395314, "learning_rate": 2.291650556789426e-05, "loss": 0.0851, "step": 18241 }, { "epoch": 2.1631685046839797, "grad_norm": 1.0299354487485834, "learning_rate": 2.291411356676752e-05, "loss": 0.1511, "step": 18242 }, { "epoch": 2.1632870864461045, "grad_norm": 0.6098408007362349, "learning_rate": 2.2911721584870108e-05, "loss": 0.0794, "step": 18243 }, { "epoch": 2.1634056682082297, "grad_norm": 0.7852178840167741, "learning_rate": 2.290932962222406e-05, "loss": 0.1009, "step": 18244 }, { "epoch": 2.1635242499703544, "grad_norm": 0.6059217794429567, "learning_rate": 2.290693767885142e-05, "loss": 0.0812, "step": 18245 }, { "epoch": 2.1636428317324796, "grad_norm": 0.501371366623731, "learning_rate": 2.2904545754774258e-05, "loss": 0.0708, "step": 18246 }, { "epoch": 2.1637614134946044, "grad_norm": 1.0136155232677024, "learning_rate": 2.2902153850014616e-05, "loss": 0.1086, "step": 18247 }, { "epoch": 2.1638799952567296, "grad_norm": 1.0416178785885206, "learning_rate": 2.289976196459454e-05, "loss": 0.1484, "step": 18248 }, { "epoch": 2.1639985770188543, "grad_norm": 0.6014384038024505, "learning_rate": 2.2897370098536075e-05, "loss": 0.0988, "step": 18249 }, { "epoch": 2.1641171587809795, "grad_norm": 0.6771301118220179, "learning_rate": 2.2894978251861286e-05, "loss": 0.1024, "step": 18250 }, { "epoch": 2.1642357405431043, "grad_norm": 0.6364682743103897, "learning_rate": 2.2892586424592216e-05, "loss": 0.1135, "step": 18251 }, { "epoch": 2.1643543223052295, "grad_norm": 0.7864691796077026, "learning_rate": 2.2890194616750916e-05, "loss": 0.1307, "step": 18252 }, { "epoch": 2.1644729040673543, "grad_norm": 0.7929437817317556, "learning_rate": 2.288780282835943e-05, "loss": 0.1184, "step": 18253 }, { "epoch": 2.1645914858294795, "grad_norm": 0.7035995773669962, "learning_rate": 2.2885411059439815e-05, "loss": 0.1095, "step": 18254 }, { "epoch": 2.1647100675916042, "grad_norm": 0.9533224956704623, "learning_rate": 2.288301931001412e-05, "loss": 0.1338, "step": 18255 }, { "epoch": 2.1648286493537294, "grad_norm": 0.6429047166376552, "learning_rate": 2.288062758010439e-05, "loss": 0.0707, "step": 18256 }, { "epoch": 2.164947231115854, "grad_norm": 0.9607835011856475, "learning_rate": 2.2878235869732668e-05, "loss": 0.1284, "step": 18257 }, { "epoch": 2.1650658128779794, "grad_norm": 0.6514257882479402, "learning_rate": 2.2875844178921015e-05, "loss": 0.0776, "step": 18258 }, { "epoch": 2.165184394640104, "grad_norm": 0.473091441549393, "learning_rate": 2.287345250769148e-05, "loss": 0.0642, "step": 18259 }, { "epoch": 2.1653029764022294, "grad_norm": 0.598712124473958, "learning_rate": 2.287106085606609e-05, "loss": 0.0756, "step": 18260 }, { "epoch": 2.165421558164354, "grad_norm": 0.48076209454749275, "learning_rate": 2.286866922406692e-05, "loss": 0.0618, "step": 18261 }, { "epoch": 2.1655401399264793, "grad_norm": 0.6597757168699985, "learning_rate": 2.2866277611716e-05, "loss": 0.0909, "step": 18262 }, { "epoch": 2.1656587216886045, "grad_norm": 0.9629040082152795, "learning_rate": 2.286388601903539e-05, "loss": 0.1445, "step": 18263 }, { "epoch": 2.1657773034507293, "grad_norm": 0.9807162126865389, "learning_rate": 2.2861494446047118e-05, "loss": 0.133, "step": 18264 }, { "epoch": 2.165895885212854, "grad_norm": 0.7777359188908318, "learning_rate": 2.285910289277326e-05, "loss": 0.093, "step": 18265 }, { "epoch": 2.1660144669749792, "grad_norm": 1.0218242770480346, "learning_rate": 2.2856711359235846e-05, "loss": 0.1566, "step": 18266 }, { "epoch": 2.1661330487371044, "grad_norm": 0.8204177483641968, "learning_rate": 2.285431984545692e-05, "loss": 0.1263, "step": 18267 }, { "epoch": 2.166251630499229, "grad_norm": 0.622232035290334, "learning_rate": 2.285192835145853e-05, "loss": 0.0781, "step": 18268 }, { "epoch": 2.1663702122613544, "grad_norm": 0.641021750534238, "learning_rate": 2.2849536877262737e-05, "loss": 0.0777, "step": 18269 }, { "epoch": 2.166488794023479, "grad_norm": 0.6968676421006453, "learning_rate": 2.284714542289157e-05, "loss": 0.0744, "step": 18270 }, { "epoch": 2.1666073757856044, "grad_norm": 0.7141336918824486, "learning_rate": 2.2844753988367086e-05, "loss": 0.1059, "step": 18271 }, { "epoch": 2.166725957547729, "grad_norm": 0.8536177265001599, "learning_rate": 2.284236257371132e-05, "loss": 0.0962, "step": 18272 }, { "epoch": 2.1668445393098543, "grad_norm": 0.5859319508345553, "learning_rate": 2.283997117894633e-05, "loss": 0.0915, "step": 18273 }, { "epoch": 2.166963121071979, "grad_norm": 0.82883504866387, "learning_rate": 2.2837579804094163e-05, "loss": 0.1011, "step": 18274 }, { "epoch": 2.1670817028341043, "grad_norm": 0.6979317134617556, "learning_rate": 2.2835188449176844e-05, "loss": 0.0818, "step": 18275 }, { "epoch": 2.167200284596229, "grad_norm": 0.6957083314978458, "learning_rate": 2.2832797114216445e-05, "loss": 0.0683, "step": 18276 }, { "epoch": 2.1673188663583542, "grad_norm": 0.7200992650012682, "learning_rate": 2.2830405799235e-05, "loss": 0.101, "step": 18277 }, { "epoch": 2.167437448120479, "grad_norm": 1.193263642257512, "learning_rate": 2.2828014504254554e-05, "loss": 0.1607, "step": 18278 }, { "epoch": 2.167556029882604, "grad_norm": 0.7075843010557745, "learning_rate": 2.2825623229297135e-05, "loss": 0.1239, "step": 18279 }, { "epoch": 2.167674611644729, "grad_norm": 0.4375121011368977, "learning_rate": 2.282323197438482e-05, "loss": 0.0534, "step": 18280 }, { "epoch": 2.167793193406854, "grad_norm": 0.8140769040455942, "learning_rate": 2.2820840739539624e-05, "loss": 0.1029, "step": 18281 }, { "epoch": 2.167911775168979, "grad_norm": 0.6677092388348671, "learning_rate": 2.2818449524783615e-05, "loss": 0.0822, "step": 18282 }, { "epoch": 2.168030356931104, "grad_norm": 0.7395631073800455, "learning_rate": 2.2816058330138815e-05, "loss": 0.0986, "step": 18283 }, { "epoch": 2.168148938693229, "grad_norm": 0.7817260724117859, "learning_rate": 2.281366715562729e-05, "loss": 0.121, "step": 18284 }, { "epoch": 2.168267520455354, "grad_norm": 1.1057630530401805, "learning_rate": 2.2811276001271072e-05, "loss": 0.132, "step": 18285 }, { "epoch": 2.168386102217479, "grad_norm": 0.9439367959186609, "learning_rate": 2.2808884867092206e-05, "loss": 0.133, "step": 18286 }, { "epoch": 2.168504683979604, "grad_norm": 0.9664546902477019, "learning_rate": 2.2806493753112725e-05, "loss": 0.1055, "step": 18287 }, { "epoch": 2.168623265741729, "grad_norm": 0.6744569868047293, "learning_rate": 2.280410265935469e-05, "loss": 0.081, "step": 18288 }, { "epoch": 2.168741847503854, "grad_norm": 0.9152277351924556, "learning_rate": 2.2801711585840135e-05, "loss": 0.1388, "step": 18289 }, { "epoch": 2.1688604292659788, "grad_norm": 0.9080784985736691, "learning_rate": 2.2799320532591097e-05, "loss": 0.1149, "step": 18290 }, { "epoch": 2.168979011028104, "grad_norm": 0.8869363878699742, "learning_rate": 2.279692949962963e-05, "loss": 0.128, "step": 18291 }, { "epoch": 2.1690975927902287, "grad_norm": 1.1724835846878408, "learning_rate": 2.279453848697777e-05, "loss": 0.117, "step": 18292 }, { "epoch": 2.169216174552354, "grad_norm": 0.5833403144601471, "learning_rate": 2.2792147494657565e-05, "loss": 0.078, "step": 18293 }, { "epoch": 2.1693347563144787, "grad_norm": 0.760803300490983, "learning_rate": 2.278975652269104e-05, "loss": 0.1193, "step": 18294 }, { "epoch": 2.169453338076604, "grad_norm": 0.5947458443103202, "learning_rate": 2.2787365571100264e-05, "loss": 0.0744, "step": 18295 }, { "epoch": 2.1695719198387287, "grad_norm": 0.8576205515718955, "learning_rate": 2.278497463990726e-05, "loss": 0.0934, "step": 18296 }, { "epoch": 2.169690501600854, "grad_norm": 0.6336413528338436, "learning_rate": 2.2782583729134077e-05, "loss": 0.0885, "step": 18297 }, { "epoch": 2.1698090833629786, "grad_norm": 0.6694818081251378, "learning_rate": 2.2780192838802742e-05, "loss": 0.0805, "step": 18298 }, { "epoch": 2.169927665125104, "grad_norm": 1.0330677456949513, "learning_rate": 2.2777801968935317e-05, "loss": 0.1083, "step": 18299 }, { "epoch": 2.1700462468872286, "grad_norm": 0.8102788226463821, "learning_rate": 2.2775411119553827e-05, "loss": 0.1148, "step": 18300 }, { "epoch": 2.170164828649354, "grad_norm": 0.9252918618323722, "learning_rate": 2.2773020290680323e-05, "loss": 0.1258, "step": 18301 }, { "epoch": 2.1702834104114785, "grad_norm": 0.5699441198542027, "learning_rate": 2.277062948233683e-05, "loss": 0.0903, "step": 18302 }, { "epoch": 2.1704019921736037, "grad_norm": 0.7166337252277443, "learning_rate": 2.276823869454541e-05, "loss": 0.1102, "step": 18303 }, { "epoch": 2.1705205739357285, "grad_norm": 0.753154963573039, "learning_rate": 2.2765847927328096e-05, "loss": 0.1249, "step": 18304 }, { "epoch": 2.1706391556978537, "grad_norm": 0.6752992691138443, "learning_rate": 2.2763457180706912e-05, "loss": 0.0984, "step": 18305 }, { "epoch": 2.1707577374599785, "grad_norm": 0.8514397172089723, "learning_rate": 2.276106645470392e-05, "loss": 0.1133, "step": 18306 }, { "epoch": 2.1708763192221037, "grad_norm": 0.722478801393599, "learning_rate": 2.2758675749341152e-05, "loss": 0.087, "step": 18307 }, { "epoch": 2.1709949009842284, "grad_norm": 0.7176812619953669, "learning_rate": 2.275628506464064e-05, "loss": 0.1044, "step": 18308 }, { "epoch": 2.1711134827463536, "grad_norm": 0.8417819659851523, "learning_rate": 2.275389440062442e-05, "loss": 0.1163, "step": 18309 }, { "epoch": 2.1712320645084784, "grad_norm": 0.8737524081600735, "learning_rate": 2.2751503757314552e-05, "loss": 0.1173, "step": 18310 }, { "epoch": 2.1713506462706036, "grad_norm": 0.7922417472269576, "learning_rate": 2.2749113134733056e-05, "loss": 0.1173, "step": 18311 }, { "epoch": 2.171469228032729, "grad_norm": 0.9723759271753204, "learning_rate": 2.274672253290198e-05, "loss": 0.1004, "step": 18312 }, { "epoch": 2.1715878097948536, "grad_norm": 0.8434536425209765, "learning_rate": 2.274433195184335e-05, "loss": 0.1049, "step": 18313 }, { "epoch": 2.1717063915569783, "grad_norm": 0.5518200635381321, "learning_rate": 2.2741941391579224e-05, "loss": 0.078, "step": 18314 }, { "epoch": 2.1718249733191035, "grad_norm": 0.8200728928633869, "learning_rate": 2.273955085213163e-05, "loss": 0.0899, "step": 18315 }, { "epoch": 2.1719435550812287, "grad_norm": 0.5701326390560173, "learning_rate": 2.2737160333522595e-05, "loss": 0.0655, "step": 18316 }, { "epoch": 2.1720621368433535, "grad_norm": 0.6631931948465511, "learning_rate": 2.2734769835774175e-05, "loss": 0.098, "step": 18317 }, { "epoch": 2.1721807186054787, "grad_norm": 0.8217616601624214, "learning_rate": 2.27323793589084e-05, "loss": 0.1081, "step": 18318 }, { "epoch": 2.1722993003676034, "grad_norm": 0.9520943155516961, "learning_rate": 2.27299889029473e-05, "loss": 0.1415, "step": 18319 }, { "epoch": 2.1724178821297286, "grad_norm": 0.6464089155509882, "learning_rate": 2.2727598467912927e-05, "loss": 0.0945, "step": 18320 }, { "epoch": 2.1725364638918534, "grad_norm": 0.8745565880353616, "learning_rate": 2.27252080538273e-05, "loss": 0.1164, "step": 18321 }, { "epoch": 2.1726550456539786, "grad_norm": 0.7824845950562695, "learning_rate": 2.2722817660712474e-05, "loss": 0.1106, "step": 18322 }, { "epoch": 2.1727736274161034, "grad_norm": 0.7511035302105583, "learning_rate": 2.2720427288590475e-05, "loss": 0.1129, "step": 18323 }, { "epoch": 2.1728922091782286, "grad_norm": 1.0341494843139387, "learning_rate": 2.271803693748333e-05, "loss": 0.1049, "step": 18324 }, { "epoch": 2.1730107909403533, "grad_norm": 0.4129142072829262, "learning_rate": 2.27156466074131e-05, "loss": 0.0539, "step": 18325 }, { "epoch": 2.1731293727024785, "grad_norm": 0.9221415722164602, "learning_rate": 2.2713256298401804e-05, "loss": 0.1393, "step": 18326 }, { "epoch": 2.1732479544646033, "grad_norm": 1.0565010273792987, "learning_rate": 2.2710866010471485e-05, "loss": 0.1546, "step": 18327 }, { "epoch": 2.1733665362267285, "grad_norm": 0.5626137480955598, "learning_rate": 2.270847574364416e-05, "loss": 0.0796, "step": 18328 }, { "epoch": 2.1734851179888532, "grad_norm": 0.7049042079273979, "learning_rate": 2.270608549794189e-05, "loss": 0.0953, "step": 18329 }, { "epoch": 2.1736036997509784, "grad_norm": 0.8316444250734987, "learning_rate": 2.2703695273386694e-05, "loss": 0.1203, "step": 18330 }, { "epoch": 2.173722281513103, "grad_norm": 0.8819118556347035, "learning_rate": 2.2701305070000608e-05, "loss": 0.1049, "step": 18331 }, { "epoch": 2.1738408632752284, "grad_norm": 0.8606751771318868, "learning_rate": 2.2698914887805677e-05, "loss": 0.1334, "step": 18332 }, { "epoch": 2.173959445037353, "grad_norm": 1.0430466488178458, "learning_rate": 2.269652472682393e-05, "loss": 0.134, "step": 18333 }, { "epoch": 2.1740780267994784, "grad_norm": 1.1507572723655977, "learning_rate": 2.26941345870774e-05, "loss": 0.1388, "step": 18334 }, { "epoch": 2.174196608561603, "grad_norm": 0.7096575564288057, "learning_rate": 2.2691744468588112e-05, "loss": 0.0978, "step": 18335 }, { "epoch": 2.1743151903237283, "grad_norm": 0.6959499286390484, "learning_rate": 2.268935437137812e-05, "loss": 0.1017, "step": 18336 }, { "epoch": 2.174433772085853, "grad_norm": 0.6089125546184191, "learning_rate": 2.268696429546945e-05, "loss": 0.0829, "step": 18337 }, { "epoch": 2.1745523538479783, "grad_norm": 0.7151441485909925, "learning_rate": 2.2684574240884127e-05, "loss": 0.088, "step": 18338 }, { "epoch": 2.174670935610103, "grad_norm": 3.003834054967566, "learning_rate": 2.268218420764418e-05, "loss": 0.1092, "step": 18339 }, { "epoch": 2.1747895173722283, "grad_norm": 0.7515848285662812, "learning_rate": 2.267979419577167e-05, "loss": 0.1215, "step": 18340 }, { "epoch": 2.174908099134353, "grad_norm": 0.625468584565516, "learning_rate": 2.26774042052886e-05, "loss": 0.1052, "step": 18341 }, { "epoch": 2.175026680896478, "grad_norm": 0.9036370395316549, "learning_rate": 2.2675014236217025e-05, "loss": 0.1339, "step": 18342 }, { "epoch": 2.175145262658603, "grad_norm": 0.7140695346848739, "learning_rate": 2.2672624288578954e-05, "loss": 0.0917, "step": 18343 }, { "epoch": 2.175263844420728, "grad_norm": 0.7588220458319599, "learning_rate": 2.267023436239644e-05, "loss": 0.0932, "step": 18344 }, { "epoch": 2.175382426182853, "grad_norm": 0.624413949780555, "learning_rate": 2.2667844457691516e-05, "loss": 0.0846, "step": 18345 }, { "epoch": 2.175501007944978, "grad_norm": 0.49367819901287535, "learning_rate": 2.266545457448619e-05, "loss": 0.0709, "step": 18346 }, { "epoch": 2.175619589707103, "grad_norm": 0.8211234904676614, "learning_rate": 2.2663064712802523e-05, "loss": 0.1002, "step": 18347 }, { "epoch": 2.175738171469228, "grad_norm": 0.679032546004311, "learning_rate": 2.2660674872662535e-05, "loss": 0.0969, "step": 18348 }, { "epoch": 2.175856753231353, "grad_norm": 0.8518795654572343, "learning_rate": 2.265828505408825e-05, "loss": 0.1209, "step": 18349 }, { "epoch": 2.175975334993478, "grad_norm": 0.5803799459416905, "learning_rate": 2.2655895257101704e-05, "loss": 0.0774, "step": 18350 }, { "epoch": 2.176093916755603, "grad_norm": 0.5721590550082291, "learning_rate": 2.265350548172493e-05, "loss": 0.0652, "step": 18351 }, { "epoch": 2.176212498517728, "grad_norm": 0.6892649532482126, "learning_rate": 2.265111572797996e-05, "loss": 0.0915, "step": 18352 }, { "epoch": 2.176331080279853, "grad_norm": 0.9691564054566336, "learning_rate": 2.2648725995888824e-05, "loss": 0.1115, "step": 18353 }, { "epoch": 2.176449662041978, "grad_norm": 0.7303810162106675, "learning_rate": 2.2646336285473544e-05, "loss": 0.0842, "step": 18354 }, { "epoch": 2.1765682438041027, "grad_norm": 0.6674254352084464, "learning_rate": 2.2643946596756166e-05, "loss": 0.0918, "step": 18355 }, { "epoch": 2.176686825566228, "grad_norm": 0.7818826798810806, "learning_rate": 2.264155692975871e-05, "loss": 0.096, "step": 18356 }, { "epoch": 2.1768054073283527, "grad_norm": 0.8140951146278154, "learning_rate": 2.263916728450321e-05, "loss": 0.1164, "step": 18357 }, { "epoch": 2.176923989090478, "grad_norm": 0.61765318242083, "learning_rate": 2.2636777661011678e-05, "loss": 0.0934, "step": 18358 }, { "epoch": 2.1770425708526027, "grad_norm": 0.7568926575318811, "learning_rate": 2.2634388059306172e-05, "loss": 0.1169, "step": 18359 }, { "epoch": 2.177161152614728, "grad_norm": 0.9143073318977126, "learning_rate": 2.26319984794087e-05, "loss": 0.1456, "step": 18360 }, { "epoch": 2.177279734376853, "grad_norm": 0.7416939863210084, "learning_rate": 2.2629608921341297e-05, "loss": 0.0873, "step": 18361 }, { "epoch": 2.177398316138978, "grad_norm": 0.864125560380093, "learning_rate": 2.2627219385125994e-05, "loss": 0.1145, "step": 18362 }, { "epoch": 2.1775168979011026, "grad_norm": 0.8401173559800118, "learning_rate": 2.2624829870784825e-05, "loss": 0.1287, "step": 18363 }, { "epoch": 2.177635479663228, "grad_norm": 0.5757634253735214, "learning_rate": 2.2622440378339814e-05, "loss": 0.0707, "step": 18364 }, { "epoch": 2.177754061425353, "grad_norm": 0.7346383461819286, "learning_rate": 2.262005090781297e-05, "loss": 0.1124, "step": 18365 }, { "epoch": 2.1778726431874778, "grad_norm": 0.8092716789497358, "learning_rate": 2.261766145922635e-05, "loss": 0.1051, "step": 18366 }, { "epoch": 2.177991224949603, "grad_norm": 0.5883538777524814, "learning_rate": 2.2615272032601976e-05, "loss": 0.0863, "step": 18367 }, { "epoch": 2.1781098067117277, "grad_norm": 0.796243580203159, "learning_rate": 2.261288262796186e-05, "loss": 0.1114, "step": 18368 }, { "epoch": 2.178228388473853, "grad_norm": 0.866030785134124, "learning_rate": 2.261049324532804e-05, "loss": 0.1257, "step": 18369 }, { "epoch": 2.1783469702359777, "grad_norm": 0.632412797306405, "learning_rate": 2.260810388472254e-05, "loss": 0.0979, "step": 18370 }, { "epoch": 2.178465551998103, "grad_norm": 0.8436535197050752, "learning_rate": 2.2605714546167398e-05, "loss": 0.1001, "step": 18371 }, { "epoch": 2.1785841337602276, "grad_norm": 0.6733828937384795, "learning_rate": 2.260332522968463e-05, "loss": 0.1044, "step": 18372 }, { "epoch": 2.178702715522353, "grad_norm": 0.7718749800912684, "learning_rate": 2.2600935935296252e-05, "loss": 0.0969, "step": 18373 }, { "epoch": 2.1788212972844776, "grad_norm": 0.7443402946307769, "learning_rate": 2.2598546663024316e-05, "loss": 0.1064, "step": 18374 }, { "epoch": 2.178939879046603, "grad_norm": 0.44122894960372244, "learning_rate": 2.2596157412890833e-05, "loss": 0.0707, "step": 18375 }, { "epoch": 2.1790584608087276, "grad_norm": 0.6022952125115137, "learning_rate": 2.2593768184917826e-05, "loss": 0.0748, "step": 18376 }, { "epoch": 2.1791770425708528, "grad_norm": 0.8551091655642724, "learning_rate": 2.2591378979127327e-05, "loss": 0.137, "step": 18377 }, { "epoch": 2.1792956243329775, "grad_norm": 0.6321826786671534, "learning_rate": 2.2588989795541364e-05, "loss": 0.0676, "step": 18378 }, { "epoch": 2.1794142060951027, "grad_norm": 0.9418576956569064, "learning_rate": 2.2586600634181955e-05, "loss": 0.1291, "step": 18379 }, { "epoch": 2.1795327878572275, "grad_norm": 0.6218494200558723, "learning_rate": 2.2584211495071127e-05, "loss": 0.0894, "step": 18380 }, { "epoch": 2.1796513696193527, "grad_norm": 1.6103034691789562, "learning_rate": 2.258182237823091e-05, "loss": 0.1591, "step": 18381 }, { "epoch": 2.1797699513814774, "grad_norm": 0.717856525605982, "learning_rate": 2.2579433283683328e-05, "loss": 0.1064, "step": 18382 }, { "epoch": 2.1798885331436026, "grad_norm": 0.5781432021685317, "learning_rate": 2.25770442114504e-05, "loss": 0.0887, "step": 18383 }, { "epoch": 2.1800071149057274, "grad_norm": 0.6810072582568776, "learning_rate": 2.2574655161554146e-05, "loss": 0.0829, "step": 18384 }, { "epoch": 2.1801256966678526, "grad_norm": 0.5796746374307297, "learning_rate": 2.2572266134016605e-05, "loss": 0.0748, "step": 18385 }, { "epoch": 2.1802442784299774, "grad_norm": 0.7252697066664271, "learning_rate": 2.2569877128859796e-05, "loss": 0.0837, "step": 18386 }, { "epoch": 2.1803628601921026, "grad_norm": 0.6936429972178799, "learning_rate": 2.256748814610574e-05, "loss": 0.0926, "step": 18387 }, { "epoch": 2.1804814419542273, "grad_norm": 0.6408206579981919, "learning_rate": 2.256509918577645e-05, "loss": 0.0845, "step": 18388 }, { "epoch": 2.1806000237163525, "grad_norm": 0.8111486459116686, "learning_rate": 2.2562710247893967e-05, "loss": 0.1062, "step": 18389 }, { "epoch": 2.1807186054784773, "grad_norm": 0.8114353587718467, "learning_rate": 2.2560321332480302e-05, "loss": 0.103, "step": 18390 }, { "epoch": 2.1808371872406025, "grad_norm": 1.0135205173648256, "learning_rate": 2.2557932439557482e-05, "loss": 0.1337, "step": 18391 }, { "epoch": 2.1809557690027273, "grad_norm": 0.6727239415536918, "learning_rate": 2.2555543569147535e-05, "loss": 0.1008, "step": 18392 }, { "epoch": 2.1810743507648525, "grad_norm": 0.6940193055648642, "learning_rate": 2.255315472127248e-05, "loss": 0.0998, "step": 18393 }, { "epoch": 2.181192932526977, "grad_norm": 0.7916506435322717, "learning_rate": 2.255076589595434e-05, "loss": 0.121, "step": 18394 }, { "epoch": 2.1813115142891024, "grad_norm": 0.8581349122837684, "learning_rate": 2.254837709321512e-05, "loss": 0.1223, "step": 18395 }, { "epoch": 2.181430096051227, "grad_norm": 0.5744465272118806, "learning_rate": 2.2545988313076873e-05, "loss": 0.0782, "step": 18396 }, { "epoch": 2.1815486778133524, "grad_norm": 0.6944368069181848, "learning_rate": 2.2543599555561605e-05, "loss": 0.0901, "step": 18397 }, { "epoch": 2.181667259575477, "grad_norm": 0.8708443380600385, "learning_rate": 2.2541210820691328e-05, "loss": 0.1155, "step": 18398 }, { "epoch": 2.1817858413376023, "grad_norm": 0.5016240411038565, "learning_rate": 2.2538822108488073e-05, "loss": 0.0708, "step": 18399 }, { "epoch": 2.181904423099727, "grad_norm": 0.9561088761444287, "learning_rate": 2.2536433418973856e-05, "loss": 0.1169, "step": 18400 }, { "epoch": 2.1820230048618523, "grad_norm": 1.156739113795515, "learning_rate": 2.2534044752170714e-05, "loss": 0.1472, "step": 18401 }, { "epoch": 2.182141586623977, "grad_norm": 0.5885252429001123, "learning_rate": 2.253165610810064e-05, "loss": 0.0731, "step": 18402 }, { "epoch": 2.1822601683861023, "grad_norm": 0.8219878705210989, "learning_rate": 2.2529267486785682e-05, "loss": 0.1269, "step": 18403 }, { "epoch": 2.182378750148227, "grad_norm": 0.6607280206530093, "learning_rate": 2.252687888824785e-05, "loss": 0.092, "step": 18404 }, { "epoch": 2.1824973319103522, "grad_norm": 0.958121881826483, "learning_rate": 2.252449031250916e-05, "loss": 0.1376, "step": 18405 }, { "epoch": 2.182615913672477, "grad_norm": 0.682000712620283, "learning_rate": 2.2522101759591624e-05, "loss": 0.077, "step": 18406 }, { "epoch": 2.182734495434602, "grad_norm": 0.6361306425914606, "learning_rate": 2.251971322951728e-05, "loss": 0.0906, "step": 18407 }, { "epoch": 2.182853077196727, "grad_norm": 0.8364681159743217, "learning_rate": 2.2517324722308143e-05, "loss": 0.1297, "step": 18408 }, { "epoch": 2.182971658958852, "grad_norm": 0.8008265498746963, "learning_rate": 2.2514936237986218e-05, "loss": 0.1167, "step": 18409 }, { "epoch": 2.1830902407209773, "grad_norm": 0.7846279771798135, "learning_rate": 2.2512547776573535e-05, "loss": 0.1175, "step": 18410 }, { "epoch": 2.183208822483102, "grad_norm": 0.5780426779490427, "learning_rate": 2.2510159338092108e-05, "loss": 0.0739, "step": 18411 }, { "epoch": 2.183327404245227, "grad_norm": 0.7598249805558362, "learning_rate": 2.2507770922563966e-05, "loss": 0.1014, "step": 18412 }, { "epoch": 2.183445986007352, "grad_norm": 0.6995332081899455, "learning_rate": 2.250538253001112e-05, "loss": 0.0952, "step": 18413 }, { "epoch": 2.1835645677694773, "grad_norm": 0.7459147098463338, "learning_rate": 2.250299416045558e-05, "loss": 0.0935, "step": 18414 }, { "epoch": 2.183683149531602, "grad_norm": 0.9633422322286446, "learning_rate": 2.250060581391938e-05, "loss": 0.125, "step": 18415 }, { "epoch": 2.183801731293727, "grad_norm": 1.0553145512838658, "learning_rate": 2.249821749042453e-05, "loss": 0.1494, "step": 18416 }, { "epoch": 2.183920313055852, "grad_norm": 0.6919259422274493, "learning_rate": 2.2495829189993032e-05, "loss": 0.0957, "step": 18417 }, { "epoch": 2.184038894817977, "grad_norm": 0.5981364680504196, "learning_rate": 2.2493440912646935e-05, "loss": 0.0818, "step": 18418 }, { "epoch": 2.184157476580102, "grad_norm": 0.6521651346338427, "learning_rate": 2.2491052658408235e-05, "loss": 0.0888, "step": 18419 }, { "epoch": 2.184276058342227, "grad_norm": 0.5605763378526037, "learning_rate": 2.248866442729895e-05, "loss": 0.078, "step": 18420 }, { "epoch": 2.184394640104352, "grad_norm": 0.5865603058262729, "learning_rate": 2.24862762193411e-05, "loss": 0.083, "step": 18421 }, { "epoch": 2.184513221866477, "grad_norm": 0.9324292117579831, "learning_rate": 2.24838880345567e-05, "loss": 0.1278, "step": 18422 }, { "epoch": 2.184631803628602, "grad_norm": 0.5569855986859769, "learning_rate": 2.248149987296777e-05, "loss": 0.0854, "step": 18423 }, { "epoch": 2.184750385390727, "grad_norm": 0.7840562013418243, "learning_rate": 2.2479111734596323e-05, "loss": 0.1206, "step": 18424 }, { "epoch": 2.184868967152852, "grad_norm": 0.8125754311456357, "learning_rate": 2.2476723619464367e-05, "loss": 0.1045, "step": 18425 }, { "epoch": 2.184987548914977, "grad_norm": 0.7263410690845682, "learning_rate": 2.247433552759393e-05, "loss": 0.093, "step": 18426 }, { "epoch": 2.185106130677102, "grad_norm": 0.6530092881140867, "learning_rate": 2.2471947459007028e-05, "loss": 0.0993, "step": 18427 }, { "epoch": 2.185224712439227, "grad_norm": 0.5746708668182857, "learning_rate": 2.2469559413725664e-05, "loss": 0.0884, "step": 18428 }, { "epoch": 2.1853432942013518, "grad_norm": 1.0667740435359643, "learning_rate": 2.246717139177186e-05, "loss": 0.1448, "step": 18429 }, { "epoch": 2.185461875963477, "grad_norm": 0.7855047756584274, "learning_rate": 2.2464783393167624e-05, "loss": 0.1275, "step": 18430 }, { "epoch": 2.1855804577256017, "grad_norm": 0.5478857324114155, "learning_rate": 2.2462395417934987e-05, "loss": 0.0696, "step": 18431 }, { "epoch": 2.185699039487727, "grad_norm": 0.7546049923186586, "learning_rate": 2.246000746609594e-05, "loss": 0.1018, "step": 18432 }, { "epoch": 2.1858176212498517, "grad_norm": 0.6674927981661097, "learning_rate": 2.245761953767252e-05, "loss": 0.0877, "step": 18433 }, { "epoch": 2.185936203011977, "grad_norm": 0.5170105384388965, "learning_rate": 2.245523163268673e-05, "loss": 0.0758, "step": 18434 }, { "epoch": 2.1860547847741016, "grad_norm": 0.9407099424580031, "learning_rate": 2.2452843751160585e-05, "loss": 0.15, "step": 18435 }, { "epoch": 2.186173366536227, "grad_norm": 1.1189742144371786, "learning_rate": 2.2450455893116085e-05, "loss": 0.133, "step": 18436 }, { "epoch": 2.1862919482983516, "grad_norm": 0.5132011815421205, "learning_rate": 2.2448068058575268e-05, "loss": 0.0611, "step": 18437 }, { "epoch": 2.186410530060477, "grad_norm": 0.48964948950489356, "learning_rate": 2.244568024756013e-05, "loss": 0.0678, "step": 18438 }, { "epoch": 2.1865291118226016, "grad_norm": 0.7074923457498284, "learning_rate": 2.2443292460092685e-05, "loss": 0.1117, "step": 18439 }, { "epoch": 2.1866476935847268, "grad_norm": 0.5985010694243094, "learning_rate": 2.244090469619495e-05, "loss": 0.0601, "step": 18440 }, { "epoch": 2.1867662753468515, "grad_norm": 0.6273669030015802, "learning_rate": 2.243851695588893e-05, "loss": 0.1091, "step": 18441 }, { "epoch": 2.1868848571089767, "grad_norm": 0.6008041323131077, "learning_rate": 2.2436129239196656e-05, "loss": 0.0939, "step": 18442 }, { "epoch": 2.1870034388711015, "grad_norm": 0.6174083399242325, "learning_rate": 2.2433741546140123e-05, "loss": 0.1119, "step": 18443 }, { "epoch": 2.1871220206332267, "grad_norm": 0.5840861335942009, "learning_rate": 2.2431353876741332e-05, "loss": 0.0932, "step": 18444 }, { "epoch": 2.1872406023953515, "grad_norm": 0.6382506427265218, "learning_rate": 2.2428966231022323e-05, "loss": 0.0912, "step": 18445 }, { "epoch": 2.1873591841574767, "grad_norm": 0.8222890368025033, "learning_rate": 2.2426578609005088e-05, "loss": 0.0916, "step": 18446 }, { "epoch": 2.1874777659196014, "grad_norm": 0.5734600593615354, "learning_rate": 2.2424191010711636e-05, "loss": 0.085, "step": 18447 }, { "epoch": 2.1875963476817266, "grad_norm": 0.596340578378995, "learning_rate": 2.2421803436163998e-05, "loss": 0.0837, "step": 18448 }, { "epoch": 2.1877149294438514, "grad_norm": 1.0084540696461124, "learning_rate": 2.2419415885384158e-05, "loss": 0.113, "step": 18449 }, { "epoch": 2.1878335112059766, "grad_norm": 0.6722281774865035, "learning_rate": 2.241702835839415e-05, "loss": 0.089, "step": 18450 }, { "epoch": 2.1879520929681013, "grad_norm": 0.7528960985132602, "learning_rate": 2.2414640855215956e-05, "loss": 0.1007, "step": 18451 }, { "epoch": 2.1880706747302265, "grad_norm": 0.5037195078723284, "learning_rate": 2.241225337587162e-05, "loss": 0.0648, "step": 18452 }, { "epoch": 2.1881892564923513, "grad_norm": 0.8574874749033047, "learning_rate": 2.240986592038313e-05, "loss": 0.1445, "step": 18453 }, { "epoch": 2.1883078382544765, "grad_norm": 1.0094148512797658, "learning_rate": 2.2407478488772503e-05, "loss": 0.1021, "step": 18454 }, { "epoch": 2.1884264200166013, "grad_norm": 0.8530636284562935, "learning_rate": 2.2405091081061733e-05, "loss": 0.0886, "step": 18455 }, { "epoch": 2.1885450017787265, "grad_norm": 0.8561293182107678, "learning_rate": 2.2402703697272853e-05, "loss": 0.1057, "step": 18456 }, { "epoch": 2.188663583540851, "grad_norm": 0.675044492595026, "learning_rate": 2.240031633742786e-05, "loss": 0.101, "step": 18457 }, { "epoch": 2.1887821653029764, "grad_norm": 0.9778476741242377, "learning_rate": 2.2397929001548756e-05, "loss": 0.1187, "step": 18458 }, { "epoch": 2.188900747065101, "grad_norm": 0.5090457736613733, "learning_rate": 2.2395541689657558e-05, "loss": 0.0589, "step": 18459 }, { "epoch": 2.1890193288272264, "grad_norm": 0.5474901828642386, "learning_rate": 2.2393154401776267e-05, "loss": 0.0896, "step": 18460 }, { "epoch": 2.189137910589351, "grad_norm": 0.6353467834164056, "learning_rate": 2.2390767137926904e-05, "loss": 0.0926, "step": 18461 }, { "epoch": 2.1892564923514763, "grad_norm": 0.8884683516963249, "learning_rate": 2.238837989813146e-05, "loss": 0.1159, "step": 18462 }, { "epoch": 2.1893750741136015, "grad_norm": 0.8250137811405346, "learning_rate": 2.238599268241196e-05, "loss": 0.1205, "step": 18463 }, { "epoch": 2.1894936558757263, "grad_norm": 0.6623926876237836, "learning_rate": 2.23836054907904e-05, "loss": 0.0898, "step": 18464 }, { "epoch": 2.189612237637851, "grad_norm": 0.6715827609717537, "learning_rate": 2.2381218323288787e-05, "loss": 0.0821, "step": 18465 }, { "epoch": 2.1897308193999763, "grad_norm": 0.6881309542800916, "learning_rate": 2.2378831179929125e-05, "loss": 0.0852, "step": 18466 }, { "epoch": 2.1898494011621015, "grad_norm": 0.7839381071951821, "learning_rate": 2.2376444060733433e-05, "loss": 0.0819, "step": 18467 }, { "epoch": 2.1899679829242262, "grad_norm": 0.7753718805237698, "learning_rate": 2.237405696572371e-05, "loss": 0.1071, "step": 18468 }, { "epoch": 2.1900865646863514, "grad_norm": 0.823825977896918, "learning_rate": 2.2371669894921954e-05, "loss": 0.1264, "step": 18469 }, { "epoch": 2.190205146448476, "grad_norm": 0.5974648359718348, "learning_rate": 2.2369282848350182e-05, "loss": 0.085, "step": 18470 }, { "epoch": 2.1903237282106014, "grad_norm": 0.6543164570813214, "learning_rate": 2.2366895826030393e-05, "loss": 0.0841, "step": 18471 }, { "epoch": 2.190442309972726, "grad_norm": 1.181393737484423, "learning_rate": 2.23645088279846e-05, "loss": 0.116, "step": 18472 }, { "epoch": 2.1905608917348514, "grad_norm": 1.079893083388744, "learning_rate": 2.236212185423481e-05, "loss": 0.1349, "step": 18473 }, { "epoch": 2.190679473496976, "grad_norm": 0.6615243863550132, "learning_rate": 2.2359734904803005e-05, "loss": 0.0759, "step": 18474 }, { "epoch": 2.1907980552591013, "grad_norm": 0.4717051269460087, "learning_rate": 2.2357347979711218e-05, "loss": 0.0551, "step": 18475 }, { "epoch": 2.190916637021226, "grad_norm": 0.9057013742077762, "learning_rate": 2.2354961078981447e-05, "loss": 0.0869, "step": 18476 }, { "epoch": 2.1910352187833513, "grad_norm": 0.760753499100508, "learning_rate": 2.2352574202635674e-05, "loss": 0.0957, "step": 18477 }, { "epoch": 2.191153800545476, "grad_norm": 0.6368180482369633, "learning_rate": 2.2350187350695932e-05, "loss": 0.0795, "step": 18478 }, { "epoch": 2.1912723823076012, "grad_norm": 0.8281067604284843, "learning_rate": 2.234780052318421e-05, "loss": 0.1143, "step": 18479 }, { "epoch": 2.191390964069726, "grad_norm": 0.7883761229061127, "learning_rate": 2.2345413720122523e-05, "loss": 0.1271, "step": 18480 }, { "epoch": 2.191509545831851, "grad_norm": 0.8190226298975014, "learning_rate": 2.2343026941532853e-05, "loss": 0.1176, "step": 18481 }, { "epoch": 2.191628127593976, "grad_norm": 0.49293805780579264, "learning_rate": 2.2340640187437224e-05, "loss": 0.0785, "step": 18482 }, { "epoch": 2.191746709356101, "grad_norm": 0.6268657248331693, "learning_rate": 2.2338253457857636e-05, "loss": 0.0904, "step": 18483 }, { "epoch": 2.191865291118226, "grad_norm": 1.2354411864211547, "learning_rate": 2.2335866752816084e-05, "loss": 0.1678, "step": 18484 }, { "epoch": 2.191983872880351, "grad_norm": 0.8590678768656969, "learning_rate": 2.2333480072334565e-05, "loss": 0.1328, "step": 18485 }, { "epoch": 2.192102454642476, "grad_norm": 0.5508647975535224, "learning_rate": 2.2331093416435096e-05, "loss": 0.081, "step": 18486 }, { "epoch": 2.192221036404601, "grad_norm": 0.6217181643038421, "learning_rate": 2.232870678513968e-05, "loss": 0.0893, "step": 18487 }, { "epoch": 2.192339618166726, "grad_norm": 0.5724887270324105, "learning_rate": 2.2326320178470294e-05, "loss": 0.0742, "step": 18488 }, { "epoch": 2.192458199928851, "grad_norm": 0.9439234280840562, "learning_rate": 2.232393359644897e-05, "loss": 0.1246, "step": 18489 }, { "epoch": 2.192576781690976, "grad_norm": 0.9992920410286019, "learning_rate": 2.2321547039097694e-05, "loss": 0.1319, "step": 18490 }, { "epoch": 2.192695363453101, "grad_norm": 0.5904562611430167, "learning_rate": 2.2319160506438474e-05, "loss": 0.0882, "step": 18491 }, { "epoch": 2.1928139452152258, "grad_norm": 0.57287236357984, "learning_rate": 2.2316773998493296e-05, "loss": 0.0878, "step": 18492 }, { "epoch": 2.192932526977351, "grad_norm": 0.7128153048590048, "learning_rate": 2.231438751528418e-05, "loss": 0.1141, "step": 18493 }, { "epoch": 2.1930511087394757, "grad_norm": 0.653657657716474, "learning_rate": 2.231200105683312e-05, "loss": 0.0924, "step": 18494 }, { "epoch": 2.193169690501601, "grad_norm": 0.6582609781713341, "learning_rate": 2.2309614623162115e-05, "loss": 0.0862, "step": 18495 }, { "epoch": 2.1932882722637257, "grad_norm": 0.8900287232145305, "learning_rate": 2.230722821429315e-05, "loss": 0.1263, "step": 18496 }, { "epoch": 2.193406854025851, "grad_norm": 0.7548047350126029, "learning_rate": 2.2304841830248253e-05, "loss": 0.101, "step": 18497 }, { "epoch": 2.1935254357879757, "grad_norm": 0.8875815058046704, "learning_rate": 2.2302455471049403e-05, "loss": 0.1011, "step": 18498 }, { "epoch": 2.193644017550101, "grad_norm": 0.5062227000328744, "learning_rate": 2.230006913671861e-05, "loss": 0.0616, "step": 18499 }, { "epoch": 2.1937625993122256, "grad_norm": 0.6396079713520615, "learning_rate": 2.2297682827277858e-05, "loss": 0.0928, "step": 18500 }, { "epoch": 2.193881181074351, "grad_norm": 1.184536068596578, "learning_rate": 2.2295296542749167e-05, "loss": 0.1243, "step": 18501 }, { "epoch": 2.1939997628364756, "grad_norm": 0.7714183539614202, "learning_rate": 2.2292910283154524e-05, "loss": 0.0963, "step": 18502 }, { "epoch": 2.194118344598601, "grad_norm": 0.6098704111105191, "learning_rate": 2.229052404851592e-05, "loss": 0.0834, "step": 18503 }, { "epoch": 2.1942369263607255, "grad_norm": 0.5464479276897053, "learning_rate": 2.228813783885537e-05, "loss": 0.0673, "step": 18504 }, { "epoch": 2.1943555081228507, "grad_norm": 0.7853634212661972, "learning_rate": 2.2285751654194867e-05, "loss": 0.0991, "step": 18505 }, { "epoch": 2.1944740898849755, "grad_norm": 0.6374050492009214, "learning_rate": 2.2283365494556404e-05, "loss": 0.0792, "step": 18506 }, { "epoch": 2.1945926716471007, "grad_norm": 0.780897838208035, "learning_rate": 2.2280979359961967e-05, "loss": 0.0951, "step": 18507 }, { "epoch": 2.1947112534092255, "grad_norm": 0.858538377033894, "learning_rate": 2.227859325043358e-05, "loss": 0.1287, "step": 18508 }, { "epoch": 2.1948298351713507, "grad_norm": 0.6647020688707223, "learning_rate": 2.2276207165993217e-05, "loss": 0.0692, "step": 18509 }, { "epoch": 2.1949484169334754, "grad_norm": 0.7807601421241424, "learning_rate": 2.2273821106662892e-05, "loss": 0.0902, "step": 18510 }, { "epoch": 2.1950669986956006, "grad_norm": 0.858675262934243, "learning_rate": 2.2271435072464584e-05, "loss": 0.0934, "step": 18511 }, { "epoch": 2.195185580457726, "grad_norm": 0.8200956669682851, "learning_rate": 2.2269049063420305e-05, "loss": 0.1145, "step": 18512 }, { "epoch": 2.1953041622198506, "grad_norm": 1.035577099384347, "learning_rate": 2.2266663079552047e-05, "loss": 0.1273, "step": 18513 }, { "epoch": 2.1954227439819753, "grad_norm": 0.732997696023518, "learning_rate": 2.2264277120881803e-05, "loss": 0.0906, "step": 18514 }, { "epoch": 2.1955413257441005, "grad_norm": 0.6850650766663792, "learning_rate": 2.2261891187431562e-05, "loss": 0.1142, "step": 18515 }, { "epoch": 2.1956599075062257, "grad_norm": 0.8176970851743066, "learning_rate": 2.2259505279223333e-05, "loss": 0.0962, "step": 18516 }, { "epoch": 2.1957784892683505, "grad_norm": 0.8054752461664582, "learning_rate": 2.2257119396279105e-05, "loss": 0.1084, "step": 18517 }, { "epoch": 2.1958970710304757, "grad_norm": 0.553423809719829, "learning_rate": 2.2254733538620864e-05, "loss": 0.0691, "step": 18518 }, { "epoch": 2.1960156527926005, "grad_norm": 0.6709360335210435, "learning_rate": 2.2252347706270622e-05, "loss": 0.0909, "step": 18519 }, { "epoch": 2.1961342345547257, "grad_norm": 0.6160422823644436, "learning_rate": 2.224996189925036e-05, "loss": 0.0726, "step": 18520 }, { "epoch": 2.1962528163168504, "grad_norm": 0.6556075571607048, "learning_rate": 2.2247576117582083e-05, "loss": 0.0804, "step": 18521 }, { "epoch": 2.1963713980789756, "grad_norm": 0.8650940461960032, "learning_rate": 2.224519036128777e-05, "loss": 0.0992, "step": 18522 }, { "epoch": 2.1964899798411004, "grad_norm": 0.6203374370418595, "learning_rate": 2.224280463038943e-05, "loss": 0.0802, "step": 18523 }, { "epoch": 2.1966085616032256, "grad_norm": 0.786557174691255, "learning_rate": 2.2240418924909055e-05, "loss": 0.1099, "step": 18524 }, { "epoch": 2.1967271433653504, "grad_norm": 0.853142201818019, "learning_rate": 2.223803324486863e-05, "loss": 0.1017, "step": 18525 }, { "epoch": 2.1968457251274756, "grad_norm": 1.0522875220294179, "learning_rate": 2.223564759029014e-05, "loss": 0.1418, "step": 18526 }, { "epoch": 2.1969643068896003, "grad_norm": 0.6809372935979422, "learning_rate": 2.2233261961195603e-05, "loss": 0.0922, "step": 18527 }, { "epoch": 2.1970828886517255, "grad_norm": 0.6505065750432619, "learning_rate": 2.2230876357606992e-05, "loss": 0.0722, "step": 18528 }, { "epoch": 2.1972014704138503, "grad_norm": 0.8094962424326148, "learning_rate": 2.222849077954631e-05, "loss": 0.1164, "step": 18529 }, { "epoch": 2.1973200521759755, "grad_norm": 0.9082711875351711, "learning_rate": 2.222610522703553e-05, "loss": 0.1137, "step": 18530 }, { "epoch": 2.1974386339381002, "grad_norm": 0.7536310593385196, "learning_rate": 2.2223719700096675e-05, "loss": 0.0867, "step": 18531 }, { "epoch": 2.1975572157002254, "grad_norm": 0.6776671801795742, "learning_rate": 2.222133419875172e-05, "loss": 0.0995, "step": 18532 }, { "epoch": 2.19767579746235, "grad_norm": 1.0260202813762966, "learning_rate": 2.221894872302264e-05, "loss": 0.1205, "step": 18533 }, { "epoch": 2.1977943792244754, "grad_norm": 0.8453640607351962, "learning_rate": 2.2216563272931457e-05, "loss": 0.1093, "step": 18534 }, { "epoch": 2.1979129609866, "grad_norm": 0.9434136525317276, "learning_rate": 2.221417784850015e-05, "loss": 0.0934, "step": 18535 }, { "epoch": 2.1980315427487254, "grad_norm": 0.714320708593729, "learning_rate": 2.22117924497507e-05, "loss": 0.0902, "step": 18536 }, { "epoch": 2.19815012451085, "grad_norm": 0.5878713275571802, "learning_rate": 2.22094070767051e-05, "loss": 0.0805, "step": 18537 }, { "epoch": 2.1982687062729753, "grad_norm": 0.626199835045335, "learning_rate": 2.2207021729385353e-05, "loss": 0.083, "step": 18538 }, { "epoch": 2.1983872880351, "grad_norm": 0.6031177979564564, "learning_rate": 2.2204636407813434e-05, "loss": 0.0799, "step": 18539 }, { "epoch": 2.1985058697972253, "grad_norm": 0.5918239560552421, "learning_rate": 2.2202251112011347e-05, "loss": 0.0781, "step": 18540 }, { "epoch": 2.19862445155935, "grad_norm": 0.5385284521300315, "learning_rate": 2.219986584200106e-05, "loss": 0.0783, "step": 18541 }, { "epoch": 2.1987430333214752, "grad_norm": 0.8289809387521189, "learning_rate": 2.2197480597804586e-05, "loss": 0.1263, "step": 18542 }, { "epoch": 2.1988616150836, "grad_norm": 0.8465947648863564, "learning_rate": 2.219509537944391e-05, "loss": 0.0911, "step": 18543 }, { "epoch": 2.198980196845725, "grad_norm": 0.9051284399314428, "learning_rate": 2.2192710186941014e-05, "loss": 0.1244, "step": 18544 }, { "epoch": 2.19909877860785, "grad_norm": 0.5645343672141971, "learning_rate": 2.2190325020317872e-05, "loss": 0.0909, "step": 18545 }, { "epoch": 2.199217360369975, "grad_norm": 0.7067184591053518, "learning_rate": 2.2187939879596504e-05, "loss": 0.1012, "step": 18546 }, { "epoch": 2.1993359421321, "grad_norm": 0.9157719499821433, "learning_rate": 2.2185554764798876e-05, "loss": 0.117, "step": 18547 }, { "epoch": 2.199454523894225, "grad_norm": 1.2119930891830306, "learning_rate": 2.2183169675946976e-05, "loss": 0.1369, "step": 18548 }, { "epoch": 2.19957310565635, "grad_norm": 1.0840501471801565, "learning_rate": 2.218078461306281e-05, "loss": 0.1488, "step": 18549 }, { "epoch": 2.199691687418475, "grad_norm": 1.0280660258688932, "learning_rate": 2.217839957616834e-05, "loss": 0.1423, "step": 18550 }, { "epoch": 2.1998102691806, "grad_norm": 0.5007376855546856, "learning_rate": 2.2176014565285578e-05, "loss": 0.0629, "step": 18551 }, { "epoch": 2.199928850942725, "grad_norm": 1.1936499174749404, "learning_rate": 2.2173629580436483e-05, "loss": 0.1402, "step": 18552 }, { "epoch": 2.20004743270485, "grad_norm": 0.6690989685537171, "learning_rate": 2.2171244621643073e-05, "loss": 0.1, "step": 18553 }, { "epoch": 2.200166014466975, "grad_norm": 0.784882037108575, "learning_rate": 2.2168859688927314e-05, "loss": 0.0798, "step": 18554 }, { "epoch": 2.2002845962290998, "grad_norm": 0.6684540464323727, "learning_rate": 2.21664747823112e-05, "loss": 0.0855, "step": 18555 }, { "epoch": 2.200403177991225, "grad_norm": 0.7413071325703999, "learning_rate": 2.21640899018167e-05, "loss": 0.0969, "step": 18556 }, { "epoch": 2.2005217597533497, "grad_norm": 0.8361374705201728, "learning_rate": 2.2161705047465828e-05, "loss": 0.1091, "step": 18557 }, { "epoch": 2.200640341515475, "grad_norm": 0.8041811971504875, "learning_rate": 2.2159320219280548e-05, "loss": 0.0976, "step": 18558 }, { "epoch": 2.2007589232775997, "grad_norm": 1.0524303402299942, "learning_rate": 2.215693541728285e-05, "loss": 0.1337, "step": 18559 }, { "epoch": 2.200877505039725, "grad_norm": 0.6680201243633164, "learning_rate": 2.2154550641494724e-05, "loss": 0.0886, "step": 18560 }, { "epoch": 2.20099608680185, "grad_norm": 0.6501557660392016, "learning_rate": 2.2152165891938157e-05, "loss": 0.0827, "step": 18561 }, { "epoch": 2.201114668563975, "grad_norm": 0.6146163813522737, "learning_rate": 2.2149781168635126e-05, "loss": 0.0934, "step": 18562 }, { "epoch": 2.2012332503260996, "grad_norm": 0.8826705546658759, "learning_rate": 2.2147396471607606e-05, "loss": 0.1202, "step": 18563 }, { "epoch": 2.201351832088225, "grad_norm": 0.9852576266034967, "learning_rate": 2.2145011800877606e-05, "loss": 0.1175, "step": 18564 }, { "epoch": 2.20147041385035, "grad_norm": 0.9513530019885685, "learning_rate": 2.2142627156467098e-05, "loss": 0.1302, "step": 18565 }, { "epoch": 2.201588995612475, "grad_norm": 0.8654444352781222, "learning_rate": 2.2140242538398058e-05, "loss": 0.1266, "step": 18566 }, { "epoch": 2.2017075773746, "grad_norm": 0.5434129379293668, "learning_rate": 2.2137857946692468e-05, "loss": 0.0753, "step": 18567 }, { "epoch": 2.2018261591367247, "grad_norm": 0.5480297654142707, "learning_rate": 2.213547338137233e-05, "loss": 0.0639, "step": 18568 }, { "epoch": 2.20194474089885, "grad_norm": 1.0351814034498477, "learning_rate": 2.2133088842459607e-05, "loss": 0.1388, "step": 18569 }, { "epoch": 2.2020633226609747, "grad_norm": 0.71055519406069, "learning_rate": 2.2130704329976293e-05, "loss": 0.0943, "step": 18570 }, { "epoch": 2.2021819044231, "grad_norm": 0.9193631799444636, "learning_rate": 2.212831984394436e-05, "loss": 0.1487, "step": 18571 }, { "epoch": 2.2023004861852247, "grad_norm": 0.5067931795845955, "learning_rate": 2.2125935384385805e-05, "loss": 0.0572, "step": 18572 }, { "epoch": 2.20241906794735, "grad_norm": 0.7943488855387889, "learning_rate": 2.21235509513226e-05, "loss": 0.1133, "step": 18573 }, { "epoch": 2.2025376497094746, "grad_norm": 0.6930737956642986, "learning_rate": 2.2121166544776715e-05, "loss": 0.1048, "step": 18574 }, { "epoch": 2.2026562314716, "grad_norm": 1.0697652505712003, "learning_rate": 2.211878216477016e-05, "loss": 0.1427, "step": 18575 }, { "epoch": 2.2027748132337246, "grad_norm": 0.7925548364088736, "learning_rate": 2.21163978113249e-05, "loss": 0.0976, "step": 18576 }, { "epoch": 2.20289339499585, "grad_norm": 0.6274133625874148, "learning_rate": 2.2114013484462907e-05, "loss": 0.0988, "step": 18577 }, { "epoch": 2.2030119767579746, "grad_norm": 0.7632437822820491, "learning_rate": 2.2111629184206176e-05, "loss": 0.1309, "step": 18578 }, { "epoch": 2.2031305585200998, "grad_norm": 0.7631345567142237, "learning_rate": 2.210924491057668e-05, "loss": 0.1263, "step": 18579 }, { "epoch": 2.2032491402822245, "grad_norm": 1.3662623194326764, "learning_rate": 2.2106860663596403e-05, "loss": 0.157, "step": 18580 }, { "epoch": 2.2033677220443497, "grad_norm": 0.7038165548268492, "learning_rate": 2.2104476443287326e-05, "loss": 0.0907, "step": 18581 }, { "epoch": 2.2034863038064745, "grad_norm": 0.8349491913234616, "learning_rate": 2.2102092249671412e-05, "loss": 0.0767, "step": 18582 }, { "epoch": 2.2036048855685997, "grad_norm": 0.8946905256324299, "learning_rate": 2.2099708082770667e-05, "loss": 0.1168, "step": 18583 }, { "epoch": 2.2037234673307244, "grad_norm": 0.44266671844258443, "learning_rate": 2.2097323942607055e-05, "loss": 0.0691, "step": 18584 }, { "epoch": 2.2038420490928496, "grad_norm": 0.6523131661658357, "learning_rate": 2.209493982920256e-05, "loss": 0.0627, "step": 18585 }, { "epoch": 2.2039606308549744, "grad_norm": 0.5016930785632562, "learning_rate": 2.2092555742579148e-05, "loss": 0.0771, "step": 18586 }, { "epoch": 2.2040792126170996, "grad_norm": 0.5978781784812954, "learning_rate": 2.2090171682758813e-05, "loss": 0.0814, "step": 18587 }, { "epoch": 2.2041977943792244, "grad_norm": 0.8955751112518372, "learning_rate": 2.208778764976352e-05, "loss": 0.1425, "step": 18588 }, { "epoch": 2.2043163761413496, "grad_norm": 0.7723725234874677, "learning_rate": 2.2085403643615256e-05, "loss": 0.0796, "step": 18589 }, { "epoch": 2.2044349579034743, "grad_norm": 0.6198159230380986, "learning_rate": 2.2083019664335998e-05, "loss": 0.0926, "step": 18590 }, { "epoch": 2.2045535396655995, "grad_norm": 0.756360778800563, "learning_rate": 2.2080635711947725e-05, "loss": 0.0956, "step": 18591 }, { "epoch": 2.2046721214277243, "grad_norm": 0.7996900093697139, "learning_rate": 2.2078251786472413e-05, "loss": 0.1095, "step": 18592 }, { "epoch": 2.2047907031898495, "grad_norm": 0.45190679371499526, "learning_rate": 2.207586788793202e-05, "loss": 0.0668, "step": 18593 }, { "epoch": 2.2049092849519742, "grad_norm": 0.665975726164127, "learning_rate": 2.207348401634856e-05, "loss": 0.0962, "step": 18594 }, { "epoch": 2.2050278667140994, "grad_norm": 0.5858734212135126, "learning_rate": 2.207110017174398e-05, "loss": 0.0808, "step": 18595 }, { "epoch": 2.205146448476224, "grad_norm": 0.4829699188710921, "learning_rate": 2.206871635414027e-05, "loss": 0.0618, "step": 18596 }, { "epoch": 2.2052650302383494, "grad_norm": 0.7795387758030494, "learning_rate": 2.2066332563559385e-05, "loss": 0.0969, "step": 18597 }, { "epoch": 2.205383612000474, "grad_norm": 0.7947729608033122, "learning_rate": 2.2063948800023332e-05, "loss": 0.108, "step": 18598 }, { "epoch": 2.2055021937625994, "grad_norm": 0.7497829229789136, "learning_rate": 2.2061565063554064e-05, "loss": 0.1023, "step": 18599 }, { "epoch": 2.205620775524724, "grad_norm": 0.6595083176035033, "learning_rate": 2.2059181354173564e-05, "loss": 0.0858, "step": 18600 }, { "epoch": 2.2057393572868493, "grad_norm": 0.6879356036744354, "learning_rate": 2.2056797671903797e-05, "loss": 0.0911, "step": 18601 }, { "epoch": 2.205857939048974, "grad_norm": 0.5811243878593294, "learning_rate": 2.2054414016766758e-05, "loss": 0.0897, "step": 18602 }, { "epoch": 2.2059765208110993, "grad_norm": 0.8096793251905643, "learning_rate": 2.2052030388784413e-05, "loss": 0.1127, "step": 18603 }, { "epoch": 2.206095102573224, "grad_norm": 0.7451417839162476, "learning_rate": 2.2049646787978717e-05, "loss": 0.0958, "step": 18604 }, { "epoch": 2.2062136843353493, "grad_norm": 0.670796870891284, "learning_rate": 2.2047263214371673e-05, "loss": 0.1041, "step": 18605 }, { "epoch": 2.206332266097474, "grad_norm": 0.7376735863330911, "learning_rate": 2.2044879667985238e-05, "loss": 0.0954, "step": 18606 }, { "epoch": 2.206450847859599, "grad_norm": 0.8633651388194945, "learning_rate": 2.204249614884139e-05, "loss": 0.1302, "step": 18607 }, { "epoch": 2.206569429621724, "grad_norm": 0.7518356842963364, "learning_rate": 2.2040112656962093e-05, "loss": 0.1029, "step": 18608 }, { "epoch": 2.206688011383849, "grad_norm": 0.8492067734332795, "learning_rate": 2.203772919236933e-05, "loss": 0.1234, "step": 18609 }, { "epoch": 2.2068065931459744, "grad_norm": 0.7183766620489905, "learning_rate": 2.203534575508508e-05, "loss": 0.0836, "step": 18610 }, { "epoch": 2.206925174908099, "grad_norm": 0.974540143537155, "learning_rate": 2.2032962345131303e-05, "loss": 0.1102, "step": 18611 }, { "epoch": 2.207043756670224, "grad_norm": 1.0038836971111686, "learning_rate": 2.2030578962529964e-05, "loss": 0.1035, "step": 18612 }, { "epoch": 2.207162338432349, "grad_norm": 0.644306625108896, "learning_rate": 2.202819560730306e-05, "loss": 0.0822, "step": 18613 }, { "epoch": 2.2072809201944743, "grad_norm": 1.2027020384967633, "learning_rate": 2.2025812279472546e-05, "loss": 0.1395, "step": 18614 }, { "epoch": 2.207399501956599, "grad_norm": 1.0594622702598857, "learning_rate": 2.2023428979060396e-05, "loss": 0.1305, "step": 18615 }, { "epoch": 2.2075180837187243, "grad_norm": 0.6584386444540188, "learning_rate": 2.2021045706088567e-05, "loss": 0.0919, "step": 18616 }, { "epoch": 2.207636665480849, "grad_norm": 0.9100718979565664, "learning_rate": 2.201866246057906e-05, "loss": 0.1136, "step": 18617 }, { "epoch": 2.2077552472429742, "grad_norm": 0.7153970729503815, "learning_rate": 2.2016279242553818e-05, "loss": 0.0959, "step": 18618 }, { "epoch": 2.207873829005099, "grad_norm": 0.7279621761995054, "learning_rate": 2.2013896052034825e-05, "loss": 0.1148, "step": 18619 }, { "epoch": 2.207992410767224, "grad_norm": 0.870027704700297, "learning_rate": 2.201151288904405e-05, "loss": 0.0857, "step": 18620 }, { "epoch": 2.208110992529349, "grad_norm": 0.8337689045850897, "learning_rate": 2.2009129753603463e-05, "loss": 0.0928, "step": 18621 }, { "epoch": 2.208229574291474, "grad_norm": 0.6440783583648777, "learning_rate": 2.2006746645735035e-05, "loss": 0.1206, "step": 18622 }, { "epoch": 2.208348156053599, "grad_norm": 0.6891570753357061, "learning_rate": 2.200436356546072e-05, "loss": 0.0984, "step": 18623 }, { "epoch": 2.208466737815724, "grad_norm": 1.0249860097179124, "learning_rate": 2.2001980512802514e-05, "loss": 0.1741, "step": 18624 }, { "epoch": 2.208585319577849, "grad_norm": 0.6903485651092206, "learning_rate": 2.1999597487782368e-05, "loss": 0.0875, "step": 18625 }, { "epoch": 2.208703901339974, "grad_norm": 0.6062910246557257, "learning_rate": 2.199721449042225e-05, "loss": 0.0915, "step": 18626 }, { "epoch": 2.208822483102099, "grad_norm": 0.936409676322443, "learning_rate": 2.199483152074413e-05, "loss": 0.1506, "step": 18627 }, { "epoch": 2.208941064864224, "grad_norm": 0.6947070103436033, "learning_rate": 2.1992448578769977e-05, "loss": 0.1154, "step": 18628 }, { "epoch": 2.209059646626349, "grad_norm": 0.7530375633730423, "learning_rate": 2.199006566452177e-05, "loss": 0.1077, "step": 18629 }, { "epoch": 2.209178228388474, "grad_norm": 1.0009365088898239, "learning_rate": 2.1987682778021463e-05, "loss": 0.1236, "step": 18630 }, { "epoch": 2.2092968101505988, "grad_norm": 0.5542601465160594, "learning_rate": 2.198529991929102e-05, "loss": 0.068, "step": 18631 }, { "epoch": 2.209415391912724, "grad_norm": 0.7528113402301521, "learning_rate": 2.1982917088352424e-05, "loss": 0.0988, "step": 18632 }, { "epoch": 2.2095339736748487, "grad_norm": 0.5881278347818135, "learning_rate": 2.198053428522763e-05, "loss": 0.0926, "step": 18633 }, { "epoch": 2.209652555436974, "grad_norm": 0.6880838367554584, "learning_rate": 2.1978151509938596e-05, "loss": 0.1027, "step": 18634 }, { "epoch": 2.2097711371990987, "grad_norm": 0.8814054904354307, "learning_rate": 2.1975768762507312e-05, "loss": 0.1039, "step": 18635 }, { "epoch": 2.209889718961224, "grad_norm": 0.8794332308575439, "learning_rate": 2.197338604295573e-05, "loss": 0.1296, "step": 18636 }, { "epoch": 2.2100083007233486, "grad_norm": 0.6604809888978591, "learning_rate": 2.1971003351305816e-05, "loss": 0.1183, "step": 18637 }, { "epoch": 2.210126882485474, "grad_norm": 0.6830949084814016, "learning_rate": 2.1968620687579533e-05, "loss": 0.0938, "step": 18638 }, { "epoch": 2.2102454642475986, "grad_norm": 0.7034628813258055, "learning_rate": 2.196623805179885e-05, "loss": 0.105, "step": 18639 }, { "epoch": 2.210364046009724, "grad_norm": 0.9532083007683606, "learning_rate": 2.196385544398574e-05, "loss": 0.1424, "step": 18640 }, { "epoch": 2.2104826277718486, "grad_norm": 0.8156634050913079, "learning_rate": 2.196147286416216e-05, "loss": 0.0878, "step": 18641 }, { "epoch": 2.2106012095339738, "grad_norm": 0.8729415376990854, "learning_rate": 2.195909031235006e-05, "loss": 0.1203, "step": 18642 }, { "epoch": 2.2107197912960985, "grad_norm": 0.5770768557473204, "learning_rate": 2.1956707788571433e-05, "loss": 0.0794, "step": 18643 }, { "epoch": 2.2108383730582237, "grad_norm": 0.9712460024882714, "learning_rate": 2.1954325292848224e-05, "loss": 0.1368, "step": 18644 }, { "epoch": 2.2109569548203485, "grad_norm": 0.787624591616509, "learning_rate": 2.195194282520239e-05, "loss": 0.0997, "step": 18645 }, { "epoch": 2.2110755365824737, "grad_norm": 0.6452216279782554, "learning_rate": 2.194956038565592e-05, "loss": 0.0984, "step": 18646 }, { "epoch": 2.2111941183445984, "grad_norm": 0.7059312389006707, "learning_rate": 2.194717797423076e-05, "loss": 0.0973, "step": 18647 }, { "epoch": 2.2113127001067236, "grad_norm": 0.627076525728745, "learning_rate": 2.1944795590948873e-05, "loss": 0.0884, "step": 18648 }, { "epoch": 2.2114312818688484, "grad_norm": 0.6011650331397197, "learning_rate": 2.1942413235832222e-05, "loss": 0.0755, "step": 18649 }, { "epoch": 2.2115498636309736, "grad_norm": 0.8795694101074933, "learning_rate": 2.194003090890277e-05, "loss": 0.1183, "step": 18650 }, { "epoch": 2.2116684453930984, "grad_norm": 1.0597435514606912, "learning_rate": 2.1937648610182487e-05, "loss": 0.1607, "step": 18651 }, { "epoch": 2.2117870271552236, "grad_norm": 0.7917411486357385, "learning_rate": 2.1935266339693328e-05, "loss": 0.1237, "step": 18652 }, { "epoch": 2.2119056089173483, "grad_norm": 0.8914149438945368, "learning_rate": 2.1932884097457247e-05, "loss": 0.0945, "step": 18653 }, { "epoch": 2.2120241906794735, "grad_norm": 0.6407896648716348, "learning_rate": 2.193050188349622e-05, "loss": 0.1078, "step": 18654 }, { "epoch": 2.2121427724415983, "grad_norm": 0.6021451242435333, "learning_rate": 2.1928119697832206e-05, "loss": 0.1067, "step": 18655 }, { "epoch": 2.2122613542037235, "grad_norm": 0.7070466979194842, "learning_rate": 2.1925737540487153e-05, "loss": 0.081, "step": 18656 }, { "epoch": 2.2123799359658483, "grad_norm": 0.5853492223139105, "learning_rate": 2.1923355411483033e-05, "loss": 0.0786, "step": 18657 }, { "epoch": 2.2124985177279735, "grad_norm": 0.7853003727267024, "learning_rate": 2.1920973310841798e-05, "loss": 0.1064, "step": 18658 }, { "epoch": 2.212617099490098, "grad_norm": 0.7082067726618405, "learning_rate": 2.1918591238585423e-05, "loss": 0.0844, "step": 18659 }, { "epoch": 2.2127356812522234, "grad_norm": 0.6728041224700175, "learning_rate": 2.1916209194735846e-05, "loss": 0.0897, "step": 18660 }, { "epoch": 2.212854263014348, "grad_norm": 0.6432716893785607, "learning_rate": 2.191382717931505e-05, "loss": 0.0825, "step": 18661 }, { "epoch": 2.2129728447764734, "grad_norm": 0.7051747978325846, "learning_rate": 2.1911445192344985e-05, "loss": 0.0983, "step": 18662 }, { "epoch": 2.2130914265385986, "grad_norm": 0.6713429748288177, "learning_rate": 2.1909063233847605e-05, "loss": 0.1123, "step": 18663 }, { "epoch": 2.2132100083007233, "grad_norm": 0.8600886502656668, "learning_rate": 2.190668130384486e-05, "loss": 0.1237, "step": 18664 }, { "epoch": 2.213328590062848, "grad_norm": 0.7151052667107956, "learning_rate": 2.1904299402358733e-05, "loss": 0.0885, "step": 18665 }, { "epoch": 2.2134471718249733, "grad_norm": 0.6618300311391481, "learning_rate": 2.1901917529411168e-05, "loss": 0.1092, "step": 18666 }, { "epoch": 2.2135657535870985, "grad_norm": 0.7116766422676849, "learning_rate": 2.1899535685024118e-05, "loss": 0.0816, "step": 18667 }, { "epoch": 2.2136843353492233, "grad_norm": 0.687131103742838, "learning_rate": 2.189715386921955e-05, "loss": 0.0921, "step": 18668 }, { "epoch": 2.2138029171113485, "grad_norm": 0.6379089671428497, "learning_rate": 2.1894772082019416e-05, "loss": 0.0854, "step": 18669 }, { "epoch": 2.2139214988734732, "grad_norm": 0.7488806146928201, "learning_rate": 2.1892390323445684e-05, "loss": 0.1102, "step": 18670 }, { "epoch": 2.2140400806355984, "grad_norm": 0.3744134510030876, "learning_rate": 2.1890008593520297e-05, "loss": 0.0476, "step": 18671 }, { "epoch": 2.214158662397723, "grad_norm": 0.8765545168514761, "learning_rate": 2.1887626892265213e-05, "loss": 0.141, "step": 18672 }, { "epoch": 2.2142772441598484, "grad_norm": 0.6520933403408776, "learning_rate": 2.1885245219702398e-05, "loss": 0.0685, "step": 18673 }, { "epoch": 2.214395825921973, "grad_norm": 0.8071999468964205, "learning_rate": 2.18828635758538e-05, "loss": 0.1089, "step": 18674 }, { "epoch": 2.2145144076840984, "grad_norm": 0.8515546905045722, "learning_rate": 2.1880481960741374e-05, "loss": 0.1157, "step": 18675 }, { "epoch": 2.214632989446223, "grad_norm": 0.6034643129478984, "learning_rate": 2.1878100374387085e-05, "loss": 0.0784, "step": 18676 }, { "epoch": 2.2147515712083483, "grad_norm": 0.6186813312770109, "learning_rate": 2.1875718816812886e-05, "loss": 0.0735, "step": 18677 }, { "epoch": 2.214870152970473, "grad_norm": 1.0821210241338188, "learning_rate": 2.1873337288040717e-05, "loss": 0.1885, "step": 18678 }, { "epoch": 2.2149887347325983, "grad_norm": 0.6850738906615209, "learning_rate": 2.1870955788092547e-05, "loss": 0.1102, "step": 18679 }, { "epoch": 2.215107316494723, "grad_norm": 0.7692298389014989, "learning_rate": 2.1868574316990332e-05, "loss": 0.1102, "step": 18680 }, { "epoch": 2.2152258982568482, "grad_norm": 0.7942619554401589, "learning_rate": 2.186619287475602e-05, "loss": 0.1335, "step": 18681 }, { "epoch": 2.215344480018973, "grad_norm": 0.7533815851106666, "learning_rate": 2.186381146141157e-05, "loss": 0.0972, "step": 18682 }, { "epoch": 2.215463061781098, "grad_norm": 0.6283102927787916, "learning_rate": 2.1861430076978923e-05, "loss": 0.0755, "step": 18683 }, { "epoch": 2.215581643543223, "grad_norm": 0.8301645002043025, "learning_rate": 2.1859048721480052e-05, "loss": 0.119, "step": 18684 }, { "epoch": 2.215700225305348, "grad_norm": 0.8977144414912559, "learning_rate": 2.1856667394936898e-05, "loss": 0.1471, "step": 18685 }, { "epoch": 2.215818807067473, "grad_norm": 0.8741160018895833, "learning_rate": 2.185428609737141e-05, "loss": 0.1314, "step": 18686 }, { "epoch": 2.215937388829598, "grad_norm": 0.9612728434343771, "learning_rate": 2.185190482880555e-05, "loss": 0.1361, "step": 18687 }, { "epoch": 2.216055970591723, "grad_norm": 0.8580653727279521, "learning_rate": 2.1849523589261266e-05, "loss": 0.1311, "step": 18688 }, { "epoch": 2.216174552353848, "grad_norm": 0.9588323801052889, "learning_rate": 2.1847142378760514e-05, "loss": 0.1302, "step": 18689 }, { "epoch": 2.216293134115973, "grad_norm": 0.6198404001980826, "learning_rate": 2.1844761197325237e-05, "loss": 0.0927, "step": 18690 }, { "epoch": 2.216411715878098, "grad_norm": 0.5784735277613501, "learning_rate": 2.18423800449774e-05, "loss": 0.0788, "step": 18691 }, { "epoch": 2.216530297640223, "grad_norm": 1.0121844892101148, "learning_rate": 2.1839998921738948e-05, "loss": 0.139, "step": 18692 }, { "epoch": 2.216648879402348, "grad_norm": 0.8568426153757488, "learning_rate": 2.1837617827631833e-05, "loss": 0.1223, "step": 18693 }, { "epoch": 2.2167674611644728, "grad_norm": 0.8927015304356817, "learning_rate": 2.183523676267799e-05, "loss": 0.0987, "step": 18694 }, { "epoch": 2.216886042926598, "grad_norm": 0.6804630945180242, "learning_rate": 2.1832855726899392e-05, "loss": 0.1159, "step": 18695 }, { "epoch": 2.2170046246887227, "grad_norm": 0.8287177261518044, "learning_rate": 2.1830474720317983e-05, "loss": 0.1027, "step": 18696 }, { "epoch": 2.217123206450848, "grad_norm": 1.0614487307462328, "learning_rate": 2.1828093742955705e-05, "loss": 0.1204, "step": 18697 }, { "epoch": 2.2172417882129727, "grad_norm": 0.6141940127027554, "learning_rate": 2.1825712794834514e-05, "loss": 0.0762, "step": 18698 }, { "epoch": 2.217360369975098, "grad_norm": 0.6426103008100169, "learning_rate": 2.1823331875976354e-05, "loss": 0.1018, "step": 18699 }, { "epoch": 2.2174789517372226, "grad_norm": 0.7452552849171687, "learning_rate": 2.182095098640319e-05, "loss": 0.0952, "step": 18700 }, { "epoch": 2.217597533499348, "grad_norm": 0.790238901613885, "learning_rate": 2.1818570126136954e-05, "loss": 0.1026, "step": 18701 }, { "epoch": 2.2177161152614726, "grad_norm": 0.9266693321909398, "learning_rate": 2.1816189295199594e-05, "loss": 0.144, "step": 18702 }, { "epoch": 2.217834697023598, "grad_norm": 0.7009742291438549, "learning_rate": 2.181380849361307e-05, "loss": 0.098, "step": 18703 }, { "epoch": 2.2179532787857226, "grad_norm": 0.5725876141914784, "learning_rate": 2.181142772139933e-05, "loss": 0.0792, "step": 18704 }, { "epoch": 2.2180718605478478, "grad_norm": 0.6208478932717707, "learning_rate": 2.1809046978580305e-05, "loss": 0.0677, "step": 18705 }, { "epoch": 2.2181904423099725, "grad_norm": 0.5393205769434588, "learning_rate": 2.180666626517796e-05, "loss": 0.0784, "step": 18706 }, { "epoch": 2.2183090240720977, "grad_norm": 0.6827932661282602, "learning_rate": 2.1804285581214235e-05, "loss": 0.1237, "step": 18707 }, { "epoch": 2.2184276058342225, "grad_norm": 0.7334561509085107, "learning_rate": 2.1801904926711083e-05, "loss": 0.1245, "step": 18708 }, { "epoch": 2.2185461875963477, "grad_norm": 0.9663215358778052, "learning_rate": 2.179952430169043e-05, "loss": 0.1421, "step": 18709 }, { "epoch": 2.2186647693584725, "grad_norm": 0.6344197290360618, "learning_rate": 2.1797143706174254e-05, "loss": 0.0822, "step": 18710 }, { "epoch": 2.2187833511205977, "grad_norm": 0.7638435880135139, "learning_rate": 2.1794763140184487e-05, "loss": 0.1113, "step": 18711 }, { "epoch": 2.218901932882723, "grad_norm": 0.6012733108718435, "learning_rate": 2.179238260374307e-05, "loss": 0.089, "step": 18712 }, { "epoch": 2.2190205146448476, "grad_norm": 0.6539711158768791, "learning_rate": 2.1790002096871945e-05, "loss": 0.0603, "step": 18713 }, { "epoch": 2.2191390964069724, "grad_norm": 0.7488235439497176, "learning_rate": 2.178762161959307e-05, "loss": 0.1014, "step": 18714 }, { "epoch": 2.2192576781690976, "grad_norm": 0.9571688597791002, "learning_rate": 2.1785241171928388e-05, "loss": 0.1471, "step": 18715 }, { "epoch": 2.219376259931223, "grad_norm": 0.5724504931816764, "learning_rate": 2.1782860753899833e-05, "loss": 0.0714, "step": 18716 }, { "epoch": 2.2194948416933475, "grad_norm": 0.5679356555553766, "learning_rate": 2.1780480365529358e-05, "loss": 0.0765, "step": 18717 }, { "epoch": 2.2196134234554727, "grad_norm": 0.5857518003283306, "learning_rate": 2.1778100006838904e-05, "loss": 0.0936, "step": 18718 }, { "epoch": 2.2197320052175975, "grad_norm": 0.8120237484943815, "learning_rate": 2.1775719677850426e-05, "loss": 0.108, "step": 18719 }, { "epoch": 2.2198505869797227, "grad_norm": 0.7136030016253614, "learning_rate": 2.1773339378585845e-05, "loss": 0.0973, "step": 18720 }, { "epoch": 2.2199691687418475, "grad_norm": 0.6244744878349615, "learning_rate": 2.177095910906713e-05, "loss": 0.0776, "step": 18721 }, { "epoch": 2.2200877505039727, "grad_norm": 0.5949523161318707, "learning_rate": 2.176857886931621e-05, "loss": 0.0724, "step": 18722 }, { "epoch": 2.2202063322660974, "grad_norm": 0.625403615098908, "learning_rate": 2.176619865935503e-05, "loss": 0.0994, "step": 18723 }, { "epoch": 2.2203249140282226, "grad_norm": 0.7921867517076254, "learning_rate": 2.1763818479205525e-05, "loss": 0.0853, "step": 18724 }, { "epoch": 2.2204434957903474, "grad_norm": 0.9848042325838526, "learning_rate": 2.1761438328889655e-05, "loss": 0.133, "step": 18725 }, { "epoch": 2.2205620775524726, "grad_norm": 0.6968242006894193, "learning_rate": 2.1759058208429355e-05, "loss": 0.08, "step": 18726 }, { "epoch": 2.2206806593145973, "grad_norm": 0.6351210625642078, "learning_rate": 2.1756678117846553e-05, "loss": 0.0838, "step": 18727 }, { "epoch": 2.2207992410767226, "grad_norm": 0.7519041942911971, "learning_rate": 2.17542980571632e-05, "loss": 0.0933, "step": 18728 }, { "epoch": 2.2209178228388473, "grad_norm": 0.5309643113493044, "learning_rate": 2.1751918026401248e-05, "loss": 0.0884, "step": 18729 }, { "epoch": 2.2210364046009725, "grad_norm": 0.5706025259925694, "learning_rate": 2.1749538025582627e-05, "loss": 0.0932, "step": 18730 }, { "epoch": 2.2211549863630973, "grad_norm": 0.6736978131525626, "learning_rate": 2.174715805472927e-05, "loss": 0.0828, "step": 18731 }, { "epoch": 2.2212735681252225, "grad_norm": 0.8550123104962464, "learning_rate": 2.174477811386314e-05, "loss": 0.1158, "step": 18732 }, { "epoch": 2.2213921498873472, "grad_norm": 0.8533094435047427, "learning_rate": 2.1742398203006162e-05, "loss": 0.1158, "step": 18733 }, { "epoch": 2.2215107316494724, "grad_norm": 0.6019263942042418, "learning_rate": 2.174001832218028e-05, "loss": 0.081, "step": 18734 }, { "epoch": 2.221629313411597, "grad_norm": 0.7087622543174938, "learning_rate": 2.173763847140742e-05, "loss": 0.1098, "step": 18735 }, { "epoch": 2.2217478951737224, "grad_norm": 0.855204827978792, "learning_rate": 2.1735258650709538e-05, "loss": 0.1096, "step": 18736 }, { "epoch": 2.221866476935847, "grad_norm": 0.672713353971956, "learning_rate": 2.1732878860108567e-05, "loss": 0.0819, "step": 18737 }, { "epoch": 2.2219850586979724, "grad_norm": 0.6423223416370696, "learning_rate": 2.1730499099626452e-05, "loss": 0.0963, "step": 18738 }, { "epoch": 2.222103640460097, "grad_norm": 0.8017845214030946, "learning_rate": 2.1728119369285116e-05, "loss": 0.1171, "step": 18739 }, { "epoch": 2.2222222222222223, "grad_norm": 0.5484568966715241, "learning_rate": 2.1725739669106515e-05, "loss": 0.0821, "step": 18740 }, { "epoch": 2.222340803984347, "grad_norm": 0.8276292428236063, "learning_rate": 2.1723359999112584e-05, "loss": 0.1047, "step": 18741 }, { "epoch": 2.2224593857464723, "grad_norm": 0.8199129944311886, "learning_rate": 2.172098035932525e-05, "loss": 0.1044, "step": 18742 }, { "epoch": 2.222577967508597, "grad_norm": 0.7741103604108954, "learning_rate": 2.171860074976645e-05, "loss": 0.1167, "step": 18743 }, { "epoch": 2.2226965492707222, "grad_norm": 0.9296993249199452, "learning_rate": 2.1716221170458135e-05, "loss": 0.1029, "step": 18744 }, { "epoch": 2.222815131032847, "grad_norm": 0.6322449514740297, "learning_rate": 2.1713841621422236e-05, "loss": 0.0829, "step": 18745 }, { "epoch": 2.222933712794972, "grad_norm": 0.7561955579018941, "learning_rate": 2.1711462102680674e-05, "loss": 0.096, "step": 18746 }, { "epoch": 2.223052294557097, "grad_norm": 0.7086477882702891, "learning_rate": 2.170908261425541e-05, "loss": 0.0989, "step": 18747 }, { "epoch": 2.223170876319222, "grad_norm": 0.7641369952199866, "learning_rate": 2.1706703156168364e-05, "loss": 0.1088, "step": 18748 }, { "epoch": 2.223289458081347, "grad_norm": 0.8338466259243604, "learning_rate": 2.1704323728441485e-05, "loss": 0.0993, "step": 18749 }, { "epoch": 2.223408039843472, "grad_norm": 0.6418169743060544, "learning_rate": 2.1701944331096685e-05, "loss": 0.0998, "step": 18750 }, { "epoch": 2.223526621605597, "grad_norm": 0.7896333584908579, "learning_rate": 2.1699564964155926e-05, "loss": 0.112, "step": 18751 }, { "epoch": 2.223645203367722, "grad_norm": 0.5436595342286243, "learning_rate": 2.169718562764113e-05, "loss": 0.0754, "step": 18752 }, { "epoch": 2.223763785129847, "grad_norm": 0.9491578386402872, "learning_rate": 2.1694806321574233e-05, "loss": 0.1597, "step": 18753 }, { "epoch": 2.223882366891972, "grad_norm": 0.7068064879579861, "learning_rate": 2.1692427045977158e-05, "loss": 0.107, "step": 18754 }, { "epoch": 2.224000948654097, "grad_norm": 0.6234575465611063, "learning_rate": 2.169004780087186e-05, "loss": 0.0774, "step": 18755 }, { "epoch": 2.224119530416222, "grad_norm": 1.0342195399836753, "learning_rate": 2.1687668586280263e-05, "loss": 0.1083, "step": 18756 }, { "epoch": 2.2242381121783468, "grad_norm": 0.6785739803533407, "learning_rate": 2.1685289402224292e-05, "loss": 0.0872, "step": 18757 }, { "epoch": 2.224356693940472, "grad_norm": 0.8490115567075731, "learning_rate": 2.1682910248725885e-05, "loss": 0.101, "step": 18758 }, { "epoch": 2.2244752757025967, "grad_norm": 0.6927880533800067, "learning_rate": 2.168053112580699e-05, "loss": 0.0987, "step": 18759 }, { "epoch": 2.224593857464722, "grad_norm": 0.5264706318452989, "learning_rate": 2.1678152033489525e-05, "loss": 0.0845, "step": 18760 }, { "epoch": 2.224712439226847, "grad_norm": 0.7547232201709428, "learning_rate": 2.1675772971795412e-05, "loss": 0.1234, "step": 18761 }, { "epoch": 2.224831020988972, "grad_norm": 0.5581102517391483, "learning_rate": 2.1673393940746608e-05, "loss": 0.0712, "step": 18762 }, { "epoch": 2.2249496027510967, "grad_norm": 0.9319771514438503, "learning_rate": 2.1671014940365036e-05, "loss": 0.1636, "step": 18763 }, { "epoch": 2.225068184513222, "grad_norm": 0.8595773018880462, "learning_rate": 2.1668635970672622e-05, "loss": 0.1056, "step": 18764 }, { "epoch": 2.225186766275347, "grad_norm": 0.7048883200320013, "learning_rate": 2.1666257031691286e-05, "loss": 0.1132, "step": 18765 }, { "epoch": 2.225305348037472, "grad_norm": 0.8996344855583275, "learning_rate": 2.1663878123442984e-05, "loss": 0.1061, "step": 18766 }, { "epoch": 2.225423929799597, "grad_norm": 0.8797930047428949, "learning_rate": 2.166149924594963e-05, "loss": 0.1033, "step": 18767 }, { "epoch": 2.225542511561722, "grad_norm": 0.603600491775206, "learning_rate": 2.1659120399233166e-05, "loss": 0.0944, "step": 18768 }, { "epoch": 2.225661093323847, "grad_norm": 0.7087980287441159, "learning_rate": 2.16567415833155e-05, "loss": 0.0818, "step": 18769 }, { "epoch": 2.2257796750859717, "grad_norm": 0.8365635078548959, "learning_rate": 2.165436279821859e-05, "loss": 0.1226, "step": 18770 }, { "epoch": 2.225898256848097, "grad_norm": 0.6541044937624289, "learning_rate": 2.1651984043964356e-05, "loss": 0.0826, "step": 18771 }, { "epoch": 2.2260168386102217, "grad_norm": 0.5660494797102648, "learning_rate": 2.1649605320574717e-05, "loss": 0.0721, "step": 18772 }, { "epoch": 2.226135420372347, "grad_norm": 0.6366593759643671, "learning_rate": 2.1647226628071603e-05, "loss": 0.0908, "step": 18773 }, { "epoch": 2.2262540021344717, "grad_norm": 0.7773936632106131, "learning_rate": 2.1644847966476955e-05, "loss": 0.1007, "step": 18774 }, { "epoch": 2.226372583896597, "grad_norm": 0.7805968138256604, "learning_rate": 2.1642469335812697e-05, "loss": 0.0819, "step": 18775 }, { "epoch": 2.2264911656587216, "grad_norm": 0.7487688021158309, "learning_rate": 2.1640090736100744e-05, "loss": 0.1095, "step": 18776 }, { "epoch": 2.226609747420847, "grad_norm": 0.7873243018571832, "learning_rate": 2.1637712167363042e-05, "loss": 0.0948, "step": 18777 }, { "epoch": 2.2267283291829716, "grad_norm": 0.5833673606835681, "learning_rate": 2.1635333629621507e-05, "loss": 0.0867, "step": 18778 }, { "epoch": 2.226846910945097, "grad_norm": 0.7736650311829509, "learning_rate": 2.1632955122898075e-05, "loss": 0.0986, "step": 18779 }, { "epoch": 2.2269654927072216, "grad_norm": 0.6152818324151181, "learning_rate": 2.1630576647214657e-05, "loss": 0.0838, "step": 18780 }, { "epoch": 2.2270840744693468, "grad_norm": 0.693872274101368, "learning_rate": 2.1628198202593203e-05, "loss": 0.083, "step": 18781 }, { "epoch": 2.2272026562314715, "grad_norm": 0.8169361671615917, "learning_rate": 2.1625819789055623e-05, "loss": 0.1012, "step": 18782 }, { "epoch": 2.2273212379935967, "grad_norm": 0.6628543288112296, "learning_rate": 2.162344140662385e-05, "loss": 0.0642, "step": 18783 }, { "epoch": 2.2274398197557215, "grad_norm": 0.6511974274077093, "learning_rate": 2.1621063055319797e-05, "loss": 0.084, "step": 18784 }, { "epoch": 2.2275584015178467, "grad_norm": 0.6247431062310503, "learning_rate": 2.1618684735165407e-05, "loss": 0.0893, "step": 18785 }, { "epoch": 2.2276769832799714, "grad_norm": 0.609766894539109, "learning_rate": 2.1616306446182592e-05, "loss": 0.0756, "step": 18786 }, { "epoch": 2.2277955650420966, "grad_norm": 0.5695849437434747, "learning_rate": 2.161392818839329e-05, "loss": 0.0749, "step": 18787 }, { "epoch": 2.2279141468042214, "grad_norm": 0.787348347238981, "learning_rate": 2.1611549961819405e-05, "loss": 0.0951, "step": 18788 }, { "epoch": 2.2280327285663466, "grad_norm": 0.6905358469519165, "learning_rate": 2.1609171766482882e-05, "loss": 0.0795, "step": 18789 }, { "epoch": 2.2281513103284714, "grad_norm": 1.0354238292423859, "learning_rate": 2.1606793602405643e-05, "loss": 0.125, "step": 18790 }, { "epoch": 2.2282698920905966, "grad_norm": 0.8314321206602734, "learning_rate": 2.160441546960959e-05, "loss": 0.1337, "step": 18791 }, { "epoch": 2.2283884738527213, "grad_norm": 1.1733671730858974, "learning_rate": 2.160203736811668e-05, "loss": 0.1607, "step": 18792 }, { "epoch": 2.2285070556148465, "grad_norm": 0.8593392276768198, "learning_rate": 2.1599659297948813e-05, "loss": 0.1098, "step": 18793 }, { "epoch": 2.2286256373769713, "grad_norm": 0.9183073058252581, "learning_rate": 2.159728125912792e-05, "loss": 0.1323, "step": 18794 }, { "epoch": 2.2287442191390965, "grad_norm": 0.7059799461980566, "learning_rate": 2.159490325167591e-05, "loss": 0.107, "step": 18795 }, { "epoch": 2.2288628009012212, "grad_norm": 0.8024018376737095, "learning_rate": 2.1592525275614725e-05, "loss": 0.1165, "step": 18796 }, { "epoch": 2.2289813826633464, "grad_norm": 0.7223505266758906, "learning_rate": 2.1590147330966273e-05, "loss": 0.1147, "step": 18797 }, { "epoch": 2.229099964425471, "grad_norm": 0.6282650319680421, "learning_rate": 2.158776941775249e-05, "loss": 0.0672, "step": 18798 }, { "epoch": 2.2292185461875964, "grad_norm": 0.6831447982654134, "learning_rate": 2.1585391535995277e-05, "loss": 0.0955, "step": 18799 }, { "epoch": 2.229337127949721, "grad_norm": 0.7273066142755623, "learning_rate": 2.1583013685716575e-05, "loss": 0.0838, "step": 18800 }, { "epoch": 2.2294557097118464, "grad_norm": 0.7426825357542508, "learning_rate": 2.1580635866938296e-05, "loss": 0.0924, "step": 18801 }, { "epoch": 2.229574291473971, "grad_norm": 0.7319850654352308, "learning_rate": 2.1578258079682366e-05, "loss": 0.1012, "step": 18802 }, { "epoch": 2.2296928732360963, "grad_norm": 0.5214485258605044, "learning_rate": 2.1575880323970688e-05, "loss": 0.0756, "step": 18803 }, { "epoch": 2.229811454998221, "grad_norm": 0.6081553509926184, "learning_rate": 2.15735025998252e-05, "loss": 0.0854, "step": 18804 }, { "epoch": 2.2299300367603463, "grad_norm": 0.8328187106150088, "learning_rate": 2.157112490726782e-05, "loss": 0.1084, "step": 18805 }, { "epoch": 2.230048618522471, "grad_norm": 0.6132669417220075, "learning_rate": 2.1568747246320455e-05, "loss": 0.0813, "step": 18806 }, { "epoch": 2.2301672002845963, "grad_norm": 0.7922557391904104, "learning_rate": 2.1566369617005046e-05, "loss": 0.1045, "step": 18807 }, { "epoch": 2.230285782046721, "grad_norm": 0.7801383735906623, "learning_rate": 2.156399201934349e-05, "loss": 0.1239, "step": 18808 }, { "epoch": 2.230404363808846, "grad_norm": 0.7151482338731723, "learning_rate": 2.1561614453357714e-05, "loss": 0.0854, "step": 18809 }, { "epoch": 2.2305229455709714, "grad_norm": 0.9638759466985427, "learning_rate": 2.155923691906963e-05, "loss": 0.1122, "step": 18810 }, { "epoch": 2.230641527333096, "grad_norm": 0.7750739485000125, "learning_rate": 2.1556859416501175e-05, "loss": 0.1076, "step": 18811 }, { "epoch": 2.230760109095221, "grad_norm": 0.8145145509739613, "learning_rate": 2.155448194567425e-05, "loss": 0.1302, "step": 18812 }, { "epoch": 2.230878690857346, "grad_norm": 0.9073695748759717, "learning_rate": 2.155210450661078e-05, "loss": 0.1367, "step": 18813 }, { "epoch": 2.2309972726194713, "grad_norm": 1.1816394577260945, "learning_rate": 2.1549727099332664e-05, "loss": 0.1566, "step": 18814 }, { "epoch": 2.231115854381596, "grad_norm": 0.8331976078740441, "learning_rate": 2.1547349723861848e-05, "loss": 0.1104, "step": 18815 }, { "epoch": 2.2312344361437213, "grad_norm": 0.6937543285825699, "learning_rate": 2.1544972380220223e-05, "loss": 0.1014, "step": 18816 }, { "epoch": 2.231353017905846, "grad_norm": 0.7294410537668036, "learning_rate": 2.154259506842972e-05, "loss": 0.0954, "step": 18817 }, { "epoch": 2.2314715996679713, "grad_norm": 0.5338344444228786, "learning_rate": 2.154021778851225e-05, "loss": 0.0793, "step": 18818 }, { "epoch": 2.231590181430096, "grad_norm": 0.9035805176108189, "learning_rate": 2.1537840540489733e-05, "loss": 0.1282, "step": 18819 }, { "epoch": 2.2317087631922212, "grad_norm": 0.7050253398578582, "learning_rate": 2.1535463324384084e-05, "loss": 0.1095, "step": 18820 }, { "epoch": 2.231827344954346, "grad_norm": 0.9032088897333036, "learning_rate": 2.15330861402172e-05, "loss": 0.1387, "step": 18821 }, { "epoch": 2.231945926716471, "grad_norm": 0.6553096511908723, "learning_rate": 2.1530708988011023e-05, "loss": 0.095, "step": 18822 }, { "epoch": 2.232064508478596, "grad_norm": 0.8031440439437122, "learning_rate": 2.1528331867787453e-05, "loss": 0.1144, "step": 18823 }, { "epoch": 2.232183090240721, "grad_norm": 0.7182569189548866, "learning_rate": 2.152595477956841e-05, "loss": 0.0861, "step": 18824 }, { "epoch": 2.232301672002846, "grad_norm": 0.6915433891118755, "learning_rate": 2.152357772337579e-05, "loss": 0.0833, "step": 18825 }, { "epoch": 2.232420253764971, "grad_norm": 0.7490665367998338, "learning_rate": 2.1521200699231532e-05, "loss": 0.0943, "step": 18826 }, { "epoch": 2.232538835527096, "grad_norm": 1.2242729249336664, "learning_rate": 2.1518823707157532e-05, "loss": 0.1634, "step": 18827 }, { "epoch": 2.232657417289221, "grad_norm": 1.133322763656763, "learning_rate": 2.1516446747175713e-05, "loss": 0.1498, "step": 18828 }, { "epoch": 2.232775999051346, "grad_norm": 0.9999142839799646, "learning_rate": 2.1514069819307976e-05, "loss": 0.1121, "step": 18829 }, { "epoch": 2.232894580813471, "grad_norm": 0.8495200033038777, "learning_rate": 2.1511692923576246e-05, "loss": 0.1115, "step": 18830 }, { "epoch": 2.233013162575596, "grad_norm": 0.662889091394239, "learning_rate": 2.1509316060002433e-05, "loss": 0.0977, "step": 18831 }, { "epoch": 2.233131744337721, "grad_norm": 0.8262139496396805, "learning_rate": 2.150693922860843e-05, "loss": 0.1093, "step": 18832 }, { "epoch": 2.2332503260998458, "grad_norm": 0.8990809309299874, "learning_rate": 2.150456242941618e-05, "loss": 0.0928, "step": 18833 }, { "epoch": 2.233368907861971, "grad_norm": 0.9379962875256497, "learning_rate": 2.1502185662447576e-05, "loss": 0.1235, "step": 18834 }, { "epoch": 2.2334874896240957, "grad_norm": 0.7531394175953882, "learning_rate": 2.149980892772453e-05, "loss": 0.1111, "step": 18835 }, { "epoch": 2.233606071386221, "grad_norm": 0.7997052703584738, "learning_rate": 2.1497432225268947e-05, "loss": 0.0936, "step": 18836 }, { "epoch": 2.2337246531483457, "grad_norm": 0.7761639998283222, "learning_rate": 2.1495055555102748e-05, "loss": 0.1171, "step": 18837 }, { "epoch": 2.233843234910471, "grad_norm": 0.6841340464595771, "learning_rate": 2.149267891724784e-05, "loss": 0.0957, "step": 18838 }, { "epoch": 2.2339618166725956, "grad_norm": 0.7271188037776125, "learning_rate": 2.1490302311726135e-05, "loss": 0.113, "step": 18839 }, { "epoch": 2.234080398434721, "grad_norm": 0.5344064706108158, "learning_rate": 2.1487925738559526e-05, "loss": 0.0826, "step": 18840 }, { "epoch": 2.2341989801968456, "grad_norm": 0.755979095229751, "learning_rate": 2.1485549197769945e-05, "loss": 0.0979, "step": 18841 }, { "epoch": 2.234317561958971, "grad_norm": 0.7890720888992585, "learning_rate": 2.1483172689379294e-05, "loss": 0.1094, "step": 18842 }, { "epoch": 2.2344361437210956, "grad_norm": 0.7907479320712044, "learning_rate": 2.1480796213409475e-05, "loss": 0.1014, "step": 18843 }, { "epoch": 2.2345547254832208, "grad_norm": 0.6358448794271879, "learning_rate": 2.147841976988239e-05, "loss": 0.084, "step": 18844 }, { "epoch": 2.2346733072453455, "grad_norm": 0.7735682252406759, "learning_rate": 2.1476043358819965e-05, "loss": 0.1144, "step": 18845 }, { "epoch": 2.2347918890074707, "grad_norm": 0.875111801265609, "learning_rate": 2.14736669802441e-05, "loss": 0.0965, "step": 18846 }, { "epoch": 2.2349104707695955, "grad_norm": 0.7379134265865817, "learning_rate": 2.1471290634176693e-05, "loss": 0.0984, "step": 18847 }, { "epoch": 2.2350290525317207, "grad_norm": 0.6718381116612439, "learning_rate": 2.1468914320639662e-05, "loss": 0.0946, "step": 18848 }, { "epoch": 2.2351476342938454, "grad_norm": 0.7589504529752553, "learning_rate": 2.1466538039654917e-05, "loss": 0.0947, "step": 18849 }, { "epoch": 2.2352662160559706, "grad_norm": 0.6800429576995427, "learning_rate": 2.146416179124436e-05, "loss": 0.1116, "step": 18850 }, { "epoch": 2.2353847978180954, "grad_norm": 0.7833938571961641, "learning_rate": 2.1461785575429882e-05, "loss": 0.1013, "step": 18851 }, { "epoch": 2.2355033795802206, "grad_norm": 0.5454432007354336, "learning_rate": 2.1459409392233414e-05, "loss": 0.0746, "step": 18852 }, { "epoch": 2.2356219613423454, "grad_norm": 0.6861710359905958, "learning_rate": 2.1457033241676848e-05, "loss": 0.0745, "step": 18853 }, { "epoch": 2.2357405431044706, "grad_norm": 0.6380638803104269, "learning_rate": 2.1454657123782094e-05, "loss": 0.0828, "step": 18854 }, { "epoch": 2.2358591248665953, "grad_norm": 0.6411095310624352, "learning_rate": 2.145228103857104e-05, "loss": 0.088, "step": 18855 }, { "epoch": 2.2359777066287205, "grad_norm": 0.8277616572126475, "learning_rate": 2.1449904986065617e-05, "loss": 0.1116, "step": 18856 }, { "epoch": 2.2360962883908453, "grad_norm": 0.8441712303381849, "learning_rate": 2.144752896628771e-05, "loss": 0.092, "step": 18857 }, { "epoch": 2.2362148701529705, "grad_norm": 0.8235665871619438, "learning_rate": 2.1445152979259237e-05, "loss": 0.1193, "step": 18858 }, { "epoch": 2.2363334519150957, "grad_norm": 0.747603940495877, "learning_rate": 2.144277702500208e-05, "loss": 0.0924, "step": 18859 }, { "epoch": 2.2364520336772205, "grad_norm": 0.6985403316308801, "learning_rate": 2.1440401103538172e-05, "loss": 0.1115, "step": 18860 }, { "epoch": 2.236570615439345, "grad_norm": 0.5831015556023748, "learning_rate": 2.1438025214889395e-05, "loss": 0.0683, "step": 18861 }, { "epoch": 2.2366891972014704, "grad_norm": 0.7646415199579337, "learning_rate": 2.143564935907765e-05, "loss": 0.1064, "step": 18862 }, { "epoch": 2.2368077789635956, "grad_norm": 0.9318470984950791, "learning_rate": 2.143327353612486e-05, "loss": 0.1177, "step": 18863 }, { "epoch": 2.2369263607257204, "grad_norm": 0.7586036405627115, "learning_rate": 2.1430897746052912e-05, "loss": 0.0942, "step": 18864 }, { "epoch": 2.237044942487845, "grad_norm": 0.8410941835501364, "learning_rate": 2.1428521988883706e-05, "loss": 0.1103, "step": 18865 }, { "epoch": 2.2371635242499703, "grad_norm": 0.5670953140084716, "learning_rate": 2.1426146264639145e-05, "loss": 0.0661, "step": 18866 }, { "epoch": 2.2372821060120955, "grad_norm": 0.6849696905241878, "learning_rate": 2.1423770573341133e-05, "loss": 0.1142, "step": 18867 }, { "epoch": 2.2374006877742203, "grad_norm": 1.0178732364438488, "learning_rate": 2.1421394915011578e-05, "loss": 0.1404, "step": 18868 }, { "epoch": 2.2375192695363455, "grad_norm": 0.7417392836522756, "learning_rate": 2.141901928967237e-05, "loss": 0.1105, "step": 18869 }, { "epoch": 2.2376378512984703, "grad_norm": 0.8306179505585711, "learning_rate": 2.1416643697345405e-05, "loss": 0.0786, "step": 18870 }, { "epoch": 2.2377564330605955, "grad_norm": 0.8543598082115276, "learning_rate": 2.1414268138052603e-05, "loss": 0.0845, "step": 18871 }, { "epoch": 2.23787501482272, "grad_norm": 0.6520212588362649, "learning_rate": 2.141189261181585e-05, "loss": 0.1006, "step": 18872 }, { "epoch": 2.2379935965848454, "grad_norm": 0.5386387311043194, "learning_rate": 2.1409517118657046e-05, "loss": 0.0788, "step": 18873 }, { "epoch": 2.23811217834697, "grad_norm": 0.6834238086064753, "learning_rate": 2.1407141658598084e-05, "loss": 0.0774, "step": 18874 }, { "epoch": 2.2382307601090954, "grad_norm": 0.7645859343333278, "learning_rate": 2.1404766231660878e-05, "loss": 0.0951, "step": 18875 }, { "epoch": 2.23834934187122, "grad_norm": 0.7661819972371129, "learning_rate": 2.1402390837867315e-05, "loss": 0.1138, "step": 18876 }, { "epoch": 2.2384679236333453, "grad_norm": 0.6632013097506498, "learning_rate": 2.1400015477239294e-05, "loss": 0.1125, "step": 18877 }, { "epoch": 2.23858650539547, "grad_norm": 0.7099746074185574, "learning_rate": 2.1397640149798718e-05, "loss": 0.0879, "step": 18878 }, { "epoch": 2.2387050871575953, "grad_norm": 0.6229539869285571, "learning_rate": 2.139526485556749e-05, "loss": 0.1106, "step": 18879 }, { "epoch": 2.23882366891972, "grad_norm": 0.7134464996813806, "learning_rate": 2.1392889594567496e-05, "loss": 0.1187, "step": 18880 }, { "epoch": 2.2389422506818453, "grad_norm": 0.6717632038325828, "learning_rate": 2.1390514366820624e-05, "loss": 0.102, "step": 18881 }, { "epoch": 2.23906083244397, "grad_norm": 0.5955593377091798, "learning_rate": 2.13881391723488e-05, "loss": 0.083, "step": 18882 }, { "epoch": 2.2391794142060952, "grad_norm": 0.8472397525940243, "learning_rate": 2.13857640111739e-05, "loss": 0.1122, "step": 18883 }, { "epoch": 2.23929799596822, "grad_norm": 0.9843788050704898, "learning_rate": 2.138338888331782e-05, "loss": 0.1483, "step": 18884 }, { "epoch": 2.239416577730345, "grad_norm": 0.7358397207627863, "learning_rate": 2.1381013788802454e-05, "loss": 0.1018, "step": 18885 }, { "epoch": 2.23953515949247, "grad_norm": 0.6440591621804126, "learning_rate": 2.1378638727649714e-05, "loss": 0.0997, "step": 18886 }, { "epoch": 2.239653741254595, "grad_norm": 0.7018834654611592, "learning_rate": 2.1376263699881474e-05, "loss": 0.0827, "step": 18887 }, { "epoch": 2.23977232301672, "grad_norm": 0.5952155952241978, "learning_rate": 2.1373888705519643e-05, "loss": 0.0968, "step": 18888 }, { "epoch": 2.239890904778845, "grad_norm": 0.6913547250129303, "learning_rate": 2.137151374458611e-05, "loss": 0.0896, "step": 18889 }, { "epoch": 2.24000948654097, "grad_norm": 0.6219516172228303, "learning_rate": 2.1369138817102776e-05, "loss": 0.0857, "step": 18890 }, { "epoch": 2.240128068303095, "grad_norm": 0.6545288669349081, "learning_rate": 2.136676392309153e-05, "loss": 0.0909, "step": 18891 }, { "epoch": 2.24024665006522, "grad_norm": 0.6263711047690557, "learning_rate": 2.136438906257426e-05, "loss": 0.088, "step": 18892 }, { "epoch": 2.240365231827345, "grad_norm": 0.7130280433219129, "learning_rate": 2.1362014235572868e-05, "loss": 0.1028, "step": 18893 }, { "epoch": 2.24048381358947, "grad_norm": 0.7952716289697666, "learning_rate": 2.1359639442109245e-05, "loss": 0.0793, "step": 18894 }, { "epoch": 2.240602395351595, "grad_norm": 0.8678019998458418, "learning_rate": 2.1357264682205277e-05, "loss": 0.1181, "step": 18895 }, { "epoch": 2.2407209771137198, "grad_norm": 0.9001229098397029, "learning_rate": 2.1354889955882862e-05, "loss": 0.1153, "step": 18896 }, { "epoch": 2.240839558875845, "grad_norm": 0.599454135667106, "learning_rate": 2.135251526316389e-05, "loss": 0.084, "step": 18897 }, { "epoch": 2.2409581406379697, "grad_norm": 1.3688572755857746, "learning_rate": 2.1350140604070264e-05, "loss": 0.243, "step": 18898 }, { "epoch": 2.241076722400095, "grad_norm": 0.8259674590773197, "learning_rate": 2.1347765978623863e-05, "loss": 0.1063, "step": 18899 }, { "epoch": 2.2411953041622197, "grad_norm": 0.6359413094814956, "learning_rate": 2.1345391386846574e-05, "loss": 0.0935, "step": 18900 }, { "epoch": 2.241313885924345, "grad_norm": 0.70845256581606, "learning_rate": 2.13430168287603e-05, "loss": 0.085, "step": 18901 }, { "epoch": 2.2414324676864696, "grad_norm": 1.0382199812724466, "learning_rate": 2.134064230438693e-05, "loss": 0.1304, "step": 18902 }, { "epoch": 2.241551049448595, "grad_norm": 0.8277205927541627, "learning_rate": 2.1338267813748335e-05, "loss": 0.0987, "step": 18903 }, { "epoch": 2.2416696312107196, "grad_norm": 0.8075284445579336, "learning_rate": 2.1335893356866437e-05, "loss": 0.1025, "step": 18904 }, { "epoch": 2.241788212972845, "grad_norm": 0.8756098676671197, "learning_rate": 2.1333518933763108e-05, "loss": 0.1143, "step": 18905 }, { "epoch": 2.2419067947349696, "grad_norm": 0.8035422139112733, "learning_rate": 2.133114454446023e-05, "loss": 0.0828, "step": 18906 }, { "epoch": 2.2420253764970948, "grad_norm": 0.9547125371853618, "learning_rate": 2.13287701889797e-05, "loss": 0.1474, "step": 18907 }, { "epoch": 2.2421439582592195, "grad_norm": 0.48853763498941616, "learning_rate": 2.1326395867343414e-05, "loss": 0.075, "step": 18908 }, { "epoch": 2.2422625400213447, "grad_norm": 0.7224447801368307, "learning_rate": 2.132402157957325e-05, "loss": 0.0956, "step": 18909 }, { "epoch": 2.2423811217834695, "grad_norm": 0.47093722074855326, "learning_rate": 2.1321647325691105e-05, "loss": 0.0737, "step": 18910 }, { "epoch": 2.2424997035455947, "grad_norm": 0.6743478127427879, "learning_rate": 2.1319273105718847e-05, "loss": 0.0899, "step": 18911 }, { "epoch": 2.24261828530772, "grad_norm": 0.69067812164709, "learning_rate": 2.131689891967839e-05, "loss": 0.0889, "step": 18912 }, { "epoch": 2.2427368670698447, "grad_norm": 0.7939170693137828, "learning_rate": 2.1314524767591607e-05, "loss": 0.1069, "step": 18913 }, { "epoch": 2.2428554488319694, "grad_norm": 0.8261261696780542, "learning_rate": 2.131215064948038e-05, "loss": 0.1189, "step": 18914 }, { "epoch": 2.2429740305940946, "grad_norm": 0.6709396937455359, "learning_rate": 2.1309776565366603e-05, "loss": 0.0775, "step": 18915 }, { "epoch": 2.24309261235622, "grad_norm": 0.8673974138933519, "learning_rate": 2.1307402515272162e-05, "loss": 0.1319, "step": 18916 }, { "epoch": 2.2432111941183446, "grad_norm": 1.022985165412336, "learning_rate": 2.1305028499218946e-05, "loss": 0.1237, "step": 18917 }, { "epoch": 2.24332977588047, "grad_norm": 0.6265655882377251, "learning_rate": 2.1302654517228827e-05, "loss": 0.0963, "step": 18918 }, { "epoch": 2.2434483576425945, "grad_norm": 0.807205876697594, "learning_rate": 2.1300280569323707e-05, "loss": 0.1129, "step": 18919 }, { "epoch": 2.2435669394047197, "grad_norm": 1.1017051998758185, "learning_rate": 2.1297906655525464e-05, "loss": 0.1503, "step": 18920 }, { "epoch": 2.2436855211668445, "grad_norm": 0.7684624117905947, "learning_rate": 2.1295532775855983e-05, "loss": 0.1096, "step": 18921 }, { "epoch": 2.2438041029289697, "grad_norm": 0.5204420018695394, "learning_rate": 2.1293158930337136e-05, "loss": 0.08, "step": 18922 }, { "epoch": 2.2439226846910945, "grad_norm": 1.0760012758866255, "learning_rate": 2.129078511899083e-05, "loss": 0.1941, "step": 18923 }, { "epoch": 2.2440412664532197, "grad_norm": 0.5774032878535313, "learning_rate": 2.1288411341838934e-05, "loss": 0.0731, "step": 18924 }, { "epoch": 2.2441598482153444, "grad_norm": 0.7014611983506263, "learning_rate": 2.128603759890333e-05, "loss": 0.1102, "step": 18925 }, { "epoch": 2.2442784299774696, "grad_norm": 0.7083999328470206, "learning_rate": 2.1283663890205905e-05, "loss": 0.0918, "step": 18926 }, { "epoch": 2.2443970117395944, "grad_norm": 0.5775466347580386, "learning_rate": 2.128129021576854e-05, "loss": 0.0846, "step": 18927 }, { "epoch": 2.2445155935017196, "grad_norm": 0.5489275259153773, "learning_rate": 2.127891657561313e-05, "loss": 0.0874, "step": 18928 }, { "epoch": 2.2446341752638443, "grad_norm": 0.7412139072646254, "learning_rate": 2.127654296976154e-05, "loss": 0.0916, "step": 18929 }, { "epoch": 2.2447527570259695, "grad_norm": 0.6859437068761141, "learning_rate": 2.127416939823565e-05, "loss": 0.1018, "step": 18930 }, { "epoch": 2.2448713387880943, "grad_norm": 0.7469130187199577, "learning_rate": 2.1271795861057358e-05, "loss": 0.1051, "step": 18931 }, { "epoch": 2.2449899205502195, "grad_norm": 0.8873685255282718, "learning_rate": 2.1269422358248536e-05, "loss": 0.1223, "step": 18932 }, { "epoch": 2.2451085023123443, "grad_norm": 0.9610368627359948, "learning_rate": 2.1267048889831055e-05, "loss": 0.1028, "step": 18933 }, { "epoch": 2.2452270840744695, "grad_norm": 0.5981277406930261, "learning_rate": 2.1264675455826818e-05, "loss": 0.0922, "step": 18934 }, { "epoch": 2.2453456658365942, "grad_norm": 0.6826640567441972, "learning_rate": 2.1262302056257692e-05, "loss": 0.108, "step": 18935 }, { "epoch": 2.2454642475987194, "grad_norm": 0.6461657561206243, "learning_rate": 2.1259928691145552e-05, "loss": 0.0999, "step": 18936 }, { "epoch": 2.245582829360844, "grad_norm": 0.8322745827744034, "learning_rate": 2.125755536051228e-05, "loss": 0.1181, "step": 18937 }, { "epoch": 2.2457014111229694, "grad_norm": 0.9613591938960737, "learning_rate": 2.1255182064379763e-05, "loss": 0.1336, "step": 18938 }, { "epoch": 2.245819992885094, "grad_norm": 0.8883616601047762, "learning_rate": 2.125280880276988e-05, "loss": 0.089, "step": 18939 }, { "epoch": 2.2459385746472194, "grad_norm": 0.563186814226707, "learning_rate": 2.1250435575704504e-05, "loss": 0.0893, "step": 18940 }, { "epoch": 2.246057156409344, "grad_norm": 0.6873457316351242, "learning_rate": 2.12480623832055e-05, "loss": 0.0844, "step": 18941 }, { "epoch": 2.2461757381714693, "grad_norm": 0.7606682796626696, "learning_rate": 2.1245689225294778e-05, "loss": 0.1063, "step": 18942 }, { "epoch": 2.246294319933594, "grad_norm": 0.7517825449773925, "learning_rate": 2.1243316101994192e-05, "loss": 0.1097, "step": 18943 }, { "epoch": 2.2464129016957193, "grad_norm": 0.699587381222274, "learning_rate": 2.124094301332562e-05, "loss": 0.0892, "step": 18944 }, { "epoch": 2.246531483457844, "grad_norm": 0.7711457725122731, "learning_rate": 2.1238569959310944e-05, "loss": 0.1074, "step": 18945 }, { "epoch": 2.2466500652199692, "grad_norm": 0.7756336214677487, "learning_rate": 2.1236196939972042e-05, "loss": 0.1097, "step": 18946 }, { "epoch": 2.246768646982094, "grad_norm": 0.8654655698138872, "learning_rate": 2.123382395533079e-05, "loss": 0.1148, "step": 18947 }, { "epoch": 2.246887228744219, "grad_norm": 1.1143090698948723, "learning_rate": 2.1231451005409055e-05, "loss": 0.1679, "step": 18948 }, { "epoch": 2.247005810506344, "grad_norm": 0.7503619400576503, "learning_rate": 2.122907809022873e-05, "loss": 0.1159, "step": 18949 }, { "epoch": 2.247124392268469, "grad_norm": 0.5276034584792226, "learning_rate": 2.1226705209811686e-05, "loss": 0.0754, "step": 18950 }, { "epoch": 2.247242974030594, "grad_norm": 0.6056072309389757, "learning_rate": 2.122433236417979e-05, "loss": 0.0681, "step": 18951 }, { "epoch": 2.247361555792719, "grad_norm": 0.8157470344104641, "learning_rate": 2.1221959553354904e-05, "loss": 0.1008, "step": 18952 }, { "epoch": 2.247480137554844, "grad_norm": 0.9269273638921808, "learning_rate": 2.1219586777358932e-05, "loss": 0.1545, "step": 18953 }, { "epoch": 2.247598719316969, "grad_norm": 0.43468208959259647, "learning_rate": 2.1217214036213734e-05, "loss": 0.0601, "step": 18954 }, { "epoch": 2.247717301079094, "grad_norm": 0.87788732550073, "learning_rate": 2.121484132994118e-05, "loss": 0.0789, "step": 18955 }, { "epoch": 2.247835882841219, "grad_norm": 1.0342873096727407, "learning_rate": 2.1212468658563146e-05, "loss": 0.0945, "step": 18956 }, { "epoch": 2.247954464603344, "grad_norm": 0.45864633357910395, "learning_rate": 2.1210096022101507e-05, "loss": 0.0602, "step": 18957 }, { "epoch": 2.248073046365469, "grad_norm": 0.7505465678781328, "learning_rate": 2.1207723420578136e-05, "loss": 0.095, "step": 18958 }, { "epoch": 2.2481916281275938, "grad_norm": 0.5777824008204963, "learning_rate": 2.120535085401491e-05, "loss": 0.0759, "step": 18959 }, { "epoch": 2.248310209889719, "grad_norm": 0.5415957483720728, "learning_rate": 2.1202978322433683e-05, "loss": 0.0789, "step": 18960 }, { "epoch": 2.248428791651844, "grad_norm": 0.744181198473498, "learning_rate": 2.120060582585635e-05, "loss": 0.0787, "step": 18961 }, { "epoch": 2.248547373413969, "grad_norm": 0.6184984687924054, "learning_rate": 2.1198233364304774e-05, "loss": 0.0875, "step": 18962 }, { "epoch": 2.2486659551760937, "grad_norm": 0.9848627018416894, "learning_rate": 2.1195860937800817e-05, "loss": 0.1276, "step": 18963 }, { "epoch": 2.248784536938219, "grad_norm": 0.6739047177239939, "learning_rate": 2.119348854636636e-05, "loss": 0.0723, "step": 18964 }, { "epoch": 2.248903118700344, "grad_norm": 0.5664251003846466, "learning_rate": 2.1191116190023268e-05, "loss": 0.078, "step": 18965 }, { "epoch": 2.249021700462469, "grad_norm": 0.6816252722221686, "learning_rate": 2.1188743868793424e-05, "loss": 0.0867, "step": 18966 }, { "epoch": 2.249140282224594, "grad_norm": 0.8814507342848475, "learning_rate": 2.1186371582698672e-05, "loss": 0.1254, "step": 18967 }, { "epoch": 2.249258863986719, "grad_norm": 0.7701764114053672, "learning_rate": 2.1183999331760908e-05, "loss": 0.0853, "step": 18968 }, { "epoch": 2.249377445748844, "grad_norm": 0.8526434642784664, "learning_rate": 2.1181627116001997e-05, "loss": 0.1219, "step": 18969 }, { "epoch": 2.2494960275109688, "grad_norm": 0.5323647228391825, "learning_rate": 2.1179254935443793e-05, "loss": 0.0594, "step": 18970 }, { "epoch": 2.249614609273094, "grad_norm": 0.8569539658646298, "learning_rate": 2.1176882790108167e-05, "loss": 0.1509, "step": 18971 }, { "epoch": 2.2497331910352187, "grad_norm": 0.658287323596999, "learning_rate": 2.1174510680017003e-05, "loss": 0.1064, "step": 18972 }, { "epoch": 2.249851772797344, "grad_norm": 0.7188078596127473, "learning_rate": 2.117213860519216e-05, "loss": 0.0781, "step": 18973 }, { "epoch": 2.2499703545594687, "grad_norm": 0.6536073128512936, "learning_rate": 2.1169766565655496e-05, "loss": 0.0818, "step": 18974 }, { "epoch": 2.250088936321594, "grad_norm": 0.5811204961292361, "learning_rate": 2.1167394561428898e-05, "loss": 0.0623, "step": 18975 }, { "epoch": 2.2502075180837187, "grad_norm": 0.9489249789868841, "learning_rate": 2.1165022592534212e-05, "loss": 0.1291, "step": 18976 }, { "epoch": 2.250326099845844, "grad_norm": 0.6894935899382619, "learning_rate": 2.1162650658993323e-05, "loss": 0.0872, "step": 18977 }, { "epoch": 2.2504446816079686, "grad_norm": 0.9497802059269165, "learning_rate": 2.116027876082808e-05, "loss": 0.1596, "step": 18978 }, { "epoch": 2.250563263370094, "grad_norm": 0.5695756401563482, "learning_rate": 2.115790689806037e-05, "loss": 0.0776, "step": 18979 }, { "epoch": 2.2506818451322186, "grad_norm": 0.773402356391573, "learning_rate": 2.1155535070712042e-05, "loss": 0.1206, "step": 18980 }, { "epoch": 2.250800426894344, "grad_norm": 0.6024278135369523, "learning_rate": 2.1153163278804972e-05, "loss": 0.0781, "step": 18981 }, { "epoch": 2.2509190086564685, "grad_norm": 0.7188108336265119, "learning_rate": 2.1150791522361003e-05, "loss": 0.0939, "step": 18982 }, { "epoch": 2.2510375904185937, "grad_norm": 0.6320656859745113, "learning_rate": 2.114841980140203e-05, "loss": 0.0955, "step": 18983 }, { "epoch": 2.2511561721807185, "grad_norm": 0.5944094817620409, "learning_rate": 2.1146048115949905e-05, "loss": 0.0792, "step": 18984 }, { "epoch": 2.2512747539428437, "grad_norm": 0.8517744893713042, "learning_rate": 2.1143676466026482e-05, "loss": 0.1172, "step": 18985 }, { "epoch": 2.2513933357049685, "grad_norm": 1.3934710464685394, "learning_rate": 2.1141304851653635e-05, "loss": 0.1572, "step": 18986 }, { "epoch": 2.2515119174670937, "grad_norm": 0.7979234048921818, "learning_rate": 2.1138933272853224e-05, "loss": 0.1066, "step": 18987 }, { "epoch": 2.2516304992292184, "grad_norm": 0.9094071662287244, "learning_rate": 2.113656172964712e-05, "loss": 0.1265, "step": 18988 }, { "epoch": 2.2517490809913436, "grad_norm": 0.7743089794025334, "learning_rate": 2.1134190222057166e-05, "loss": 0.1006, "step": 18989 }, { "epoch": 2.2518676627534684, "grad_norm": 0.7411886064193347, "learning_rate": 2.113181875010525e-05, "loss": 0.1234, "step": 18990 }, { "epoch": 2.2519862445155936, "grad_norm": 0.8205624181656376, "learning_rate": 2.112944731381322e-05, "loss": 0.1017, "step": 18991 }, { "epoch": 2.2521048262777184, "grad_norm": 0.9378917442216863, "learning_rate": 2.1127075913202942e-05, "loss": 0.1244, "step": 18992 }, { "epoch": 2.2522234080398436, "grad_norm": 0.6419689555143541, "learning_rate": 2.112470454829626e-05, "loss": 0.0778, "step": 18993 }, { "epoch": 2.2523419898019683, "grad_norm": 0.9297988497950536, "learning_rate": 2.1122333219115063e-05, "loss": 0.117, "step": 18994 }, { "epoch": 2.2524605715640935, "grad_norm": 0.6609726618094524, "learning_rate": 2.1119961925681198e-05, "loss": 0.0956, "step": 18995 }, { "epoch": 2.2525791533262183, "grad_norm": 0.935873103125508, "learning_rate": 2.1117590668016527e-05, "loss": 0.1222, "step": 18996 }, { "epoch": 2.2526977350883435, "grad_norm": 0.65742140806973, "learning_rate": 2.1115219446142898e-05, "loss": 0.0887, "step": 18997 }, { "epoch": 2.2528163168504682, "grad_norm": 0.9869060373985011, "learning_rate": 2.1112848260082195e-05, "loss": 0.1335, "step": 18998 }, { "epoch": 2.2529348986125934, "grad_norm": 0.7048185737884956, "learning_rate": 2.111047710985626e-05, "loss": 0.0869, "step": 18999 }, { "epoch": 2.253053480374718, "grad_norm": 0.8199998674400801, "learning_rate": 2.110810599548696e-05, "loss": 0.1015, "step": 19000 }, { "epoch": 2.2531720621368434, "grad_norm": 0.6994936897709783, "learning_rate": 2.1105734916996137e-05, "loss": 0.0765, "step": 19001 }, { "epoch": 2.253290643898968, "grad_norm": 0.7611550129695417, "learning_rate": 2.1103363874405675e-05, "loss": 0.0943, "step": 19002 }, { "epoch": 2.2534092256610934, "grad_norm": 0.8930682456829255, "learning_rate": 2.110099286773742e-05, "loss": 0.1411, "step": 19003 }, { "epoch": 2.253527807423218, "grad_norm": 0.7074982654728084, "learning_rate": 2.109862189701322e-05, "loss": 0.0889, "step": 19004 }, { "epoch": 2.2536463891853433, "grad_norm": 0.5736377549670725, "learning_rate": 2.1096250962254947e-05, "loss": 0.0702, "step": 19005 }, { "epoch": 2.2537649709474685, "grad_norm": 0.922325299402054, "learning_rate": 2.1093880063484452e-05, "loss": 0.1302, "step": 19006 }, { "epoch": 2.2538835527095933, "grad_norm": 0.939569258804825, "learning_rate": 2.1091509200723597e-05, "loss": 0.1074, "step": 19007 }, { "epoch": 2.254002134471718, "grad_norm": 0.9180041148578808, "learning_rate": 2.1089138373994223e-05, "loss": 0.1106, "step": 19008 }, { "epoch": 2.2541207162338432, "grad_norm": 1.1532584644064932, "learning_rate": 2.1086767583318213e-05, "loss": 0.1548, "step": 19009 }, { "epoch": 2.2542392979959684, "grad_norm": 0.7597800533335355, "learning_rate": 2.10843968287174e-05, "loss": 0.1046, "step": 19010 }, { "epoch": 2.254357879758093, "grad_norm": 0.6806702321968584, "learning_rate": 2.1082026110213654e-05, "loss": 0.0827, "step": 19011 }, { "epoch": 2.254476461520218, "grad_norm": 0.6461291521130037, "learning_rate": 2.1079655427828807e-05, "loss": 0.0922, "step": 19012 }, { "epoch": 2.254595043282343, "grad_norm": 0.6973348020273231, "learning_rate": 2.1077284781584743e-05, "loss": 0.103, "step": 19013 }, { "epoch": 2.2547136250444684, "grad_norm": 0.7130398927111192, "learning_rate": 2.10749141715033e-05, "loss": 0.0944, "step": 19014 }, { "epoch": 2.254832206806593, "grad_norm": 0.8552868569840796, "learning_rate": 2.107254359760633e-05, "loss": 0.118, "step": 19015 }, { "epoch": 2.254950788568718, "grad_norm": 0.6076907873620738, "learning_rate": 2.1070173059915692e-05, "loss": 0.0888, "step": 19016 }, { "epoch": 2.255069370330843, "grad_norm": 1.0245930457541086, "learning_rate": 2.106780255845324e-05, "loss": 0.1536, "step": 19017 }, { "epoch": 2.2551879520929683, "grad_norm": 0.6787786062576756, "learning_rate": 2.1065432093240832e-05, "loss": 0.084, "step": 19018 }, { "epoch": 2.255306533855093, "grad_norm": 0.7891600963867963, "learning_rate": 2.1063061664300305e-05, "loss": 0.1107, "step": 19019 }, { "epoch": 2.2554251156172183, "grad_norm": 0.6550424597603444, "learning_rate": 2.106069127165353e-05, "loss": 0.0768, "step": 19020 }, { "epoch": 2.255543697379343, "grad_norm": 0.8972838959045875, "learning_rate": 2.1058320915322356e-05, "loss": 0.1442, "step": 19021 }, { "epoch": 2.255662279141468, "grad_norm": 0.655910352025063, "learning_rate": 2.1055950595328626e-05, "loss": 0.0761, "step": 19022 }, { "epoch": 2.255780860903593, "grad_norm": 0.5576248863794625, "learning_rate": 2.1053580311694182e-05, "loss": 0.0858, "step": 19023 }, { "epoch": 2.255899442665718, "grad_norm": 0.6186647790286841, "learning_rate": 2.10512100644409e-05, "loss": 0.08, "step": 19024 }, { "epoch": 2.256018024427843, "grad_norm": 0.7670979376099206, "learning_rate": 2.1048839853590616e-05, "loss": 0.1076, "step": 19025 }, { "epoch": 2.256136606189968, "grad_norm": 1.0035868970443778, "learning_rate": 2.1046469679165187e-05, "loss": 0.1506, "step": 19026 }, { "epoch": 2.256255187952093, "grad_norm": 0.7957271525735949, "learning_rate": 2.1044099541186448e-05, "loss": 0.094, "step": 19027 }, { "epoch": 2.256373769714218, "grad_norm": 0.5165895253654987, "learning_rate": 2.104172943967627e-05, "loss": 0.0895, "step": 19028 }, { "epoch": 2.256492351476343, "grad_norm": 0.9626625646082806, "learning_rate": 2.1039359374656498e-05, "loss": 0.1686, "step": 19029 }, { "epoch": 2.256610933238468, "grad_norm": 1.0655772423066538, "learning_rate": 2.103698934614897e-05, "loss": 0.135, "step": 19030 }, { "epoch": 2.256729515000593, "grad_norm": 0.7642193498387415, "learning_rate": 2.1034619354175533e-05, "loss": 0.1002, "step": 19031 }, { "epoch": 2.256848096762718, "grad_norm": 0.606739170951713, "learning_rate": 2.1032249398758055e-05, "loss": 0.0802, "step": 19032 }, { "epoch": 2.256966678524843, "grad_norm": 0.7962680248922646, "learning_rate": 2.1029879479918367e-05, "loss": 0.1229, "step": 19033 }, { "epoch": 2.257085260286968, "grad_norm": 0.7406737158630667, "learning_rate": 2.1027509597678316e-05, "loss": 0.0813, "step": 19034 }, { "epoch": 2.2572038420490927, "grad_norm": 0.7273602578594168, "learning_rate": 2.1025139752059764e-05, "loss": 0.0862, "step": 19035 }, { "epoch": 2.257322423811218, "grad_norm": 0.7650585843566732, "learning_rate": 2.1022769943084542e-05, "loss": 0.0979, "step": 19036 }, { "epoch": 2.2574410055733427, "grad_norm": 0.6316738482405768, "learning_rate": 2.1020400170774513e-05, "loss": 0.0877, "step": 19037 }, { "epoch": 2.257559587335468, "grad_norm": 0.952254904244572, "learning_rate": 2.1018030435151498e-05, "loss": 0.1347, "step": 19038 }, { "epoch": 2.2576781690975927, "grad_norm": 0.6579637189630273, "learning_rate": 2.1015660736237375e-05, "loss": 0.0941, "step": 19039 }, { "epoch": 2.257796750859718, "grad_norm": 1.0251592602927104, "learning_rate": 2.1013291074053973e-05, "loss": 0.0992, "step": 19040 }, { "epoch": 2.2579153326218426, "grad_norm": 1.0283246201044156, "learning_rate": 2.1010921448623136e-05, "loss": 0.1544, "step": 19041 }, { "epoch": 2.258033914383968, "grad_norm": 0.6158578581907558, "learning_rate": 2.1008551859966703e-05, "loss": 0.0915, "step": 19042 }, { "epoch": 2.2581524961460926, "grad_norm": 0.7697647652226256, "learning_rate": 2.1006182308106537e-05, "loss": 0.1067, "step": 19043 }, { "epoch": 2.258271077908218, "grad_norm": 0.6181553755303844, "learning_rate": 2.100381279306447e-05, "loss": 0.0806, "step": 19044 }, { "epoch": 2.2583896596703426, "grad_norm": 1.042292609688342, "learning_rate": 2.1001443314862353e-05, "loss": 0.0997, "step": 19045 }, { "epoch": 2.2585082414324678, "grad_norm": 0.7338877948864893, "learning_rate": 2.099907387352201e-05, "loss": 0.0757, "step": 19046 }, { "epoch": 2.2586268231945925, "grad_norm": 0.7240121964295021, "learning_rate": 2.0996704469065314e-05, "loss": 0.0966, "step": 19047 }, { "epoch": 2.2587454049567177, "grad_norm": 0.7766283705011711, "learning_rate": 2.0994335101514096e-05, "loss": 0.1068, "step": 19048 }, { "epoch": 2.2588639867188425, "grad_norm": 0.6592380113778299, "learning_rate": 2.0991965770890183e-05, "loss": 0.1046, "step": 19049 }, { "epoch": 2.2589825684809677, "grad_norm": 0.6454739250026689, "learning_rate": 2.098959647721544e-05, "loss": 0.0796, "step": 19050 }, { "epoch": 2.2591011502430924, "grad_norm": 0.9012522270691243, "learning_rate": 2.0987227220511697e-05, "loss": 0.1293, "step": 19051 }, { "epoch": 2.2592197320052176, "grad_norm": 0.4596711365357253, "learning_rate": 2.0984858000800807e-05, "loss": 0.0812, "step": 19052 }, { "epoch": 2.2593383137673424, "grad_norm": 0.4683238466676581, "learning_rate": 2.0982488818104585e-05, "loss": 0.0779, "step": 19053 }, { "epoch": 2.2594568955294676, "grad_norm": 0.7869509545470769, "learning_rate": 2.09801196724449e-05, "loss": 0.1043, "step": 19054 }, { "epoch": 2.2595754772915924, "grad_norm": 0.7440629485843135, "learning_rate": 2.097775056384358e-05, "loss": 0.0857, "step": 19055 }, { "epoch": 2.2596940590537176, "grad_norm": 0.8414342628452722, "learning_rate": 2.097538149232247e-05, "loss": 0.1007, "step": 19056 }, { "epoch": 2.2598126408158423, "grad_norm": 0.8388670576628786, "learning_rate": 2.0973012457903403e-05, "loss": 0.1226, "step": 19057 }, { "epoch": 2.2599312225779675, "grad_norm": 0.7898747186670583, "learning_rate": 2.0970643460608226e-05, "loss": 0.0993, "step": 19058 }, { "epoch": 2.2600498043400927, "grad_norm": 1.1226706497174557, "learning_rate": 2.0968274500458777e-05, "loss": 0.1356, "step": 19059 }, { "epoch": 2.2601683861022175, "grad_norm": 0.9875345533594934, "learning_rate": 2.0965905577476884e-05, "loss": 0.0896, "step": 19060 }, { "epoch": 2.2602869678643422, "grad_norm": 1.026584840820942, "learning_rate": 2.0963536691684403e-05, "loss": 0.1735, "step": 19061 }, { "epoch": 2.2604055496264674, "grad_norm": 0.7927098021734523, "learning_rate": 2.096116784310317e-05, "loss": 0.0987, "step": 19062 }, { "epoch": 2.2605241313885927, "grad_norm": 0.7396541246673995, "learning_rate": 2.095879903175501e-05, "loss": 0.108, "step": 19063 }, { "epoch": 2.2606427131507174, "grad_norm": 0.6483771730637219, "learning_rate": 2.0956430257661758e-05, "loss": 0.1089, "step": 19064 }, { "epoch": 2.260761294912842, "grad_norm": 0.9145706638181258, "learning_rate": 2.0954061520845272e-05, "loss": 0.1064, "step": 19065 }, { "epoch": 2.2608798766749674, "grad_norm": 0.668566093775409, "learning_rate": 2.0951692821327374e-05, "loss": 0.1073, "step": 19066 }, { "epoch": 2.2609984584370926, "grad_norm": 0.7740892295396714, "learning_rate": 2.0949324159129906e-05, "loss": 0.1031, "step": 19067 }, { "epoch": 2.2611170401992173, "grad_norm": 0.8365310244954411, "learning_rate": 2.0946955534274693e-05, "loss": 0.1008, "step": 19068 }, { "epoch": 2.2612356219613425, "grad_norm": 0.8645992290608445, "learning_rate": 2.094458694678359e-05, "loss": 0.1582, "step": 19069 }, { "epoch": 2.2613542037234673, "grad_norm": 0.7138004982365895, "learning_rate": 2.094221839667842e-05, "loss": 0.1217, "step": 19070 }, { "epoch": 2.2614727854855925, "grad_norm": 0.868578394640825, "learning_rate": 2.0939849883981026e-05, "loss": 0.1254, "step": 19071 }, { "epoch": 2.2615913672477173, "grad_norm": 0.9099219663972437, "learning_rate": 2.0937481408713223e-05, "loss": 0.1481, "step": 19072 }, { "epoch": 2.2617099490098425, "grad_norm": 0.7932171843159301, "learning_rate": 2.093511297089687e-05, "loss": 0.127, "step": 19073 }, { "epoch": 2.261828530771967, "grad_norm": 0.7842286088072226, "learning_rate": 2.0932744570553788e-05, "loss": 0.1126, "step": 19074 }, { "epoch": 2.2619471125340924, "grad_norm": 0.575303979689616, "learning_rate": 2.093037620770581e-05, "loss": 0.0795, "step": 19075 }, { "epoch": 2.262065694296217, "grad_norm": 0.8784181574128842, "learning_rate": 2.0928007882374773e-05, "loss": 0.1125, "step": 19076 }, { "epoch": 2.2621842760583424, "grad_norm": 0.8829314731541245, "learning_rate": 2.0925639594582518e-05, "loss": 0.0955, "step": 19077 }, { "epoch": 2.262302857820467, "grad_norm": 0.6880560266211093, "learning_rate": 2.0923271344350864e-05, "loss": 0.095, "step": 19078 }, { "epoch": 2.2624214395825923, "grad_norm": 0.6011007565898318, "learning_rate": 2.0920903131701643e-05, "loss": 0.0806, "step": 19079 }, { "epoch": 2.262540021344717, "grad_norm": 0.6759378949841988, "learning_rate": 2.0918534956656706e-05, "loss": 0.0805, "step": 19080 }, { "epoch": 2.2626586031068423, "grad_norm": 0.9134641190271963, "learning_rate": 2.0916166819237868e-05, "loss": 0.1219, "step": 19081 }, { "epoch": 2.262777184868967, "grad_norm": 0.745725361069267, "learning_rate": 2.0913798719466964e-05, "loss": 0.1062, "step": 19082 }, { "epoch": 2.2628957666310923, "grad_norm": 0.8631063978182343, "learning_rate": 2.0911430657365817e-05, "loss": 0.1086, "step": 19083 }, { "epoch": 2.263014348393217, "grad_norm": 0.9911692893228753, "learning_rate": 2.090906263295627e-05, "loss": 0.1281, "step": 19084 }, { "epoch": 2.2631329301553422, "grad_norm": 0.7184018255579484, "learning_rate": 2.090669464626015e-05, "loss": 0.1151, "step": 19085 }, { "epoch": 2.263251511917467, "grad_norm": 0.7343871339205411, "learning_rate": 2.090432669729929e-05, "loss": 0.1046, "step": 19086 }, { "epoch": 2.263370093679592, "grad_norm": 0.8572090628943982, "learning_rate": 2.0901958786095502e-05, "loss": 0.0816, "step": 19087 }, { "epoch": 2.263488675441717, "grad_norm": 0.693983961405414, "learning_rate": 2.089959091267064e-05, "loss": 0.106, "step": 19088 }, { "epoch": 2.263607257203842, "grad_norm": 0.5577624248096062, "learning_rate": 2.0897223077046523e-05, "loss": 0.0635, "step": 19089 }, { "epoch": 2.263725838965967, "grad_norm": 0.707950048752636, "learning_rate": 2.089485527924497e-05, "loss": 0.0885, "step": 19090 }, { "epoch": 2.263844420728092, "grad_norm": 0.7196720926490557, "learning_rate": 2.0892487519287824e-05, "loss": 0.0667, "step": 19091 }, { "epoch": 2.263963002490217, "grad_norm": 0.49920917630014433, "learning_rate": 2.0890119797196907e-05, "loss": 0.0849, "step": 19092 }, { "epoch": 2.264081584252342, "grad_norm": 1.106861393532934, "learning_rate": 2.088775211299404e-05, "loss": 0.1167, "step": 19093 }, { "epoch": 2.264200166014467, "grad_norm": 0.7692171960499528, "learning_rate": 2.0885384466701054e-05, "loss": 0.0771, "step": 19094 }, { "epoch": 2.264318747776592, "grad_norm": 0.5477082543112483, "learning_rate": 2.0883016858339784e-05, "loss": 0.0718, "step": 19095 }, { "epoch": 2.264437329538717, "grad_norm": 0.6522196919614662, "learning_rate": 2.0880649287932047e-05, "loss": 0.09, "step": 19096 }, { "epoch": 2.264555911300842, "grad_norm": 0.7575052826367621, "learning_rate": 2.0878281755499678e-05, "loss": 0.0822, "step": 19097 }, { "epoch": 2.2646744930629668, "grad_norm": 1.168862424715898, "learning_rate": 2.087591426106448e-05, "loss": 0.1643, "step": 19098 }, { "epoch": 2.264793074825092, "grad_norm": 0.5143522750725421, "learning_rate": 2.0873546804648315e-05, "loss": 0.0771, "step": 19099 }, { "epoch": 2.2649116565872167, "grad_norm": 0.6769987132346699, "learning_rate": 2.087117938627298e-05, "loss": 0.1105, "step": 19100 }, { "epoch": 2.265030238349342, "grad_norm": 0.7612085677940538, "learning_rate": 2.0868812005960315e-05, "loss": 0.0928, "step": 19101 }, { "epoch": 2.2651488201114667, "grad_norm": 0.8811852539020466, "learning_rate": 2.086644466373212e-05, "loss": 0.1137, "step": 19102 }, { "epoch": 2.265267401873592, "grad_norm": 0.8991620600214749, "learning_rate": 2.086407735961025e-05, "loss": 0.1381, "step": 19103 }, { "epoch": 2.2653859836357166, "grad_norm": 0.6234836585536662, "learning_rate": 2.086171009361651e-05, "loss": 0.0803, "step": 19104 }, { "epoch": 2.265504565397842, "grad_norm": 0.8655072984032574, "learning_rate": 2.0859342865772728e-05, "loss": 0.1294, "step": 19105 }, { "epoch": 2.2656231471599666, "grad_norm": 0.9447337810983776, "learning_rate": 2.0856975676100725e-05, "loss": 0.1468, "step": 19106 }, { "epoch": 2.265741728922092, "grad_norm": 0.6283615638531427, "learning_rate": 2.0854608524622335e-05, "loss": 0.095, "step": 19107 }, { "epoch": 2.265860310684217, "grad_norm": 0.9440495461649917, "learning_rate": 2.0852241411359368e-05, "loss": 0.1498, "step": 19108 }, { "epoch": 2.2659788924463418, "grad_norm": 0.5549439840029914, "learning_rate": 2.084987433633364e-05, "loss": 0.0726, "step": 19109 }, { "epoch": 2.2660974742084665, "grad_norm": 0.8581652515466366, "learning_rate": 2.084750729956699e-05, "loss": 0.1187, "step": 19110 }, { "epoch": 2.2662160559705917, "grad_norm": 0.7293111225892883, "learning_rate": 2.0845140301081233e-05, "loss": 0.1016, "step": 19111 }, { "epoch": 2.266334637732717, "grad_norm": 0.5274874859525202, "learning_rate": 2.0842773340898187e-05, "loss": 0.0751, "step": 19112 }, { "epoch": 2.2664532194948417, "grad_norm": 0.6097803077004579, "learning_rate": 2.0840406419039658e-05, "loss": 0.101, "step": 19113 }, { "epoch": 2.2665718012569664, "grad_norm": 0.9683941023586264, "learning_rate": 2.0838039535527497e-05, "loss": 0.1565, "step": 19114 }, { "epoch": 2.2666903830190916, "grad_norm": 0.6659318938527254, "learning_rate": 2.0835672690383502e-05, "loss": 0.102, "step": 19115 }, { "epoch": 2.266808964781217, "grad_norm": 0.7680071577096189, "learning_rate": 2.08333058836295e-05, "loss": 0.1063, "step": 19116 }, { "epoch": 2.2669275465433416, "grad_norm": 0.7882555745910979, "learning_rate": 2.08309391152873e-05, "loss": 0.1136, "step": 19117 }, { "epoch": 2.267046128305467, "grad_norm": 0.9056377476821434, "learning_rate": 2.082857238537874e-05, "loss": 0.0789, "step": 19118 }, { "epoch": 2.2671647100675916, "grad_norm": 0.6227740299275926, "learning_rate": 2.0826205693925627e-05, "loss": 0.0926, "step": 19119 }, { "epoch": 2.2672832918297168, "grad_norm": 0.8109330737614423, "learning_rate": 2.0823839040949766e-05, "loss": 0.1262, "step": 19120 }, { "epoch": 2.2674018735918415, "grad_norm": 0.7189962678063827, "learning_rate": 2.0821472426473002e-05, "loss": 0.1185, "step": 19121 }, { "epoch": 2.2675204553539667, "grad_norm": 0.5495082647782953, "learning_rate": 2.0819105850517138e-05, "loss": 0.066, "step": 19122 }, { "epoch": 2.2676390371160915, "grad_norm": 1.0778053192978372, "learning_rate": 2.0816739313103984e-05, "loss": 0.1367, "step": 19123 }, { "epoch": 2.2677576188782167, "grad_norm": 0.6989521845873901, "learning_rate": 2.0814372814255363e-05, "loss": 0.0669, "step": 19124 }, { "epoch": 2.2678762006403415, "grad_norm": 0.6015121421934836, "learning_rate": 2.0812006353993093e-05, "loss": 0.0862, "step": 19125 }, { "epoch": 2.2679947824024667, "grad_norm": 0.6043537025565169, "learning_rate": 2.0809639932338996e-05, "loss": 0.0821, "step": 19126 }, { "epoch": 2.2681133641645914, "grad_norm": 1.0556055703481757, "learning_rate": 2.0807273549314876e-05, "loss": 0.1227, "step": 19127 }, { "epoch": 2.2682319459267166, "grad_norm": 0.8704670377491764, "learning_rate": 2.0804907204942546e-05, "loss": 0.0935, "step": 19128 }, { "epoch": 2.2683505276888414, "grad_norm": 1.1887979124386518, "learning_rate": 2.0802540899243833e-05, "loss": 0.1266, "step": 19129 }, { "epoch": 2.2684691094509666, "grad_norm": 0.989170728318912, "learning_rate": 2.080017463224055e-05, "loss": 0.1307, "step": 19130 }, { "epoch": 2.2685876912130913, "grad_norm": 0.6001166540020825, "learning_rate": 2.0797808403954493e-05, "loss": 0.0629, "step": 19131 }, { "epoch": 2.2687062729752165, "grad_norm": 0.6420886296824327, "learning_rate": 2.0795442214407504e-05, "loss": 0.1075, "step": 19132 }, { "epoch": 2.2688248547373413, "grad_norm": 0.7854034694422092, "learning_rate": 2.079307606362138e-05, "loss": 0.1031, "step": 19133 }, { "epoch": 2.2689434364994665, "grad_norm": 0.7270524835318557, "learning_rate": 2.079070995161793e-05, "loss": 0.0875, "step": 19134 }, { "epoch": 2.2690620182615913, "grad_norm": 0.572177861388817, "learning_rate": 2.0788343878418974e-05, "loss": 0.084, "step": 19135 }, { "epoch": 2.2691806000237165, "grad_norm": 0.696969399506367, "learning_rate": 2.078597784404632e-05, "loss": 0.1199, "step": 19136 }, { "epoch": 2.2692991817858412, "grad_norm": 0.6045318934330434, "learning_rate": 2.078361184852179e-05, "loss": 0.095, "step": 19137 }, { "epoch": 2.2694177635479664, "grad_norm": 0.9184305027267113, "learning_rate": 2.0781245891867187e-05, "loss": 0.1079, "step": 19138 }, { "epoch": 2.269536345310091, "grad_norm": 0.7114197178721431, "learning_rate": 2.0778879974104316e-05, "loss": 0.1101, "step": 19139 }, { "epoch": 2.2696549270722164, "grad_norm": 0.7448772891291007, "learning_rate": 2.0776514095255e-05, "loss": 0.1135, "step": 19140 }, { "epoch": 2.269773508834341, "grad_norm": 0.590216387124899, "learning_rate": 2.077414825534105e-05, "loss": 0.0951, "step": 19141 }, { "epoch": 2.2698920905964663, "grad_norm": 1.0972146848824706, "learning_rate": 2.077178245438427e-05, "loss": 0.1327, "step": 19142 }, { "epoch": 2.270010672358591, "grad_norm": 0.7801052223675795, "learning_rate": 2.0769416692406458e-05, "loss": 0.119, "step": 19143 }, { "epoch": 2.2701292541207163, "grad_norm": 0.6102100495112817, "learning_rate": 2.076705096942945e-05, "loss": 0.0799, "step": 19144 }, { "epoch": 2.270247835882841, "grad_norm": 0.8144869785783221, "learning_rate": 2.0764685285475033e-05, "loss": 0.0985, "step": 19145 }, { "epoch": 2.2703664176449663, "grad_norm": 0.9294382159091734, "learning_rate": 2.0762319640565026e-05, "loss": 0.1171, "step": 19146 }, { "epoch": 2.270484999407091, "grad_norm": 0.6845415434817347, "learning_rate": 2.0759954034721234e-05, "loss": 0.1084, "step": 19147 }, { "epoch": 2.2706035811692162, "grad_norm": 0.8043958799401203, "learning_rate": 2.0757588467965473e-05, "loss": 0.1289, "step": 19148 }, { "epoch": 2.270722162931341, "grad_norm": 0.6855092378067706, "learning_rate": 2.0755222940319543e-05, "loss": 0.0915, "step": 19149 }, { "epoch": 2.270840744693466, "grad_norm": 0.7516895479295843, "learning_rate": 2.075285745180524e-05, "loss": 0.0814, "step": 19150 }, { "epoch": 2.270959326455591, "grad_norm": 0.5843487961458899, "learning_rate": 2.0750492002444397e-05, "loss": 0.095, "step": 19151 }, { "epoch": 2.271077908217716, "grad_norm": 0.5066814285290991, "learning_rate": 2.0748126592258803e-05, "loss": 0.088, "step": 19152 }, { "epoch": 2.271196489979841, "grad_norm": 0.7381264945390102, "learning_rate": 2.0745761221270265e-05, "loss": 0.0843, "step": 19153 }, { "epoch": 2.271315071741966, "grad_norm": 0.9141150732020663, "learning_rate": 2.0743395889500596e-05, "loss": 0.0861, "step": 19154 }, { "epoch": 2.271433653504091, "grad_norm": 0.6384595902996227, "learning_rate": 2.0741030596971588e-05, "loss": 0.0827, "step": 19155 }, { "epoch": 2.271552235266216, "grad_norm": 0.43844523236452704, "learning_rate": 2.0738665343705065e-05, "loss": 0.0522, "step": 19156 }, { "epoch": 2.2716708170283413, "grad_norm": 0.6146275540249703, "learning_rate": 2.0736300129722825e-05, "loss": 0.098, "step": 19157 }, { "epoch": 2.271789398790466, "grad_norm": 0.8722438198342779, "learning_rate": 2.0733934955046654e-05, "loss": 0.1303, "step": 19158 }, { "epoch": 2.271907980552591, "grad_norm": 0.8031863990557682, "learning_rate": 2.0731569819698386e-05, "loss": 0.1073, "step": 19159 }, { "epoch": 2.272026562314716, "grad_norm": 0.8578489118593859, "learning_rate": 2.0729204723699806e-05, "loss": 0.1074, "step": 19160 }, { "epoch": 2.272145144076841, "grad_norm": 0.6040864819919964, "learning_rate": 2.0726839667072713e-05, "loss": 0.0972, "step": 19161 }, { "epoch": 2.272263725838966, "grad_norm": 0.7849055302776805, "learning_rate": 2.0724474649838927e-05, "loss": 0.1223, "step": 19162 }, { "epoch": 2.2723823076010907, "grad_norm": 0.9380154149544941, "learning_rate": 2.0722109672020244e-05, "loss": 0.1143, "step": 19163 }, { "epoch": 2.272500889363216, "grad_norm": 0.7530387064391912, "learning_rate": 2.071974473363846e-05, "loss": 0.1106, "step": 19164 }, { "epoch": 2.272619471125341, "grad_norm": 0.9638100293743602, "learning_rate": 2.0717379834715372e-05, "loss": 0.1171, "step": 19165 }, { "epoch": 2.272738052887466, "grad_norm": 0.7199473937380547, "learning_rate": 2.07150149752728e-05, "loss": 0.0878, "step": 19166 }, { "epoch": 2.2728566346495906, "grad_norm": 0.9664043426847073, "learning_rate": 2.0712650155332536e-05, "loss": 0.1541, "step": 19167 }, { "epoch": 2.272975216411716, "grad_norm": 0.6536860207000298, "learning_rate": 2.071028537491638e-05, "loss": 0.0805, "step": 19168 }, { "epoch": 2.273093798173841, "grad_norm": 0.6196023717080401, "learning_rate": 2.0707920634046123e-05, "loss": 0.1104, "step": 19169 }, { "epoch": 2.273212379935966, "grad_norm": 0.6870146726198311, "learning_rate": 2.0705555932743585e-05, "loss": 0.1208, "step": 19170 }, { "epoch": 2.273330961698091, "grad_norm": 0.7198773077565136, "learning_rate": 2.0703191271030557e-05, "loss": 0.0915, "step": 19171 }, { "epoch": 2.2734495434602158, "grad_norm": 1.0679228434207566, "learning_rate": 2.070082664892883e-05, "loss": 0.1479, "step": 19172 }, { "epoch": 2.273568125222341, "grad_norm": 1.0592283324171519, "learning_rate": 2.0698462066460205e-05, "loss": 0.1377, "step": 19173 }, { "epoch": 2.2736867069844657, "grad_norm": 0.7582371079136339, "learning_rate": 2.0696097523646486e-05, "loss": 0.1211, "step": 19174 }, { "epoch": 2.273805288746591, "grad_norm": 0.6788624568265205, "learning_rate": 2.069373302050948e-05, "loss": 0.101, "step": 19175 }, { "epoch": 2.2739238705087157, "grad_norm": 0.7367816515739971, "learning_rate": 2.0691368557070963e-05, "loss": 0.101, "step": 19176 }, { "epoch": 2.274042452270841, "grad_norm": 0.770807780621233, "learning_rate": 2.0689004133352753e-05, "loss": 0.1177, "step": 19177 }, { "epoch": 2.2741610340329657, "grad_norm": 0.7042518494516887, "learning_rate": 2.068663974937664e-05, "loss": 0.1175, "step": 19178 }, { "epoch": 2.274279615795091, "grad_norm": 0.9957904278291896, "learning_rate": 2.0684275405164417e-05, "loss": 0.1229, "step": 19179 }, { "epoch": 2.2743981975572156, "grad_norm": 1.0795763455747813, "learning_rate": 2.0681911100737872e-05, "loss": 0.1324, "step": 19180 }, { "epoch": 2.274516779319341, "grad_norm": 0.5879968540482619, "learning_rate": 2.0679546836118823e-05, "loss": 0.0696, "step": 19181 }, { "epoch": 2.2746353610814656, "grad_norm": 0.6620131923351383, "learning_rate": 2.0677182611329055e-05, "loss": 0.091, "step": 19182 }, { "epoch": 2.274753942843591, "grad_norm": 1.1363020310966965, "learning_rate": 2.0674818426390358e-05, "loss": 0.1419, "step": 19183 }, { "epoch": 2.2748725246057155, "grad_norm": 0.981368017768397, "learning_rate": 2.067245428132453e-05, "loss": 0.1236, "step": 19184 }, { "epoch": 2.2749911063678407, "grad_norm": 0.4903574304812896, "learning_rate": 2.0670090176153366e-05, "loss": 0.0696, "step": 19185 }, { "epoch": 2.2751096881299655, "grad_norm": 0.7975328607968287, "learning_rate": 2.0667726110898666e-05, "loss": 0.1076, "step": 19186 }, { "epoch": 2.2752282698920907, "grad_norm": 0.9385414474839877, "learning_rate": 2.066536208558222e-05, "loss": 0.1153, "step": 19187 }, { "epoch": 2.2753468516542155, "grad_norm": 0.7840708111736276, "learning_rate": 2.0662998100225807e-05, "loss": 0.1009, "step": 19188 }, { "epoch": 2.2754654334163407, "grad_norm": 0.8631761612995749, "learning_rate": 2.0660634154851246e-05, "loss": 0.1083, "step": 19189 }, { "epoch": 2.2755840151784654, "grad_norm": 0.5418115093445542, "learning_rate": 2.0658270249480317e-05, "loss": 0.066, "step": 19190 }, { "epoch": 2.2757025969405906, "grad_norm": 0.9712095193287779, "learning_rate": 2.0655906384134798e-05, "loss": 0.1427, "step": 19191 }, { "epoch": 2.2758211787027154, "grad_norm": 0.678167924504207, "learning_rate": 2.065354255883651e-05, "loss": 0.0832, "step": 19192 }, { "epoch": 2.2759397604648406, "grad_norm": 0.8492762201729729, "learning_rate": 2.065117877360723e-05, "loss": 0.1097, "step": 19193 }, { "epoch": 2.2760583422269653, "grad_norm": 0.7790942117272454, "learning_rate": 2.064881502846874e-05, "loss": 0.1101, "step": 19194 }, { "epoch": 2.2761769239890906, "grad_norm": 0.8656173543514246, "learning_rate": 2.064645132344284e-05, "loss": 0.1405, "step": 19195 }, { "epoch": 2.2762955057512153, "grad_norm": 0.9234769251165286, "learning_rate": 2.0644087658551324e-05, "loss": 0.1337, "step": 19196 }, { "epoch": 2.2764140875133405, "grad_norm": 0.7738830792429456, "learning_rate": 2.064172403381598e-05, "loss": 0.1081, "step": 19197 }, { "epoch": 2.2765326692754653, "grad_norm": 1.0871535124931713, "learning_rate": 2.0639360449258596e-05, "loss": 0.1401, "step": 19198 }, { "epoch": 2.2766512510375905, "grad_norm": 0.903751093774981, "learning_rate": 2.0636996904900952e-05, "loss": 0.1002, "step": 19199 }, { "epoch": 2.2767698327997152, "grad_norm": 0.5384939581806427, "learning_rate": 2.0634633400764854e-05, "loss": 0.0785, "step": 19200 }, { "epoch": 2.2768884145618404, "grad_norm": 0.9254094748195427, "learning_rate": 2.0632269936872088e-05, "loss": 0.1178, "step": 19201 }, { "epoch": 2.277006996323965, "grad_norm": 0.5715720700546214, "learning_rate": 2.062990651324443e-05, "loss": 0.0679, "step": 19202 }, { "epoch": 2.2771255780860904, "grad_norm": 1.0364274290674282, "learning_rate": 2.062754312990367e-05, "loss": 0.1112, "step": 19203 }, { "epoch": 2.277244159848215, "grad_norm": 0.8890384015769935, "learning_rate": 2.062517978687161e-05, "loss": 0.1043, "step": 19204 }, { "epoch": 2.2773627416103404, "grad_norm": 0.6717203309836065, "learning_rate": 2.0622816484170025e-05, "loss": 0.1167, "step": 19205 }, { "epoch": 2.2774813233724656, "grad_norm": 0.7854720806665783, "learning_rate": 2.0620453221820695e-05, "loss": 0.1019, "step": 19206 }, { "epoch": 2.2775999051345903, "grad_norm": 0.8158414368332162, "learning_rate": 2.0618089999845428e-05, "loss": 0.1008, "step": 19207 }, { "epoch": 2.277718486896715, "grad_norm": 0.5918990403733447, "learning_rate": 2.0615726818265995e-05, "loss": 0.0797, "step": 19208 }, { "epoch": 2.2778370686588403, "grad_norm": 0.919444833081891, "learning_rate": 2.0613363677104186e-05, "loss": 0.1426, "step": 19209 }, { "epoch": 2.2779556504209655, "grad_norm": 0.7731467893785997, "learning_rate": 2.0611000576381776e-05, "loss": 0.0932, "step": 19210 }, { "epoch": 2.2780742321830902, "grad_norm": 0.4644003754960123, "learning_rate": 2.060863751612057e-05, "loss": 0.0638, "step": 19211 }, { "epoch": 2.278192813945215, "grad_norm": 0.7621204685505636, "learning_rate": 2.060627449634234e-05, "loss": 0.1216, "step": 19212 }, { "epoch": 2.27831139570734, "grad_norm": 0.8176993436914342, "learning_rate": 2.0603911517068865e-05, "loss": 0.1065, "step": 19213 }, { "epoch": 2.2784299774694654, "grad_norm": 0.6179857489135397, "learning_rate": 2.0601548578321933e-05, "loss": 0.0793, "step": 19214 }, { "epoch": 2.27854855923159, "grad_norm": 0.70709518542041, "learning_rate": 2.059918568012333e-05, "loss": 0.1144, "step": 19215 }, { "epoch": 2.278667140993715, "grad_norm": 0.7315247181251626, "learning_rate": 2.0596822822494842e-05, "loss": 0.107, "step": 19216 }, { "epoch": 2.27878572275584, "grad_norm": 0.6430198661897033, "learning_rate": 2.0594460005458244e-05, "loss": 0.0886, "step": 19217 }, { "epoch": 2.2789043045179653, "grad_norm": 0.5441313523345018, "learning_rate": 2.0592097229035328e-05, "loss": 0.0677, "step": 19218 }, { "epoch": 2.27902288628009, "grad_norm": 0.428320364919597, "learning_rate": 2.0589734493247868e-05, "loss": 0.0651, "step": 19219 }, { "epoch": 2.2791414680422153, "grad_norm": 0.9984555750978432, "learning_rate": 2.0587371798117655e-05, "loss": 0.1417, "step": 19220 }, { "epoch": 2.27926004980434, "grad_norm": 0.8599782707807125, "learning_rate": 2.058500914366645e-05, "loss": 0.1158, "step": 19221 }, { "epoch": 2.2793786315664653, "grad_norm": 0.5969748465952649, "learning_rate": 2.0582646529916054e-05, "loss": 0.0793, "step": 19222 }, { "epoch": 2.27949721332859, "grad_norm": 0.6762920655474683, "learning_rate": 2.0580283956888235e-05, "loss": 0.0803, "step": 19223 }, { "epoch": 2.279615795090715, "grad_norm": 0.7560625730594209, "learning_rate": 2.0577921424604785e-05, "loss": 0.1105, "step": 19224 }, { "epoch": 2.27973437685284, "grad_norm": 0.5690085973618046, "learning_rate": 2.0575558933087465e-05, "loss": 0.0734, "step": 19225 }, { "epoch": 2.279852958614965, "grad_norm": 0.7529499840960499, "learning_rate": 2.0573196482358078e-05, "loss": 0.0998, "step": 19226 }, { "epoch": 2.27997154037709, "grad_norm": 0.7849298058165083, "learning_rate": 2.0570834072438392e-05, "loss": 0.0957, "step": 19227 }, { "epoch": 2.280090122139215, "grad_norm": 0.6419833273953517, "learning_rate": 2.0568471703350185e-05, "loss": 0.1042, "step": 19228 }, { "epoch": 2.28020870390134, "grad_norm": 0.7213281164189219, "learning_rate": 2.056610937511522e-05, "loss": 0.0941, "step": 19229 }, { "epoch": 2.280327285663465, "grad_norm": 0.5418800154535591, "learning_rate": 2.0563747087755303e-05, "loss": 0.056, "step": 19230 }, { "epoch": 2.28044586742559, "grad_norm": 0.8654167316475169, "learning_rate": 2.0561384841292197e-05, "loss": 0.129, "step": 19231 }, { "epoch": 2.280564449187715, "grad_norm": 0.8173990140024149, "learning_rate": 2.0559022635747668e-05, "loss": 0.091, "step": 19232 }, { "epoch": 2.28068303094984, "grad_norm": 0.6383706107186635, "learning_rate": 2.0556660471143517e-05, "loss": 0.0945, "step": 19233 }, { "epoch": 2.280801612711965, "grad_norm": 0.7524865298328898, "learning_rate": 2.05542983475015e-05, "loss": 0.1071, "step": 19234 }, { "epoch": 2.28092019447409, "grad_norm": 0.7289841140821686, "learning_rate": 2.055193626484341e-05, "loss": 0.0647, "step": 19235 }, { "epoch": 2.281038776236215, "grad_norm": 0.483883460375925, "learning_rate": 2.0549574223191e-05, "loss": 0.0621, "step": 19236 }, { "epoch": 2.2811573579983397, "grad_norm": 0.565199374624868, "learning_rate": 2.0547212222566064e-05, "loss": 0.0663, "step": 19237 }, { "epoch": 2.281275939760465, "grad_norm": 0.6496429590072367, "learning_rate": 2.0544850262990378e-05, "loss": 0.0799, "step": 19238 }, { "epoch": 2.2813945215225897, "grad_norm": 0.6630005017747211, "learning_rate": 2.0542488344485707e-05, "loss": 0.093, "step": 19239 }, { "epoch": 2.281513103284715, "grad_norm": 0.7739857551173044, "learning_rate": 2.0540126467073813e-05, "loss": 0.1034, "step": 19240 }, { "epoch": 2.2816316850468397, "grad_norm": 0.7056168520138631, "learning_rate": 2.0537764630776498e-05, "loss": 0.0913, "step": 19241 }, { "epoch": 2.281750266808965, "grad_norm": 0.6191492266153039, "learning_rate": 2.053540283561552e-05, "loss": 0.0888, "step": 19242 }, { "epoch": 2.2818688485710896, "grad_norm": 0.6685073692921839, "learning_rate": 2.053304108161265e-05, "loss": 0.1011, "step": 19243 }, { "epoch": 2.281987430333215, "grad_norm": 0.9857392715620021, "learning_rate": 2.0530679368789664e-05, "loss": 0.1737, "step": 19244 }, { "epoch": 2.2821060120953396, "grad_norm": 0.9757437265838953, "learning_rate": 2.052831769716833e-05, "loss": 0.1146, "step": 19245 }, { "epoch": 2.282224593857465, "grad_norm": 0.9218974068761951, "learning_rate": 2.0525956066770434e-05, "loss": 0.1081, "step": 19246 }, { "epoch": 2.2823431756195895, "grad_norm": 0.7765133370072175, "learning_rate": 2.052359447761772e-05, "loss": 0.1046, "step": 19247 }, { "epoch": 2.2824617573817148, "grad_norm": 1.00015118792085, "learning_rate": 2.0521232929731986e-05, "loss": 0.1225, "step": 19248 }, { "epoch": 2.2825803391438395, "grad_norm": 0.7542421383165739, "learning_rate": 2.0518871423135e-05, "loss": 0.094, "step": 19249 }, { "epoch": 2.2826989209059647, "grad_norm": 1.0468288432918036, "learning_rate": 2.0516509957848517e-05, "loss": 0.1299, "step": 19250 }, { "epoch": 2.2828175026680895, "grad_norm": 0.859189482224707, "learning_rate": 2.0514148533894305e-05, "loss": 0.1314, "step": 19251 }, { "epoch": 2.2829360844302147, "grad_norm": 0.9334741588463504, "learning_rate": 2.0511787151294155e-05, "loss": 0.1444, "step": 19252 }, { "epoch": 2.2830546661923394, "grad_norm": 0.6578772791881029, "learning_rate": 2.0509425810069814e-05, "loss": 0.0689, "step": 19253 }, { "epoch": 2.2831732479544646, "grad_norm": 0.8696439214720704, "learning_rate": 2.050706451024307e-05, "loss": 0.1188, "step": 19254 }, { "epoch": 2.28329182971659, "grad_norm": 0.8260382843255265, "learning_rate": 2.050470325183567e-05, "loss": 0.1054, "step": 19255 }, { "epoch": 2.2834104114787146, "grad_norm": 1.1360567544359537, "learning_rate": 2.0502342034869406e-05, "loss": 0.1767, "step": 19256 }, { "epoch": 2.2835289932408394, "grad_norm": 0.7724005631678209, "learning_rate": 2.049998085936603e-05, "loss": 0.1049, "step": 19257 }, { "epoch": 2.2836475750029646, "grad_norm": 0.6743902607956366, "learning_rate": 2.0497619725347314e-05, "loss": 0.0887, "step": 19258 }, { "epoch": 2.2837661567650898, "grad_norm": 0.6187374524484708, "learning_rate": 2.0495258632835014e-05, "loss": 0.0664, "step": 19259 }, { "epoch": 2.2838847385272145, "grad_norm": 0.7085611442316739, "learning_rate": 2.0492897581850913e-05, "loss": 0.0699, "step": 19260 }, { "epoch": 2.2840033202893393, "grad_norm": 0.7403015439595959, "learning_rate": 2.049053657241677e-05, "loss": 0.1, "step": 19261 }, { "epoch": 2.2841219020514645, "grad_norm": 0.6464808052800475, "learning_rate": 2.048817560455434e-05, "loss": 0.0755, "step": 19262 }, { "epoch": 2.2842404838135897, "grad_norm": 0.7622344207413221, "learning_rate": 2.0485814678285405e-05, "loss": 0.1159, "step": 19263 }, { "epoch": 2.2843590655757144, "grad_norm": 0.5761554846875863, "learning_rate": 2.048345379363172e-05, "loss": 0.0862, "step": 19264 }, { "epoch": 2.284477647337839, "grad_norm": 0.6642076079911737, "learning_rate": 2.048109295061506e-05, "loss": 0.0978, "step": 19265 }, { "epoch": 2.2845962290999644, "grad_norm": 0.8310738674783811, "learning_rate": 2.0478732149257168e-05, "loss": 0.1238, "step": 19266 }, { "epoch": 2.2847148108620896, "grad_norm": 1.0314956372654764, "learning_rate": 2.0476371389579834e-05, "loss": 0.1751, "step": 19267 }, { "epoch": 2.2848333926242144, "grad_norm": 0.5996143291581897, "learning_rate": 2.0474010671604805e-05, "loss": 0.0673, "step": 19268 }, { "epoch": 2.2849519743863396, "grad_norm": 0.8882320823984076, "learning_rate": 2.0471649995353854e-05, "loss": 0.1252, "step": 19269 }, { "epoch": 2.2850705561484643, "grad_norm": 0.5457057234219289, "learning_rate": 2.046928936084872e-05, "loss": 0.0729, "step": 19270 }, { "epoch": 2.2851891379105895, "grad_norm": 0.794058981324232, "learning_rate": 2.0466928768111197e-05, "loss": 0.1007, "step": 19271 }, { "epoch": 2.2853077196727143, "grad_norm": 0.6705227257694566, "learning_rate": 2.046456821716303e-05, "loss": 0.0879, "step": 19272 }, { "epoch": 2.2854263014348395, "grad_norm": 0.614133904929565, "learning_rate": 2.0462207708025975e-05, "loss": 0.0862, "step": 19273 }, { "epoch": 2.2855448831969642, "grad_norm": 0.794170195682687, "learning_rate": 2.0459847240721805e-05, "loss": 0.1047, "step": 19274 }, { "epoch": 2.2856634649590895, "grad_norm": 0.7859942709335062, "learning_rate": 2.045748681527227e-05, "loss": 0.0988, "step": 19275 }, { "epoch": 2.285782046721214, "grad_norm": 0.8247297894699633, "learning_rate": 2.045512643169915e-05, "loss": 0.1052, "step": 19276 }, { "epoch": 2.2859006284833394, "grad_norm": 0.835556872723295, "learning_rate": 2.045276609002417e-05, "loss": 0.132, "step": 19277 }, { "epoch": 2.286019210245464, "grad_norm": 0.5603548678961798, "learning_rate": 2.045040579026913e-05, "loss": 0.0792, "step": 19278 }, { "epoch": 2.2861377920075894, "grad_norm": 0.5783226199260886, "learning_rate": 2.0448045532455767e-05, "loss": 0.0863, "step": 19279 }, { "epoch": 2.286256373769714, "grad_norm": 0.5599909355644187, "learning_rate": 2.044568531660584e-05, "loss": 0.0757, "step": 19280 }, { "epoch": 2.2863749555318393, "grad_norm": 0.7511329034126231, "learning_rate": 2.04433251427411e-05, "loss": 0.0939, "step": 19281 }, { "epoch": 2.286493537293964, "grad_norm": 0.5933164546793431, "learning_rate": 2.0440965010883327e-05, "loss": 0.0948, "step": 19282 }, { "epoch": 2.2866121190560893, "grad_norm": 0.7132617980035278, "learning_rate": 2.0438604921054257e-05, "loss": 0.0867, "step": 19283 }, { "epoch": 2.286730700818214, "grad_norm": 0.6911315952089401, "learning_rate": 2.0436244873275664e-05, "loss": 0.0968, "step": 19284 }, { "epoch": 2.2868492825803393, "grad_norm": 0.8220070590437693, "learning_rate": 2.0433884867569285e-05, "loss": 0.1102, "step": 19285 }, { "epoch": 2.286967864342464, "grad_norm": 0.7317384016344366, "learning_rate": 2.04315249039569e-05, "loss": 0.1085, "step": 19286 }, { "epoch": 2.287086446104589, "grad_norm": 0.6905703741230856, "learning_rate": 2.0429164982460253e-05, "loss": 0.0727, "step": 19287 }, { "epoch": 2.287205027866714, "grad_norm": 0.6690898808587948, "learning_rate": 2.04268051031011e-05, "loss": 0.1058, "step": 19288 }, { "epoch": 2.287323609628839, "grad_norm": 0.8076604350454905, "learning_rate": 2.0424445265901183e-05, "loss": 0.1148, "step": 19289 }, { "epoch": 2.287442191390964, "grad_norm": 0.8817845378262333, "learning_rate": 2.0422085470882283e-05, "loss": 0.124, "step": 19290 }, { "epoch": 2.287560773153089, "grad_norm": 0.5578120811937483, "learning_rate": 2.0419725718066142e-05, "loss": 0.0768, "step": 19291 }, { "epoch": 2.287679354915214, "grad_norm": 0.757299570701759, "learning_rate": 2.04173660074745e-05, "loss": 0.0894, "step": 19292 }, { "epoch": 2.287797936677339, "grad_norm": 0.7164330194254496, "learning_rate": 2.0415006339129132e-05, "loss": 0.1019, "step": 19293 }, { "epoch": 2.287916518439464, "grad_norm": 0.5777314068762575, "learning_rate": 2.041264671305178e-05, "loss": 0.0767, "step": 19294 }, { "epoch": 2.288035100201589, "grad_norm": 0.46176258401088255, "learning_rate": 2.041028712926421e-05, "loss": 0.0645, "step": 19295 }, { "epoch": 2.288153681963714, "grad_norm": 0.7006649148980835, "learning_rate": 2.0407927587788148e-05, "loss": 0.0755, "step": 19296 }, { "epoch": 2.288272263725839, "grad_norm": 0.6438573255623036, "learning_rate": 2.0405568088645375e-05, "loss": 0.0852, "step": 19297 }, { "epoch": 2.288390845487964, "grad_norm": 0.7736884705678021, "learning_rate": 2.040320863185763e-05, "loss": 0.111, "step": 19298 }, { "epoch": 2.288509427250089, "grad_norm": 0.6824398736791147, "learning_rate": 2.0400849217446663e-05, "loss": 0.0953, "step": 19299 }, { "epoch": 2.2886280090122137, "grad_norm": 0.7062430605090256, "learning_rate": 2.0398489845434218e-05, "loss": 0.1057, "step": 19300 }, { "epoch": 2.288746590774339, "grad_norm": 0.7130453320070889, "learning_rate": 2.0396130515842064e-05, "loss": 0.0848, "step": 19301 }, { "epoch": 2.2888651725364637, "grad_norm": 0.9635573734868425, "learning_rate": 2.0393771228691935e-05, "loss": 0.1383, "step": 19302 }, { "epoch": 2.288983754298589, "grad_norm": 0.7093140359420517, "learning_rate": 2.0391411984005587e-05, "loss": 0.1215, "step": 19303 }, { "epoch": 2.2891023360607137, "grad_norm": 0.7187973817434513, "learning_rate": 2.038905278180477e-05, "loss": 0.0952, "step": 19304 }, { "epoch": 2.289220917822839, "grad_norm": 0.5674224206215714, "learning_rate": 2.0386693622111236e-05, "loss": 0.0793, "step": 19305 }, { "epoch": 2.2893394995849636, "grad_norm": 0.9287261589838464, "learning_rate": 2.038433450494673e-05, "loss": 0.1031, "step": 19306 }, { "epoch": 2.289458081347089, "grad_norm": 0.681139716228886, "learning_rate": 2.038197543033299e-05, "loss": 0.1086, "step": 19307 }, { "epoch": 2.289576663109214, "grad_norm": 0.6809644867244221, "learning_rate": 2.0379616398291783e-05, "loss": 0.1124, "step": 19308 }, { "epoch": 2.289695244871339, "grad_norm": 0.4533257922626541, "learning_rate": 2.0377257408844848e-05, "loss": 0.0554, "step": 19309 }, { "epoch": 2.2898138266334636, "grad_norm": 1.1568026967715022, "learning_rate": 2.0374898462013926e-05, "loss": 0.1213, "step": 19310 }, { "epoch": 2.2899324083955888, "grad_norm": 0.6118144235826306, "learning_rate": 2.0372539557820763e-05, "loss": 0.0699, "step": 19311 }, { "epoch": 2.290050990157714, "grad_norm": 0.4871498127907941, "learning_rate": 2.0370180696287123e-05, "loss": 0.0702, "step": 19312 }, { "epoch": 2.2901695719198387, "grad_norm": 0.6767891766358276, "learning_rate": 2.036782187743473e-05, "loss": 0.1042, "step": 19313 }, { "epoch": 2.2902881536819635, "grad_norm": 0.9115089920770655, "learning_rate": 2.036546310128535e-05, "loss": 0.1102, "step": 19314 }, { "epoch": 2.2904067354440887, "grad_norm": 0.7640385535612699, "learning_rate": 2.03631043678607e-05, "loss": 0.1021, "step": 19315 }, { "epoch": 2.290525317206214, "grad_norm": 1.0970011399877608, "learning_rate": 2.0360745677182553e-05, "loss": 0.1494, "step": 19316 }, { "epoch": 2.2906438989683386, "grad_norm": 0.568876312567954, "learning_rate": 2.0358387029272645e-05, "loss": 0.0893, "step": 19317 }, { "epoch": 2.290762480730464, "grad_norm": 1.3684796768263194, "learning_rate": 2.0356028424152703e-05, "loss": 0.0907, "step": 19318 }, { "epoch": 2.2908810624925886, "grad_norm": 0.618732165248974, "learning_rate": 2.0353669861844497e-05, "loss": 0.099, "step": 19319 }, { "epoch": 2.290999644254714, "grad_norm": 0.6925458573927483, "learning_rate": 2.0351311342369755e-05, "loss": 0.1048, "step": 19320 }, { "epoch": 2.2911182260168386, "grad_norm": 0.6906037527765264, "learning_rate": 2.0348952865750224e-05, "loss": 0.0869, "step": 19321 }, { "epoch": 2.2912368077789638, "grad_norm": 1.0020190904136268, "learning_rate": 2.0346594432007633e-05, "loss": 0.1454, "step": 19322 }, { "epoch": 2.2913553895410885, "grad_norm": 0.8391588965852007, "learning_rate": 2.0344236041163744e-05, "loss": 0.1058, "step": 19323 }, { "epoch": 2.2914739713032137, "grad_norm": 0.796058085391224, "learning_rate": 2.0341877693240285e-05, "loss": 0.0982, "step": 19324 }, { "epoch": 2.2915925530653385, "grad_norm": 0.5253463575448818, "learning_rate": 2.0339519388259008e-05, "loss": 0.0782, "step": 19325 }, { "epoch": 2.2917111348274637, "grad_norm": 0.569674246338537, "learning_rate": 2.0337161126241637e-05, "loss": 0.0788, "step": 19326 }, { "epoch": 2.2918297165895885, "grad_norm": 0.6708297197753342, "learning_rate": 2.0334802907209928e-05, "loss": 0.086, "step": 19327 }, { "epoch": 2.2919482983517137, "grad_norm": 0.8561609047259156, "learning_rate": 2.0332444731185622e-05, "loss": 0.1026, "step": 19328 }, { "epoch": 2.2920668801138384, "grad_norm": 0.6976312464079919, "learning_rate": 2.0330086598190448e-05, "loss": 0.1081, "step": 19329 }, { "epoch": 2.2921854618759636, "grad_norm": 0.6358461910508553, "learning_rate": 2.0327728508246137e-05, "loss": 0.0847, "step": 19330 }, { "epoch": 2.2923040436380884, "grad_norm": 0.46136079307948147, "learning_rate": 2.0325370461374454e-05, "loss": 0.073, "step": 19331 }, { "epoch": 2.2924226254002136, "grad_norm": 0.521142676788854, "learning_rate": 2.0323012457597117e-05, "loss": 0.074, "step": 19332 }, { "epoch": 2.2925412071623383, "grad_norm": 1.0133053034094206, "learning_rate": 2.0320654496935867e-05, "loss": 0.1414, "step": 19333 }, { "epoch": 2.2926597889244635, "grad_norm": 0.7068490434045177, "learning_rate": 2.0318296579412448e-05, "loss": 0.1054, "step": 19334 }, { "epoch": 2.2927783706865883, "grad_norm": 0.7662328500064516, "learning_rate": 2.0315938705048596e-05, "loss": 0.1159, "step": 19335 }, { "epoch": 2.2928969524487135, "grad_norm": 0.9891864842675068, "learning_rate": 2.0313580873866046e-05, "loss": 0.0987, "step": 19336 }, { "epoch": 2.2930155342108383, "grad_norm": 0.715012608535461, "learning_rate": 2.0311223085886524e-05, "loss": 0.0791, "step": 19337 }, { "epoch": 2.2931341159729635, "grad_norm": 0.5511383636788786, "learning_rate": 2.030886534113178e-05, "loss": 0.0792, "step": 19338 }, { "epoch": 2.293252697735088, "grad_norm": 0.7023454532013924, "learning_rate": 2.0306507639623556e-05, "loss": 0.0876, "step": 19339 }, { "epoch": 2.2933712794972134, "grad_norm": 0.7764746978587386, "learning_rate": 2.030414998138357e-05, "loss": 0.1156, "step": 19340 }, { "epoch": 2.293489861259338, "grad_norm": 0.9529682454048283, "learning_rate": 2.030179236643355e-05, "loss": 0.1301, "step": 19341 }, { "epoch": 2.2936084430214634, "grad_norm": 0.7430733999273693, "learning_rate": 2.0299434794795257e-05, "loss": 0.0812, "step": 19342 }, { "epoch": 2.293727024783588, "grad_norm": 0.7215794310711059, "learning_rate": 2.0297077266490405e-05, "loss": 0.1109, "step": 19343 }, { "epoch": 2.2938456065457133, "grad_norm": 0.5390012830071894, "learning_rate": 2.029471978154074e-05, "loss": 0.0809, "step": 19344 }, { "epoch": 2.293964188307838, "grad_norm": 0.6825101403959933, "learning_rate": 2.0292362339967975e-05, "loss": 0.0968, "step": 19345 }, { "epoch": 2.2940827700699633, "grad_norm": 0.7646323010000067, "learning_rate": 2.0290004941793865e-05, "loss": 0.1191, "step": 19346 }, { "epoch": 2.294201351832088, "grad_norm": 0.7360976645823127, "learning_rate": 2.028764758704014e-05, "loss": 0.1162, "step": 19347 }, { "epoch": 2.2943199335942133, "grad_norm": 0.5735217346581379, "learning_rate": 2.028529027572851e-05, "loss": 0.079, "step": 19348 }, { "epoch": 2.294438515356338, "grad_norm": 0.5705181140148529, "learning_rate": 2.0282933007880736e-05, "loss": 0.071, "step": 19349 }, { "epoch": 2.2945570971184632, "grad_norm": 0.7607344717948376, "learning_rate": 2.0280575783518533e-05, "loss": 0.1298, "step": 19350 }, { "epoch": 2.294675678880588, "grad_norm": 0.7055793617239015, "learning_rate": 2.0278218602663628e-05, "loss": 0.0923, "step": 19351 }, { "epoch": 2.294794260642713, "grad_norm": 0.6478123711846586, "learning_rate": 2.027586146533776e-05, "loss": 0.0924, "step": 19352 }, { "epoch": 2.294912842404838, "grad_norm": 0.8516234965282263, "learning_rate": 2.0273504371562653e-05, "loss": 0.1169, "step": 19353 }, { "epoch": 2.295031424166963, "grad_norm": 0.7441171406620491, "learning_rate": 2.027114732136005e-05, "loss": 0.1105, "step": 19354 }, { "epoch": 2.295150005929088, "grad_norm": 0.7032662265203407, "learning_rate": 2.0268790314751664e-05, "loss": 0.091, "step": 19355 }, { "epoch": 2.295268587691213, "grad_norm": 0.8590446707047361, "learning_rate": 2.0266433351759224e-05, "loss": 0.1009, "step": 19356 }, { "epoch": 2.2953871694533383, "grad_norm": 0.6889212627535122, "learning_rate": 2.026407643240447e-05, "loss": 0.0851, "step": 19357 }, { "epoch": 2.295505751215463, "grad_norm": 0.721944027318995, "learning_rate": 2.0261719556709126e-05, "loss": 0.1031, "step": 19358 }, { "epoch": 2.295624332977588, "grad_norm": 1.0515953115492114, "learning_rate": 2.0259362724694915e-05, "loss": 0.1522, "step": 19359 }, { "epoch": 2.295742914739713, "grad_norm": 0.6652391608242114, "learning_rate": 2.025700593638356e-05, "loss": 0.1073, "step": 19360 }, { "epoch": 2.2958614965018382, "grad_norm": 0.7777475958803388, "learning_rate": 2.0254649191796802e-05, "loss": 0.1035, "step": 19361 }, { "epoch": 2.295980078263963, "grad_norm": 0.7669133282714261, "learning_rate": 2.0252292490956353e-05, "loss": 0.12, "step": 19362 }, { "epoch": 2.2960986600260878, "grad_norm": 0.5629293244182242, "learning_rate": 2.024993583388395e-05, "loss": 0.0824, "step": 19363 }, { "epoch": 2.296217241788213, "grad_norm": 0.7960076991474768, "learning_rate": 2.024757922060131e-05, "loss": 0.0958, "step": 19364 }, { "epoch": 2.296335823550338, "grad_norm": 0.5642739092275387, "learning_rate": 2.0245222651130165e-05, "loss": 0.0908, "step": 19365 }, { "epoch": 2.296454405312463, "grad_norm": 0.8405023551154287, "learning_rate": 2.0242866125492237e-05, "loss": 0.1397, "step": 19366 }, { "epoch": 2.2965729870745877, "grad_norm": 0.6988088310085141, "learning_rate": 2.0240509643709243e-05, "loss": 0.0845, "step": 19367 }, { "epoch": 2.296691568836713, "grad_norm": 0.8135788810703107, "learning_rate": 2.0238153205802925e-05, "loss": 0.1169, "step": 19368 }, { "epoch": 2.296810150598838, "grad_norm": 0.5269513662238885, "learning_rate": 2.0235796811794987e-05, "loss": 0.0777, "step": 19369 }, { "epoch": 2.296928732360963, "grad_norm": 0.6847598748246339, "learning_rate": 2.0233440461707168e-05, "loss": 0.0897, "step": 19370 }, { "epoch": 2.297047314123088, "grad_norm": 0.5184045271664632, "learning_rate": 2.0231084155561168e-05, "loss": 0.0827, "step": 19371 }, { "epoch": 2.297165895885213, "grad_norm": 0.7976048528659898, "learning_rate": 2.0228727893378737e-05, "loss": 0.105, "step": 19372 }, { "epoch": 2.297284477647338, "grad_norm": 1.0686901238373245, "learning_rate": 2.0226371675181577e-05, "loss": 0.1253, "step": 19373 }, { "epoch": 2.2974030594094628, "grad_norm": 0.5685958190872293, "learning_rate": 2.0224015500991422e-05, "loss": 0.0818, "step": 19374 }, { "epoch": 2.297521641171588, "grad_norm": 0.7821769426985752, "learning_rate": 2.0221659370829975e-05, "loss": 0.1105, "step": 19375 }, { "epoch": 2.2976402229337127, "grad_norm": 0.7013230082452497, "learning_rate": 2.0219303284718985e-05, "loss": 0.0728, "step": 19376 }, { "epoch": 2.297758804695838, "grad_norm": 0.7131972188536402, "learning_rate": 2.0216947242680152e-05, "loss": 0.07, "step": 19377 }, { "epoch": 2.2978773864579627, "grad_norm": 0.555032406529683, "learning_rate": 2.021459124473519e-05, "loss": 0.0899, "step": 19378 }, { "epoch": 2.297995968220088, "grad_norm": 0.809217341117534, "learning_rate": 2.021223529090584e-05, "loss": 0.1239, "step": 19379 }, { "epoch": 2.2981145499822127, "grad_norm": 0.889689873959065, "learning_rate": 2.020987938121381e-05, "loss": 0.1335, "step": 19380 }, { "epoch": 2.298233131744338, "grad_norm": 0.6196681059220045, "learning_rate": 2.0207523515680813e-05, "loss": 0.0943, "step": 19381 }, { "epoch": 2.2983517135064626, "grad_norm": 0.5229136358161144, "learning_rate": 2.020516769432857e-05, "loss": 0.0564, "step": 19382 }, { "epoch": 2.298470295268588, "grad_norm": 0.8261892219418496, "learning_rate": 2.02028119171788e-05, "loss": 0.0965, "step": 19383 }, { "epoch": 2.2985888770307126, "grad_norm": 0.5549836804729648, "learning_rate": 2.020045618425323e-05, "loss": 0.0924, "step": 19384 }, { "epoch": 2.2987074587928378, "grad_norm": 0.6017332266395271, "learning_rate": 2.019810049557357e-05, "loss": 0.0898, "step": 19385 }, { "epoch": 2.2988260405549625, "grad_norm": 0.6453493911874947, "learning_rate": 2.019574485116152e-05, "loss": 0.0995, "step": 19386 }, { "epoch": 2.2989446223170877, "grad_norm": 0.8825344014456329, "learning_rate": 2.0193389251038823e-05, "loss": 0.0846, "step": 19387 }, { "epoch": 2.2990632040792125, "grad_norm": 0.6262424115062848, "learning_rate": 2.019103369522718e-05, "loss": 0.0888, "step": 19388 }, { "epoch": 2.2991817858413377, "grad_norm": 0.816148150850977, "learning_rate": 2.0188678183748306e-05, "loss": 0.131, "step": 19389 }, { "epoch": 2.2993003676034625, "grad_norm": 0.7321381054505017, "learning_rate": 2.0186322716623924e-05, "loss": 0.1103, "step": 19390 }, { "epoch": 2.2994189493655877, "grad_norm": 0.4310172926841919, "learning_rate": 2.0183967293875745e-05, "loss": 0.0635, "step": 19391 }, { "epoch": 2.2995375311277124, "grad_norm": 0.5464152998705958, "learning_rate": 2.0181611915525475e-05, "loss": 0.0839, "step": 19392 }, { "epoch": 2.2996561128898376, "grad_norm": 0.6433442539813817, "learning_rate": 2.0179256581594836e-05, "loss": 0.0782, "step": 19393 }, { "epoch": 2.2997746946519624, "grad_norm": 0.623256417167043, "learning_rate": 2.0176901292105542e-05, "loss": 0.098, "step": 19394 }, { "epoch": 2.2998932764140876, "grad_norm": 0.7777457103637672, "learning_rate": 2.0174546047079305e-05, "loss": 0.0926, "step": 19395 }, { "epoch": 2.3000118581762123, "grad_norm": 0.6584474517268538, "learning_rate": 2.0172190846537835e-05, "loss": 0.0827, "step": 19396 }, { "epoch": 2.3001304399383375, "grad_norm": 0.7838229857700109, "learning_rate": 2.0169835690502838e-05, "loss": 0.1172, "step": 19397 }, { "epoch": 2.3002490217004623, "grad_norm": 0.5541647393464497, "learning_rate": 2.0167480578996044e-05, "loss": 0.0788, "step": 19398 }, { "epoch": 2.3003676034625875, "grad_norm": 0.8564196077741129, "learning_rate": 2.016512551203915e-05, "loss": 0.1234, "step": 19399 }, { "epoch": 2.3004861852247123, "grad_norm": 0.48585531127661735, "learning_rate": 2.0162770489653874e-05, "loss": 0.0599, "step": 19400 }, { "epoch": 2.3006047669868375, "grad_norm": 0.7946132021560789, "learning_rate": 2.0160415511861907e-05, "loss": 0.1265, "step": 19401 }, { "epoch": 2.3007233487489622, "grad_norm": 0.5250935147066745, "learning_rate": 2.0158060578684988e-05, "loss": 0.0773, "step": 19402 }, { "epoch": 2.3008419305110874, "grad_norm": 0.6876997593857951, "learning_rate": 2.0155705690144808e-05, "loss": 0.0837, "step": 19403 }, { "epoch": 2.300960512273212, "grad_norm": 0.8175745394616659, "learning_rate": 2.015335084626308e-05, "loss": 0.1044, "step": 19404 }, { "epoch": 2.3010790940353374, "grad_norm": 0.77155422653049, "learning_rate": 2.0150996047061513e-05, "loss": 0.1158, "step": 19405 }, { "epoch": 2.3011976757974626, "grad_norm": 0.6701007967267799, "learning_rate": 2.014864129256182e-05, "loss": 0.0889, "step": 19406 }, { "epoch": 2.3013162575595874, "grad_norm": 0.7100427568870292, "learning_rate": 2.0146286582785707e-05, "loss": 0.122, "step": 19407 }, { "epoch": 2.301434839321712, "grad_norm": 1.0020926084534914, "learning_rate": 2.0143931917754872e-05, "loss": 0.1162, "step": 19408 }, { "epoch": 2.3015534210838373, "grad_norm": 0.6957996751124674, "learning_rate": 2.0141577297491037e-05, "loss": 0.1004, "step": 19409 }, { "epoch": 2.3016720028459625, "grad_norm": 1.0593699126365925, "learning_rate": 2.01392227220159e-05, "loss": 0.1523, "step": 19410 }, { "epoch": 2.3017905846080873, "grad_norm": 0.7729142198656017, "learning_rate": 2.013686819135117e-05, "loss": 0.088, "step": 19411 }, { "epoch": 2.301909166370212, "grad_norm": 0.7652837867341464, "learning_rate": 2.0134513705518547e-05, "loss": 0.0834, "step": 19412 }, { "epoch": 2.3020277481323372, "grad_norm": 0.5286088537099453, "learning_rate": 2.013215926453974e-05, "loss": 0.0806, "step": 19413 }, { "epoch": 2.3021463298944624, "grad_norm": 0.8927823380437614, "learning_rate": 2.0129804868436463e-05, "loss": 0.12, "step": 19414 }, { "epoch": 2.302264911656587, "grad_norm": 0.7729137957577683, "learning_rate": 2.0127450517230413e-05, "loss": 0.1176, "step": 19415 }, { "epoch": 2.302383493418712, "grad_norm": 0.5047947033245339, "learning_rate": 2.012509621094328e-05, "loss": 0.0637, "step": 19416 }, { "epoch": 2.302502075180837, "grad_norm": 0.5972869187641996, "learning_rate": 2.0122741949596797e-05, "loss": 0.0836, "step": 19417 }, { "epoch": 2.3026206569429624, "grad_norm": 0.5153627445155109, "learning_rate": 2.0120387733212647e-05, "loss": 0.0709, "step": 19418 }, { "epoch": 2.302739238705087, "grad_norm": 0.5242397418858895, "learning_rate": 2.0118033561812532e-05, "loss": 0.0817, "step": 19419 }, { "epoch": 2.3028578204672123, "grad_norm": 1.0243478575828784, "learning_rate": 2.011567943541817e-05, "loss": 0.1539, "step": 19420 }, { "epoch": 2.302976402229337, "grad_norm": 0.6450694231047359, "learning_rate": 2.011332535405126e-05, "loss": 0.0842, "step": 19421 }, { "epoch": 2.3030949839914623, "grad_norm": 0.5625627551443518, "learning_rate": 2.0110971317733485e-05, "loss": 0.0744, "step": 19422 }, { "epoch": 2.303213565753587, "grad_norm": 1.1015133644573898, "learning_rate": 2.010861732648656e-05, "loss": 0.147, "step": 19423 }, { "epoch": 2.3033321475157122, "grad_norm": 0.8166247211058721, "learning_rate": 2.0106263380332186e-05, "loss": 0.1348, "step": 19424 }, { "epoch": 2.303450729277837, "grad_norm": 0.7481777340703266, "learning_rate": 2.010390947929207e-05, "loss": 0.0935, "step": 19425 }, { "epoch": 2.303569311039962, "grad_norm": 0.656004992376425, "learning_rate": 2.0101555623387902e-05, "loss": 0.102, "step": 19426 }, { "epoch": 2.303687892802087, "grad_norm": 0.7369271646673531, "learning_rate": 2.0099201812641375e-05, "loss": 0.0983, "step": 19427 }, { "epoch": 2.303806474564212, "grad_norm": 0.751544507553673, "learning_rate": 2.0096848047074207e-05, "loss": 0.0907, "step": 19428 }, { "epoch": 2.303925056326337, "grad_norm": 0.8796345881474645, "learning_rate": 2.0094494326708088e-05, "loss": 0.1373, "step": 19429 }, { "epoch": 2.304043638088462, "grad_norm": 0.6482901845082302, "learning_rate": 2.0092140651564712e-05, "loss": 0.0839, "step": 19430 }, { "epoch": 2.304162219850587, "grad_norm": 0.8263887947010895, "learning_rate": 2.0089787021665778e-05, "loss": 0.1266, "step": 19431 }, { "epoch": 2.304280801612712, "grad_norm": 0.6051037436955741, "learning_rate": 2.008743343703299e-05, "loss": 0.0843, "step": 19432 }, { "epoch": 2.304399383374837, "grad_norm": 0.6036957418789926, "learning_rate": 2.0085079897688043e-05, "loss": 0.0675, "step": 19433 }, { "epoch": 2.304517965136962, "grad_norm": 0.7402950804968166, "learning_rate": 2.008272640365262e-05, "loss": 0.0852, "step": 19434 }, { "epoch": 2.304636546899087, "grad_norm": 0.8162108572827044, "learning_rate": 2.0080372954948446e-05, "loss": 0.1047, "step": 19435 }, { "epoch": 2.304755128661212, "grad_norm": 0.7433492840701581, "learning_rate": 2.0078019551597198e-05, "loss": 0.0862, "step": 19436 }, { "epoch": 2.3048737104233368, "grad_norm": 0.6105505317058032, "learning_rate": 2.0075666193620573e-05, "loss": 0.0712, "step": 19437 }, { "epoch": 2.304992292185462, "grad_norm": 0.7660922974364301, "learning_rate": 2.0073312881040255e-05, "loss": 0.0944, "step": 19438 }, { "epoch": 2.3051108739475867, "grad_norm": 0.821975394552258, "learning_rate": 2.0070959613877966e-05, "loss": 0.0989, "step": 19439 }, { "epoch": 2.305229455709712, "grad_norm": 0.8277305675160203, "learning_rate": 2.0068606392155383e-05, "loss": 0.1086, "step": 19440 }, { "epoch": 2.3053480374718367, "grad_norm": 0.7460435929851807, "learning_rate": 2.0066253215894196e-05, "loss": 0.1258, "step": 19441 }, { "epoch": 2.305466619233962, "grad_norm": 0.9109004690029164, "learning_rate": 2.0063900085116104e-05, "loss": 0.0999, "step": 19442 }, { "epoch": 2.3055852009960867, "grad_norm": 0.47888685411980325, "learning_rate": 2.00615469998428e-05, "loss": 0.0726, "step": 19443 }, { "epoch": 2.305703782758212, "grad_norm": 0.7052707637708844, "learning_rate": 2.0059193960095986e-05, "loss": 0.0749, "step": 19444 }, { "epoch": 2.3058223645203366, "grad_norm": 0.8710302350129289, "learning_rate": 2.005684096589734e-05, "loss": 0.0895, "step": 19445 }, { "epoch": 2.305940946282462, "grad_norm": 0.6981314003104832, "learning_rate": 2.0054488017268545e-05, "loss": 0.0773, "step": 19446 }, { "epoch": 2.3060595280445866, "grad_norm": 0.8288291128569834, "learning_rate": 2.0052135114231324e-05, "loss": 0.1007, "step": 19447 }, { "epoch": 2.306178109806712, "grad_norm": 0.9429552203417806, "learning_rate": 2.004978225680735e-05, "loss": 0.1384, "step": 19448 }, { "epoch": 2.3062966915688365, "grad_norm": 0.5643267167430157, "learning_rate": 2.0047429445018296e-05, "loss": 0.0789, "step": 19449 }, { "epoch": 2.3064152733309617, "grad_norm": 0.635706988858197, "learning_rate": 2.0045076678885882e-05, "loss": 0.08, "step": 19450 }, { "epoch": 2.3065338550930865, "grad_norm": 0.8312213620431713, "learning_rate": 2.0042723958431786e-05, "loss": 0.1007, "step": 19451 }, { "epoch": 2.3066524368552117, "grad_norm": 0.6120616342568947, "learning_rate": 2.0040371283677693e-05, "loss": 0.0875, "step": 19452 }, { "epoch": 2.3067710186173365, "grad_norm": 0.6450283327682592, "learning_rate": 2.003801865464529e-05, "loss": 0.0834, "step": 19453 }, { "epoch": 2.3068896003794617, "grad_norm": 0.8945899152545437, "learning_rate": 2.0035666071356274e-05, "loss": 0.1149, "step": 19454 }, { "epoch": 2.307008182141587, "grad_norm": 0.6910211025401297, "learning_rate": 2.0033313533832336e-05, "loss": 0.0984, "step": 19455 }, { "epoch": 2.3071267639037116, "grad_norm": 1.2470675877366493, "learning_rate": 2.0030961042095154e-05, "loss": 0.1984, "step": 19456 }, { "epoch": 2.3072453456658364, "grad_norm": 0.651441747875577, "learning_rate": 2.0028608596166405e-05, "loss": 0.0751, "step": 19457 }, { "epoch": 2.3073639274279616, "grad_norm": 0.5766297427246903, "learning_rate": 2.0026256196067803e-05, "loss": 0.086, "step": 19458 }, { "epoch": 2.307482509190087, "grad_norm": 0.7825650915137339, "learning_rate": 2.002390384182102e-05, "loss": 0.1012, "step": 19459 }, { "epoch": 2.3076010909522116, "grad_norm": 0.5179230206807838, "learning_rate": 2.0021551533447724e-05, "loss": 0.0582, "step": 19460 }, { "epoch": 2.3077196727143363, "grad_norm": 0.8421475564730062, "learning_rate": 2.0019199270969635e-05, "loss": 0.1182, "step": 19461 }, { "epoch": 2.3078382544764615, "grad_norm": 0.7663389612902954, "learning_rate": 2.0016847054408412e-05, "loss": 0.1021, "step": 19462 }, { "epoch": 2.3079568362385867, "grad_norm": 0.7094299579522881, "learning_rate": 2.0014494883785757e-05, "loss": 0.087, "step": 19463 }, { "epoch": 2.3080754180007115, "grad_norm": 1.0277254292117632, "learning_rate": 2.0012142759123334e-05, "loss": 0.1384, "step": 19464 }, { "epoch": 2.3081939997628362, "grad_norm": 0.6419057075382641, "learning_rate": 2.0009790680442846e-05, "loss": 0.1063, "step": 19465 }, { "epoch": 2.3083125815249614, "grad_norm": 0.591407046282619, "learning_rate": 2.000743864776597e-05, "loss": 0.0925, "step": 19466 }, { "epoch": 2.3084311632870866, "grad_norm": 0.6204018941866264, "learning_rate": 2.000508666111439e-05, "loss": 0.0793, "step": 19467 }, { "epoch": 2.3085497450492114, "grad_norm": 0.5998293286823643, "learning_rate": 2.0002734720509776e-05, "loss": 0.084, "step": 19468 }, { "epoch": 2.3086683268113366, "grad_norm": 0.8942062966524026, "learning_rate": 2.0000382825973828e-05, "loss": 0.0921, "step": 19469 }, { "epoch": 2.3087869085734614, "grad_norm": 1.147351945375658, "learning_rate": 1.9998030977528218e-05, "loss": 0.1823, "step": 19470 }, { "epoch": 2.3089054903355866, "grad_norm": 0.6449766986417694, "learning_rate": 1.9995679175194625e-05, "loss": 0.0921, "step": 19471 }, { "epoch": 2.3090240720977113, "grad_norm": 0.6412417895789546, "learning_rate": 1.999332741899473e-05, "loss": 0.0851, "step": 19472 }, { "epoch": 2.3091426538598365, "grad_norm": 0.43901846870254496, "learning_rate": 1.9990975708950223e-05, "loss": 0.0585, "step": 19473 }, { "epoch": 2.3092612356219613, "grad_norm": 0.6871945908823685, "learning_rate": 1.9988624045082776e-05, "loss": 0.1112, "step": 19474 }, { "epoch": 2.3093798173840865, "grad_norm": 0.903528615036626, "learning_rate": 1.9986272427414064e-05, "loss": 0.1077, "step": 19475 }, { "epoch": 2.3094983991462112, "grad_norm": 0.6870463688072702, "learning_rate": 1.9983920855965782e-05, "loss": 0.0811, "step": 19476 }, { "epoch": 2.3096169809083364, "grad_norm": 0.6726087995500419, "learning_rate": 1.9981569330759597e-05, "loss": 0.1008, "step": 19477 }, { "epoch": 2.309735562670461, "grad_norm": 0.5944697005781283, "learning_rate": 1.997921785181719e-05, "loss": 0.0802, "step": 19478 }, { "epoch": 2.3098541444325864, "grad_norm": 0.7383816364442289, "learning_rate": 1.9976866419160226e-05, "loss": 0.0846, "step": 19479 }, { "epoch": 2.309972726194711, "grad_norm": 0.5449047745015513, "learning_rate": 1.9974515032810405e-05, "loss": 0.068, "step": 19480 }, { "epoch": 2.3100913079568364, "grad_norm": 0.7118929265370622, "learning_rate": 1.9972163692789393e-05, "loss": 0.1415, "step": 19481 }, { "epoch": 2.310209889718961, "grad_norm": 0.6633309988168661, "learning_rate": 1.9969812399118858e-05, "loss": 0.0845, "step": 19482 }, { "epoch": 2.3103284714810863, "grad_norm": 0.44887097952780264, "learning_rate": 1.996746115182049e-05, "loss": 0.0661, "step": 19483 }, { "epoch": 2.310447053243211, "grad_norm": 0.48956358426121327, "learning_rate": 1.9965109950915956e-05, "loss": 0.071, "step": 19484 }, { "epoch": 2.3105656350053363, "grad_norm": 0.8634225340731697, "learning_rate": 1.996275879642694e-05, "loss": 0.1245, "step": 19485 }, { "epoch": 2.310684216767461, "grad_norm": 0.676481389162232, "learning_rate": 1.9960407688375106e-05, "loss": 0.0835, "step": 19486 }, { "epoch": 2.3108027985295863, "grad_norm": 1.0712256586226838, "learning_rate": 1.995805662678213e-05, "loss": 0.1262, "step": 19487 }, { "epoch": 2.310921380291711, "grad_norm": 0.7892003741933462, "learning_rate": 1.9955705611669694e-05, "loss": 0.0918, "step": 19488 }, { "epoch": 2.311039962053836, "grad_norm": 0.9584174381622288, "learning_rate": 1.995335464305947e-05, "loss": 0.0969, "step": 19489 }, { "epoch": 2.311158543815961, "grad_norm": 0.6658673835401892, "learning_rate": 1.9951003720973117e-05, "loss": 0.0812, "step": 19490 }, { "epoch": 2.311277125578086, "grad_norm": 0.5894626174079831, "learning_rate": 1.9948652845432326e-05, "loss": 0.0849, "step": 19491 }, { "epoch": 2.311395707340211, "grad_norm": 0.5959504993073242, "learning_rate": 1.9946302016458756e-05, "loss": 0.1015, "step": 19492 }, { "epoch": 2.311514289102336, "grad_norm": 0.731612608034272, "learning_rate": 1.9943951234074092e-05, "loss": 0.0945, "step": 19493 }, { "epoch": 2.311632870864461, "grad_norm": 0.5142237613350996, "learning_rate": 1.9941600498299985e-05, "loss": 0.0745, "step": 19494 }, { "epoch": 2.311751452626586, "grad_norm": 0.6311016213909606, "learning_rate": 1.993924980915813e-05, "loss": 0.0836, "step": 19495 }, { "epoch": 2.311870034388711, "grad_norm": 1.2131841800505143, "learning_rate": 1.9936899166670188e-05, "loss": 0.1118, "step": 19496 }, { "epoch": 2.311988616150836, "grad_norm": 1.2261626087689872, "learning_rate": 1.9934548570857823e-05, "loss": 0.1698, "step": 19497 }, { "epoch": 2.312107197912961, "grad_norm": 0.709818491577816, "learning_rate": 1.99321980217427e-05, "loss": 0.099, "step": 19498 }, { "epoch": 2.312225779675086, "grad_norm": 1.19255819735611, "learning_rate": 1.9929847519346507e-05, "loss": 0.1361, "step": 19499 }, { "epoch": 2.312344361437211, "grad_norm": 0.7024706404644304, "learning_rate": 1.99274970636909e-05, "loss": 0.0837, "step": 19500 }, { "epoch": 2.312462943199336, "grad_norm": 1.0499534068280045, "learning_rate": 1.992514665479755e-05, "loss": 0.1274, "step": 19501 }, { "epoch": 2.3125815249614607, "grad_norm": 0.64253994103003, "learning_rate": 1.9922796292688116e-05, "loss": 0.1061, "step": 19502 }, { "epoch": 2.312700106723586, "grad_norm": 0.8490238846701407, "learning_rate": 1.9920445977384284e-05, "loss": 0.1202, "step": 19503 }, { "epoch": 2.3128186884857107, "grad_norm": 0.512750930417355, "learning_rate": 1.991809570890771e-05, "loss": 0.0813, "step": 19504 }, { "epoch": 2.312937270247836, "grad_norm": 0.4276462797389517, "learning_rate": 1.9915745487280053e-05, "loss": 0.0593, "step": 19505 }, { "epoch": 2.3130558520099607, "grad_norm": 0.8384790174158365, "learning_rate": 1.9913395312523e-05, "loss": 0.1281, "step": 19506 }, { "epoch": 2.313174433772086, "grad_norm": 1.1063189528515829, "learning_rate": 1.99110451846582e-05, "loss": 0.1278, "step": 19507 }, { "epoch": 2.313293015534211, "grad_norm": 0.8046009780998737, "learning_rate": 1.9908695103707327e-05, "loss": 0.1046, "step": 19508 }, { "epoch": 2.313411597296336, "grad_norm": 0.447946703767494, "learning_rate": 1.990634506969203e-05, "loss": 0.0722, "step": 19509 }, { "epoch": 2.3135301790584606, "grad_norm": 0.7444092759381513, "learning_rate": 1.9903995082633997e-05, "loss": 0.0922, "step": 19510 }, { "epoch": 2.313648760820586, "grad_norm": 0.9879504724016865, "learning_rate": 1.990164514255487e-05, "loss": 0.1272, "step": 19511 }, { "epoch": 2.313767342582711, "grad_norm": 0.6344211996443779, "learning_rate": 1.9899295249476337e-05, "loss": 0.0951, "step": 19512 }, { "epoch": 2.3138859243448358, "grad_norm": 0.5900483101671112, "learning_rate": 1.989694540342003e-05, "loss": 0.0767, "step": 19513 }, { "epoch": 2.3140045061069605, "grad_norm": 0.5377512623883821, "learning_rate": 1.989459560440764e-05, "loss": 0.0534, "step": 19514 }, { "epoch": 2.3141230878690857, "grad_norm": 0.7103578712142256, "learning_rate": 1.9892245852460818e-05, "loss": 0.0918, "step": 19515 }, { "epoch": 2.314241669631211, "grad_norm": 0.8039556893095237, "learning_rate": 1.9889896147601227e-05, "loss": 0.095, "step": 19516 }, { "epoch": 2.3143602513933357, "grad_norm": 0.7845241147483656, "learning_rate": 1.9887546489850515e-05, "loss": 0.1065, "step": 19517 }, { "epoch": 2.314478833155461, "grad_norm": 0.8441020564562346, "learning_rate": 1.9885196879230367e-05, "loss": 0.1262, "step": 19518 }, { "epoch": 2.3145974149175856, "grad_norm": 0.8188597771379673, "learning_rate": 1.988284731576243e-05, "loss": 0.0847, "step": 19519 }, { "epoch": 2.314715996679711, "grad_norm": 0.5769080881484961, "learning_rate": 1.9880497799468357e-05, "loss": 0.0719, "step": 19520 }, { "epoch": 2.3148345784418356, "grad_norm": 0.9425391382660115, "learning_rate": 1.9878148330369826e-05, "loss": 0.1239, "step": 19521 }, { "epoch": 2.314953160203961, "grad_norm": 0.6535475198077045, "learning_rate": 1.987579890848848e-05, "loss": 0.0764, "step": 19522 }, { "epoch": 2.3150717419660856, "grad_norm": 0.6450343843113554, "learning_rate": 1.987344953384599e-05, "loss": 0.0978, "step": 19523 }, { "epoch": 2.3151903237282108, "grad_norm": 0.6999815375798448, "learning_rate": 1.9871100206464e-05, "loss": 0.102, "step": 19524 }, { "epoch": 2.3153089054903355, "grad_norm": 0.9815367797233678, "learning_rate": 1.9868750926364185e-05, "loss": 0.0998, "step": 19525 }, { "epoch": 2.3154274872524607, "grad_norm": 1.1408641534879014, "learning_rate": 1.9866401693568195e-05, "loss": 0.1292, "step": 19526 }, { "epoch": 2.3155460690145855, "grad_norm": 0.8075052508368438, "learning_rate": 1.9864052508097684e-05, "loss": 0.0804, "step": 19527 }, { "epoch": 2.3156646507767107, "grad_norm": 0.8112218612023014, "learning_rate": 1.9861703369974306e-05, "loss": 0.0784, "step": 19528 }, { "epoch": 2.3157832325388354, "grad_norm": 1.040371198222734, "learning_rate": 1.9859354279219728e-05, "loss": 0.1204, "step": 19529 }, { "epoch": 2.3159018143009606, "grad_norm": 1.0551748277403288, "learning_rate": 1.98570052358556e-05, "loss": 0.1203, "step": 19530 }, { "epoch": 2.3160203960630854, "grad_norm": 0.7034080004619069, "learning_rate": 1.9854656239903574e-05, "loss": 0.0783, "step": 19531 }, { "epoch": 2.3161389778252106, "grad_norm": 0.7760391319543551, "learning_rate": 1.9852307291385306e-05, "loss": 0.0825, "step": 19532 }, { "epoch": 2.3162575595873354, "grad_norm": 0.812389387933463, "learning_rate": 1.984995839032245e-05, "loss": 0.106, "step": 19533 }, { "epoch": 2.3163761413494606, "grad_norm": 1.1819771022325625, "learning_rate": 1.984760953673667e-05, "loss": 0.1921, "step": 19534 }, { "epoch": 2.3164947231115853, "grad_norm": 0.6942867081654149, "learning_rate": 1.9845260730649602e-05, "loss": 0.0933, "step": 19535 }, { "epoch": 2.3166133048737105, "grad_norm": 0.6423297314158021, "learning_rate": 1.984291197208292e-05, "loss": 0.0657, "step": 19536 }, { "epoch": 2.3167318866358353, "grad_norm": 1.3269219865232027, "learning_rate": 1.9840563261058265e-05, "loss": 0.1905, "step": 19537 }, { "epoch": 2.3168504683979605, "grad_norm": 0.8632993360116618, "learning_rate": 1.9838214597597287e-05, "loss": 0.0887, "step": 19538 }, { "epoch": 2.3169690501600853, "grad_norm": 0.6089540378495435, "learning_rate": 1.9835865981721633e-05, "loss": 0.0844, "step": 19539 }, { "epoch": 2.3170876319222105, "grad_norm": 0.7190937806357365, "learning_rate": 1.983351741345297e-05, "loss": 0.0986, "step": 19540 }, { "epoch": 2.317206213684335, "grad_norm": 0.7009237675292992, "learning_rate": 1.9831168892812937e-05, "loss": 0.0973, "step": 19541 }, { "epoch": 2.3173247954464604, "grad_norm": 0.7459161027002865, "learning_rate": 1.9828820419823197e-05, "loss": 0.0993, "step": 19542 }, { "epoch": 2.317443377208585, "grad_norm": 0.8311516293764057, "learning_rate": 1.9826471994505376e-05, "loss": 0.107, "step": 19543 }, { "epoch": 2.3175619589707104, "grad_norm": 0.707807650933055, "learning_rate": 1.982412361688115e-05, "loss": 0.1113, "step": 19544 }, { "epoch": 2.317680540732835, "grad_norm": 0.7639304983171717, "learning_rate": 1.982177528697216e-05, "loss": 0.0995, "step": 19545 }, { "epoch": 2.3177991224949603, "grad_norm": 0.6340470964965855, "learning_rate": 1.981942700480004e-05, "loss": 0.0734, "step": 19546 }, { "epoch": 2.317917704257085, "grad_norm": 0.7944096592372047, "learning_rate": 1.981707877038646e-05, "loss": 0.1063, "step": 19547 }, { "epoch": 2.3180362860192103, "grad_norm": 0.6045125509221941, "learning_rate": 1.9814730583753058e-05, "loss": 0.0722, "step": 19548 }, { "epoch": 2.318154867781335, "grad_norm": 0.853854824177437, "learning_rate": 1.9812382444921482e-05, "loss": 0.0878, "step": 19549 }, { "epoch": 2.3182734495434603, "grad_norm": 0.5882713606520377, "learning_rate": 1.981003435391337e-05, "loss": 0.0833, "step": 19550 }, { "epoch": 2.318392031305585, "grad_norm": 0.6006016191154062, "learning_rate": 1.9807686310750383e-05, "loss": 0.0914, "step": 19551 }, { "epoch": 2.3185106130677102, "grad_norm": 0.6351846623092647, "learning_rate": 1.9805338315454157e-05, "loss": 0.0894, "step": 19552 }, { "epoch": 2.318629194829835, "grad_norm": 0.6013585188866459, "learning_rate": 1.980299036804635e-05, "loss": 0.0671, "step": 19553 }, { "epoch": 2.31874777659196, "grad_norm": 0.7852954857359877, "learning_rate": 1.9800642468548587e-05, "loss": 0.1047, "step": 19554 }, { "epoch": 2.318866358354085, "grad_norm": 0.9450697516809748, "learning_rate": 1.9798294616982533e-05, "loss": 0.106, "step": 19555 }, { "epoch": 2.31898494011621, "grad_norm": 0.8812428539388657, "learning_rate": 1.9795946813369826e-05, "loss": 0.1348, "step": 19556 }, { "epoch": 2.3191035218783353, "grad_norm": 0.9691708442322058, "learning_rate": 1.9793599057732102e-05, "loss": 0.1502, "step": 19557 }, { "epoch": 2.31922210364046, "grad_norm": 0.9950858566419218, "learning_rate": 1.979125135009101e-05, "loss": 0.1181, "step": 19558 }, { "epoch": 2.319340685402585, "grad_norm": 0.7778314961196723, "learning_rate": 1.97889036904682e-05, "loss": 0.1314, "step": 19559 }, { "epoch": 2.31945926716471, "grad_norm": 1.0055452006417973, "learning_rate": 1.9786556078885297e-05, "loss": 0.1705, "step": 19560 }, { "epoch": 2.3195778489268353, "grad_norm": 0.541504628145367, "learning_rate": 1.9784208515363957e-05, "loss": 0.0793, "step": 19561 }, { "epoch": 2.31969643068896, "grad_norm": 0.6604952245766792, "learning_rate": 1.9781860999925818e-05, "loss": 0.1047, "step": 19562 }, { "epoch": 2.319815012451085, "grad_norm": 0.6218952823831445, "learning_rate": 1.9779513532592526e-05, "loss": 0.0838, "step": 19563 }, { "epoch": 2.31993359421321, "grad_norm": 0.6726587400585863, "learning_rate": 1.9777166113385722e-05, "loss": 0.0897, "step": 19564 }, { "epoch": 2.320052175975335, "grad_norm": 0.7331279651465841, "learning_rate": 1.977481874232703e-05, "loss": 0.1069, "step": 19565 }, { "epoch": 2.32017075773746, "grad_norm": 0.7522635274520322, "learning_rate": 1.977247141943811e-05, "loss": 0.103, "step": 19566 }, { "epoch": 2.320289339499585, "grad_norm": 1.0909727646435528, "learning_rate": 1.977012414474059e-05, "loss": 0.1239, "step": 19567 }, { "epoch": 2.32040792126171, "grad_norm": 0.9385352698033184, "learning_rate": 1.976777691825612e-05, "loss": 0.1139, "step": 19568 }, { "epoch": 2.320526503023835, "grad_norm": 0.6316216311924798, "learning_rate": 1.9765429740006314e-05, "loss": 0.0795, "step": 19569 }, { "epoch": 2.32064508478596, "grad_norm": 0.7070766735745233, "learning_rate": 1.976308261001284e-05, "loss": 0.1063, "step": 19570 }, { "epoch": 2.320763666548085, "grad_norm": 0.651622763099539, "learning_rate": 1.9760735528297317e-05, "loss": 0.0878, "step": 19571 }, { "epoch": 2.32088224831021, "grad_norm": 0.7984324501934332, "learning_rate": 1.9758388494881392e-05, "loss": 0.1148, "step": 19572 }, { "epoch": 2.321000830072335, "grad_norm": 0.8143879075953584, "learning_rate": 1.975604150978669e-05, "loss": 0.1058, "step": 19573 }, { "epoch": 2.32111941183446, "grad_norm": 0.6316095463962412, "learning_rate": 1.975369457303486e-05, "loss": 0.0737, "step": 19574 }, { "epoch": 2.321237993596585, "grad_norm": 0.6648258564808079, "learning_rate": 1.9751347684647533e-05, "loss": 0.075, "step": 19575 }, { "epoch": 2.3213565753587098, "grad_norm": 0.618418146743983, "learning_rate": 1.9749000844646337e-05, "loss": 0.1091, "step": 19576 }, { "epoch": 2.321475157120835, "grad_norm": 0.5645326368080992, "learning_rate": 1.9746654053052922e-05, "loss": 0.0663, "step": 19577 }, { "epoch": 2.3215937388829597, "grad_norm": 0.9819120024066299, "learning_rate": 1.9744307309888914e-05, "loss": 0.1465, "step": 19578 }, { "epoch": 2.321712320645085, "grad_norm": 0.6507165978905625, "learning_rate": 1.9741960615175952e-05, "loss": 0.1037, "step": 19579 }, { "epoch": 2.3218309024072097, "grad_norm": 0.5382783820634356, "learning_rate": 1.973961396893565e-05, "loss": 0.0815, "step": 19580 }, { "epoch": 2.321949484169335, "grad_norm": 0.6600320733273624, "learning_rate": 1.9737267371189673e-05, "loss": 0.0713, "step": 19581 }, { "epoch": 2.3220680659314596, "grad_norm": 0.9607757608438607, "learning_rate": 1.9734920821959625e-05, "loss": 0.1592, "step": 19582 }, { "epoch": 2.322186647693585, "grad_norm": 0.8226844498073277, "learning_rate": 1.973257432126716e-05, "loss": 0.1218, "step": 19583 }, { "epoch": 2.3223052294557096, "grad_norm": 0.9862022264668764, "learning_rate": 1.9730227869133887e-05, "loss": 0.1227, "step": 19584 }, { "epoch": 2.322423811217835, "grad_norm": 0.4100522886855501, "learning_rate": 1.9727881465581464e-05, "loss": 0.0637, "step": 19585 }, { "epoch": 2.3225423929799596, "grad_norm": 0.6752619136160796, "learning_rate": 1.9725535110631507e-05, "loss": 0.115, "step": 19586 }, { "epoch": 2.3226609747420848, "grad_norm": 0.8271875797107203, "learning_rate": 1.972318880430565e-05, "loss": 0.1032, "step": 19587 }, { "epoch": 2.3227795565042095, "grad_norm": 0.44179272657253565, "learning_rate": 1.972084254662551e-05, "loss": 0.0731, "step": 19588 }, { "epoch": 2.3228981382663347, "grad_norm": 0.6720423138232527, "learning_rate": 1.9718496337612737e-05, "loss": 0.0864, "step": 19589 }, { "epoch": 2.3230167200284595, "grad_norm": 0.6489550988701074, "learning_rate": 1.971615017728895e-05, "loss": 0.0856, "step": 19590 }, { "epoch": 2.3231353017905847, "grad_norm": 0.754656323958397, "learning_rate": 1.9713804065675775e-05, "loss": 0.0964, "step": 19591 }, { "epoch": 2.3232538835527095, "grad_norm": 0.6024092987410897, "learning_rate": 1.9711458002794844e-05, "loss": 0.098, "step": 19592 }, { "epoch": 2.3233724653148347, "grad_norm": 0.9505005202994333, "learning_rate": 1.9709111988667794e-05, "loss": 0.1371, "step": 19593 }, { "epoch": 2.3234910470769594, "grad_norm": 0.5984828937950394, "learning_rate": 1.9706766023316238e-05, "loss": 0.0894, "step": 19594 }, { "epoch": 2.3236096288390846, "grad_norm": 1.201781102426866, "learning_rate": 1.9704420106761803e-05, "loss": 0.1407, "step": 19595 }, { "epoch": 2.3237282106012094, "grad_norm": 0.5586530100470325, "learning_rate": 1.970207423902613e-05, "loss": 0.0962, "step": 19596 }, { "epoch": 2.3238467923633346, "grad_norm": 0.8388997351133879, "learning_rate": 1.9699728420130833e-05, "loss": 0.1178, "step": 19597 }, { "epoch": 2.3239653741254593, "grad_norm": 0.6799894863401414, "learning_rate": 1.9697382650097543e-05, "loss": 0.0713, "step": 19598 }, { "epoch": 2.3240839558875845, "grad_norm": 0.734462881266322, "learning_rate": 1.9695036928947873e-05, "loss": 0.109, "step": 19599 }, { "epoch": 2.3242025376497093, "grad_norm": 0.8916128202521879, "learning_rate": 1.9692691256703463e-05, "loss": 0.1293, "step": 19600 }, { "epoch": 2.3243211194118345, "grad_norm": 0.5509371283389083, "learning_rate": 1.969034563338593e-05, "loss": 0.0956, "step": 19601 }, { "epoch": 2.3244397011739593, "grad_norm": 0.691293193765222, "learning_rate": 1.9688000059016904e-05, "loss": 0.0867, "step": 19602 }, { "epoch": 2.3245582829360845, "grad_norm": 0.5227752425732899, "learning_rate": 1.968565453361799e-05, "loss": 0.0781, "step": 19603 }, { "epoch": 2.3246768646982092, "grad_norm": 0.7314106253762587, "learning_rate": 1.9683309057210838e-05, "loss": 0.0911, "step": 19604 }, { "epoch": 2.3247954464603344, "grad_norm": 0.7422039010714777, "learning_rate": 1.9680963629817056e-05, "loss": 0.088, "step": 19605 }, { "epoch": 2.3249140282224596, "grad_norm": 0.5386597718884868, "learning_rate": 1.9678618251458255e-05, "loss": 0.08, "step": 19606 }, { "epoch": 2.3250326099845844, "grad_norm": 0.9260304925510692, "learning_rate": 1.967627292215608e-05, "loss": 0.1453, "step": 19607 }, { "epoch": 2.325151191746709, "grad_norm": 0.6727558626817153, "learning_rate": 1.9673927641932137e-05, "loss": 0.0932, "step": 19608 }, { "epoch": 2.3252697735088343, "grad_norm": 0.8060927853960526, "learning_rate": 1.9671582410808056e-05, "loss": 0.1048, "step": 19609 }, { "epoch": 2.3253883552709596, "grad_norm": 0.7238883464192284, "learning_rate": 1.9669237228805438e-05, "loss": 0.082, "step": 19610 }, { "epoch": 2.3255069370330843, "grad_norm": 0.7003090385631497, "learning_rate": 1.9666892095945924e-05, "loss": 0.083, "step": 19611 }, { "epoch": 2.325625518795209, "grad_norm": 0.8737299823262829, "learning_rate": 1.9664547012251122e-05, "loss": 0.1111, "step": 19612 }, { "epoch": 2.3257441005573343, "grad_norm": 0.7012433779526921, "learning_rate": 1.966220197774266e-05, "loss": 0.114, "step": 19613 }, { "epoch": 2.3258626823194595, "grad_norm": 0.7569027016007822, "learning_rate": 1.9659856992442135e-05, "loss": 0.0978, "step": 19614 }, { "epoch": 2.3259812640815842, "grad_norm": 0.9514026626406528, "learning_rate": 1.9657512056371193e-05, "loss": 0.1157, "step": 19615 }, { "epoch": 2.326099845843709, "grad_norm": 1.2791249210001723, "learning_rate": 1.9655167169551443e-05, "loss": 0.1262, "step": 19616 }, { "epoch": 2.326218427605834, "grad_norm": 0.8003484174904755, "learning_rate": 1.965282233200449e-05, "loss": 0.0937, "step": 19617 }, { "epoch": 2.3263370093679594, "grad_norm": 1.0696019656946385, "learning_rate": 1.965047754375195e-05, "loss": 0.123, "step": 19618 }, { "epoch": 2.326455591130084, "grad_norm": 0.465430307793162, "learning_rate": 1.964813280481546e-05, "loss": 0.0748, "step": 19619 }, { "epoch": 2.3265741728922094, "grad_norm": 0.5700660865085883, "learning_rate": 1.9645788115216614e-05, "loss": 0.0931, "step": 19620 }, { "epoch": 2.326692754654334, "grad_norm": 0.6136573610865601, "learning_rate": 1.964344347497704e-05, "loss": 0.0935, "step": 19621 }, { "epoch": 2.3268113364164593, "grad_norm": 0.623024883853895, "learning_rate": 1.9641098884118345e-05, "loss": 0.0765, "step": 19622 }, { "epoch": 2.326929918178584, "grad_norm": 0.8577286996179616, "learning_rate": 1.963875434266215e-05, "loss": 0.1103, "step": 19623 }, { "epoch": 2.3270484999407093, "grad_norm": 0.7425164041624043, "learning_rate": 1.963640985063007e-05, "loss": 0.1037, "step": 19624 }, { "epoch": 2.327167081702834, "grad_norm": 0.6984518078082514, "learning_rate": 1.9634065408043706e-05, "loss": 0.095, "step": 19625 }, { "epoch": 2.3272856634649592, "grad_norm": 0.7388524919515016, "learning_rate": 1.9631721014924684e-05, "loss": 0.11, "step": 19626 }, { "epoch": 2.327404245227084, "grad_norm": 0.6444896473936335, "learning_rate": 1.9629376671294614e-05, "loss": 0.0849, "step": 19627 }, { "epoch": 2.327522826989209, "grad_norm": 0.9125596871168674, "learning_rate": 1.9627032377175102e-05, "loss": 0.127, "step": 19628 }, { "epoch": 2.327641408751334, "grad_norm": 0.6091693328749415, "learning_rate": 1.9624688132587754e-05, "loss": 0.0737, "step": 19629 }, { "epoch": 2.327759990513459, "grad_norm": 0.4682559415356481, "learning_rate": 1.96223439375542e-05, "loss": 0.061, "step": 19630 }, { "epoch": 2.327878572275584, "grad_norm": 0.7655616942254497, "learning_rate": 1.9619999792096036e-05, "loss": 0.0879, "step": 19631 }, { "epoch": 2.327997154037709, "grad_norm": 0.6616630555046017, "learning_rate": 1.9617655696234876e-05, "loss": 0.107, "step": 19632 }, { "epoch": 2.328115735799834, "grad_norm": 1.103396690661148, "learning_rate": 1.961531164999233e-05, "loss": 0.2043, "step": 19633 }, { "epoch": 2.328234317561959, "grad_norm": 0.7656191004064901, "learning_rate": 1.9612967653390014e-05, "loss": 0.0997, "step": 19634 }, { "epoch": 2.328352899324084, "grad_norm": 0.7725198295403711, "learning_rate": 1.9610623706449528e-05, "loss": 0.1036, "step": 19635 }, { "epoch": 2.328471481086209, "grad_norm": 0.6879006644903779, "learning_rate": 1.9608279809192475e-05, "loss": 0.0698, "step": 19636 }, { "epoch": 2.328590062848334, "grad_norm": 0.6136471258780314, "learning_rate": 1.9605935961640476e-05, "loss": 0.0834, "step": 19637 }, { "epoch": 2.328708644610459, "grad_norm": 0.751937081289528, "learning_rate": 1.9603592163815137e-05, "loss": 0.1065, "step": 19638 }, { "epoch": 2.3288272263725838, "grad_norm": 0.5815452714196307, "learning_rate": 1.9601248415738054e-05, "loss": 0.0944, "step": 19639 }, { "epoch": 2.328945808134709, "grad_norm": 0.9153912070454847, "learning_rate": 1.9598904717430842e-05, "loss": 0.1436, "step": 19640 }, { "epoch": 2.3290643898968337, "grad_norm": 0.7490436463238564, "learning_rate": 1.9596561068915103e-05, "loss": 0.1169, "step": 19641 }, { "epoch": 2.329182971658959, "grad_norm": 0.716307560085018, "learning_rate": 1.959421747021245e-05, "loss": 0.0911, "step": 19642 }, { "epoch": 2.3293015534210837, "grad_norm": 0.5933023454175723, "learning_rate": 1.9591873921344483e-05, "loss": 0.084, "step": 19643 }, { "epoch": 2.329420135183209, "grad_norm": 0.7205943000590765, "learning_rate": 1.9589530422332797e-05, "loss": 0.1094, "step": 19644 }, { "epoch": 2.3295387169453337, "grad_norm": 0.6682982771411663, "learning_rate": 1.958718697319902e-05, "loss": 0.1025, "step": 19645 }, { "epoch": 2.329657298707459, "grad_norm": 0.9048478487967007, "learning_rate": 1.9584843573964735e-05, "loss": 0.1116, "step": 19646 }, { "epoch": 2.3297758804695836, "grad_norm": 0.7393912062264846, "learning_rate": 1.9582500224651543e-05, "loss": 0.1067, "step": 19647 }, { "epoch": 2.329894462231709, "grad_norm": 0.8093916872243289, "learning_rate": 1.9580156925281067e-05, "loss": 0.1233, "step": 19648 }, { "epoch": 2.3300130439938336, "grad_norm": 0.4508682903165856, "learning_rate": 1.9577813675874897e-05, "loss": 0.0592, "step": 19649 }, { "epoch": 2.330131625755959, "grad_norm": 0.8673545628162307, "learning_rate": 1.9575470476454634e-05, "loss": 0.0999, "step": 19650 }, { "epoch": 2.3302502075180835, "grad_norm": 0.6487028212588282, "learning_rate": 1.9573127327041875e-05, "loss": 0.0951, "step": 19651 }, { "epoch": 2.3303687892802087, "grad_norm": 0.8081895582119544, "learning_rate": 1.9570784227658233e-05, "loss": 0.1246, "step": 19652 }, { "epoch": 2.3304873710423335, "grad_norm": 0.8685687986685234, "learning_rate": 1.9568441178325304e-05, "loss": 0.0946, "step": 19653 }, { "epoch": 2.3306059528044587, "grad_norm": 1.308034299292033, "learning_rate": 1.9566098179064686e-05, "loss": 0.1625, "step": 19654 }, { "epoch": 2.330724534566584, "grad_norm": 0.5762780804481167, "learning_rate": 1.9563755229897973e-05, "loss": 0.0978, "step": 19655 }, { "epoch": 2.3308431163287087, "grad_norm": 0.6615979346505892, "learning_rate": 1.9561412330846778e-05, "loss": 0.0827, "step": 19656 }, { "epoch": 2.3309616980908334, "grad_norm": 0.8439458520280403, "learning_rate": 1.9559069481932696e-05, "loss": 0.1164, "step": 19657 }, { "epoch": 2.3310802798529586, "grad_norm": 0.6022588170968688, "learning_rate": 1.9556726683177317e-05, "loss": 0.0834, "step": 19658 }, { "epoch": 2.331198861615084, "grad_norm": 0.7771491188197321, "learning_rate": 1.9554383934602233e-05, "loss": 0.0942, "step": 19659 }, { "epoch": 2.3313174433772086, "grad_norm": 0.661049339849892, "learning_rate": 1.9552041236229063e-05, "loss": 0.0868, "step": 19660 }, { "epoch": 2.3314360251393333, "grad_norm": 0.7660036541731323, "learning_rate": 1.9549698588079385e-05, "loss": 0.0995, "step": 19661 }, { "epoch": 2.3315546069014585, "grad_norm": 0.546759546632222, "learning_rate": 1.9547355990174802e-05, "loss": 0.0888, "step": 19662 }, { "epoch": 2.3316731886635838, "grad_norm": 0.6276902736150629, "learning_rate": 1.9545013442536912e-05, "loss": 0.0668, "step": 19663 }, { "epoch": 2.3317917704257085, "grad_norm": 0.5222905331930124, "learning_rate": 1.9542670945187318e-05, "loss": 0.0673, "step": 19664 }, { "epoch": 2.3319103521878333, "grad_norm": 0.6095844730792853, "learning_rate": 1.95403284981476e-05, "loss": 0.0756, "step": 19665 }, { "epoch": 2.3320289339499585, "grad_norm": 0.8351580165020239, "learning_rate": 1.953798610143935e-05, "loss": 0.1133, "step": 19666 }, { "epoch": 2.3321475157120837, "grad_norm": 0.9468603894207163, "learning_rate": 1.953564375508418e-05, "loss": 0.1199, "step": 19667 }, { "epoch": 2.3322660974742084, "grad_norm": 0.7985714177422902, "learning_rate": 1.9533301459103676e-05, "loss": 0.1079, "step": 19668 }, { "epoch": 2.3323846792363336, "grad_norm": 0.9678303791845877, "learning_rate": 1.953095921351942e-05, "loss": 0.1162, "step": 19669 }, { "epoch": 2.3325032609984584, "grad_norm": 0.5527903826354482, "learning_rate": 1.9528617018353016e-05, "loss": 0.0737, "step": 19670 }, { "epoch": 2.3326218427605836, "grad_norm": 1.062955385549872, "learning_rate": 1.952627487362605e-05, "loss": 0.1211, "step": 19671 }, { "epoch": 2.3327404245227084, "grad_norm": 0.6674053674517046, "learning_rate": 1.9523932779360126e-05, "loss": 0.099, "step": 19672 }, { "epoch": 2.3328590062848336, "grad_norm": 0.9679840305458146, "learning_rate": 1.9521590735576824e-05, "loss": 0.1375, "step": 19673 }, { "epoch": 2.3329775880469583, "grad_norm": 0.7046136398564593, "learning_rate": 1.951924874229773e-05, "loss": 0.0792, "step": 19674 }, { "epoch": 2.3330961698090835, "grad_norm": 0.6265409137721989, "learning_rate": 1.951690679954445e-05, "loss": 0.1119, "step": 19675 }, { "epoch": 2.3332147515712083, "grad_norm": 0.7679439433878709, "learning_rate": 1.951456490733857e-05, "loss": 0.0928, "step": 19676 }, { "epoch": 2.3333333333333335, "grad_norm": 0.6046103274596862, "learning_rate": 1.9512223065701658e-05, "loss": 0.1045, "step": 19677 }, { "epoch": 2.3334519150954582, "grad_norm": 0.5452435691395431, "learning_rate": 1.9509881274655335e-05, "loss": 0.0649, "step": 19678 }, { "epoch": 2.3335704968575834, "grad_norm": 0.7194761730955362, "learning_rate": 1.950753953422117e-05, "loss": 0.1072, "step": 19679 }, { "epoch": 2.333689078619708, "grad_norm": 0.7047804862838191, "learning_rate": 1.950519784442075e-05, "loss": 0.0791, "step": 19680 }, { "epoch": 2.3338076603818334, "grad_norm": 0.6057165150684416, "learning_rate": 1.9502856205275672e-05, "loss": 0.0824, "step": 19681 }, { "epoch": 2.333926242143958, "grad_norm": 0.6418007828358009, "learning_rate": 1.9500514616807518e-05, "loss": 0.0741, "step": 19682 }, { "epoch": 2.3340448239060834, "grad_norm": 0.5978403637457337, "learning_rate": 1.9498173079037876e-05, "loss": 0.076, "step": 19683 }, { "epoch": 2.334163405668208, "grad_norm": 0.5283480307554714, "learning_rate": 1.9495831591988334e-05, "loss": 0.0744, "step": 19684 }, { "epoch": 2.3342819874303333, "grad_norm": 0.9003501111134286, "learning_rate": 1.9493490155680466e-05, "loss": 0.1163, "step": 19685 }, { "epoch": 2.334400569192458, "grad_norm": 0.5050102995885705, "learning_rate": 1.9491148770135876e-05, "loss": 0.0671, "step": 19686 }, { "epoch": 2.3345191509545833, "grad_norm": 0.6631994156165885, "learning_rate": 1.948880743537614e-05, "loss": 0.0883, "step": 19687 }, { "epoch": 2.334637732716708, "grad_norm": 0.9061859891115267, "learning_rate": 1.9486466151422834e-05, "loss": 0.1279, "step": 19688 }, { "epoch": 2.3347563144788333, "grad_norm": 0.5468868359407588, "learning_rate": 1.9484124918297547e-05, "loss": 0.0733, "step": 19689 }, { "epoch": 2.334874896240958, "grad_norm": 0.8104339970184976, "learning_rate": 1.948178373602187e-05, "loss": 0.1272, "step": 19690 }, { "epoch": 2.334993478003083, "grad_norm": 0.6025015903654741, "learning_rate": 1.9479442604617383e-05, "loss": 0.0749, "step": 19691 }, { "epoch": 2.335112059765208, "grad_norm": 0.5440059701386678, "learning_rate": 1.947710152410565e-05, "loss": 0.079, "step": 19692 }, { "epoch": 2.335230641527333, "grad_norm": 0.7751655072712493, "learning_rate": 1.9474760494508286e-05, "loss": 0.1283, "step": 19693 }, { "epoch": 2.335349223289458, "grad_norm": 0.7328838140024185, "learning_rate": 1.947241951584685e-05, "loss": 0.1161, "step": 19694 }, { "epoch": 2.335467805051583, "grad_norm": 0.7167600048414362, "learning_rate": 1.947007858814293e-05, "loss": 0.1063, "step": 19695 }, { "epoch": 2.335586386813708, "grad_norm": 0.7071499132477514, "learning_rate": 1.9467737711418095e-05, "loss": 0.0953, "step": 19696 }, { "epoch": 2.335704968575833, "grad_norm": 0.6963646487515647, "learning_rate": 1.946539688569394e-05, "loss": 0.0826, "step": 19697 }, { "epoch": 2.335823550337958, "grad_norm": 0.7230030399307877, "learning_rate": 1.9463056110992043e-05, "loss": 0.1187, "step": 19698 }, { "epoch": 2.335942132100083, "grad_norm": 0.9302146387773629, "learning_rate": 1.9460715387333973e-05, "loss": 0.1226, "step": 19699 }, { "epoch": 2.336060713862208, "grad_norm": 0.8262518044903985, "learning_rate": 1.945837471474131e-05, "loss": 0.0937, "step": 19700 }, { "epoch": 2.336179295624333, "grad_norm": 0.885883779918424, "learning_rate": 1.945603409323564e-05, "loss": 0.0853, "step": 19701 }, { "epoch": 2.3362978773864578, "grad_norm": 0.8798300985808893, "learning_rate": 1.945369352283854e-05, "loss": 0.0969, "step": 19702 }, { "epoch": 2.336416459148583, "grad_norm": 0.852929187049192, "learning_rate": 1.945135300357159e-05, "loss": 0.1103, "step": 19703 }, { "epoch": 2.3365350409107077, "grad_norm": 0.6397432125221462, "learning_rate": 1.9449012535456346e-05, "loss": 0.0923, "step": 19704 }, { "epoch": 2.336653622672833, "grad_norm": 0.7438897731870475, "learning_rate": 1.9446672118514415e-05, "loss": 0.0993, "step": 19705 }, { "epoch": 2.3367722044349577, "grad_norm": 0.45016352253737263, "learning_rate": 1.944433175276735e-05, "loss": 0.0681, "step": 19706 }, { "epoch": 2.336890786197083, "grad_norm": 0.6005121500929034, "learning_rate": 1.9441991438236727e-05, "loss": 0.0969, "step": 19707 }, { "epoch": 2.337009367959208, "grad_norm": 0.4964019039508338, "learning_rate": 1.9439651174944137e-05, "loss": 0.0706, "step": 19708 }, { "epoch": 2.337127949721333, "grad_norm": 0.9093817777558677, "learning_rate": 1.9437310962911146e-05, "loss": 0.1329, "step": 19709 }, { "epoch": 2.3372465314834576, "grad_norm": 1.19056677509281, "learning_rate": 1.9434970802159318e-05, "loss": 0.1382, "step": 19710 }, { "epoch": 2.337365113245583, "grad_norm": 0.6385920399928435, "learning_rate": 1.943263069271024e-05, "loss": 0.0883, "step": 19711 }, { "epoch": 2.337483695007708, "grad_norm": 0.9542566440145908, "learning_rate": 1.9430290634585475e-05, "loss": 0.1387, "step": 19712 }, { "epoch": 2.337602276769833, "grad_norm": 1.0145455332029072, "learning_rate": 1.9427950627806608e-05, "loss": 0.128, "step": 19713 }, { "epoch": 2.3377208585319575, "grad_norm": 0.6339858608827293, "learning_rate": 1.94256106723952e-05, "loss": 0.0784, "step": 19714 }, { "epoch": 2.3378394402940827, "grad_norm": 0.6498993438421286, "learning_rate": 1.942327076837282e-05, "loss": 0.105, "step": 19715 }, { "epoch": 2.337958022056208, "grad_norm": 0.7012573692346179, "learning_rate": 1.9420930915761053e-05, "loss": 0.1016, "step": 19716 }, { "epoch": 2.3380766038183327, "grad_norm": 0.5535169045110334, "learning_rate": 1.941859111458146e-05, "loss": 0.0691, "step": 19717 }, { "epoch": 2.338195185580458, "grad_norm": 0.8002325088268389, "learning_rate": 1.94162513648556e-05, "loss": 0.1378, "step": 19718 }, { "epoch": 2.3383137673425827, "grad_norm": 0.6086876027604352, "learning_rate": 1.9413911666605074e-05, "loss": 0.0844, "step": 19719 }, { "epoch": 2.338432349104708, "grad_norm": 0.7849635112057574, "learning_rate": 1.941157201985142e-05, "loss": 0.135, "step": 19720 }, { "epoch": 2.3385509308668326, "grad_norm": 0.932555263183837, "learning_rate": 1.9409232424616232e-05, "loss": 0.1545, "step": 19721 }, { "epoch": 2.338669512628958, "grad_norm": 0.8794876046105843, "learning_rate": 1.9406892880921045e-05, "loss": 0.1197, "step": 19722 }, { "epoch": 2.3387880943910826, "grad_norm": 0.5943152842185945, "learning_rate": 1.9404553388787464e-05, "loss": 0.0685, "step": 19723 }, { "epoch": 2.338906676153208, "grad_norm": 0.8238051825266502, "learning_rate": 1.9402213948237034e-05, "loss": 0.1166, "step": 19724 }, { "epoch": 2.3390252579153326, "grad_norm": 0.784311353500338, "learning_rate": 1.939987455929133e-05, "loss": 0.1021, "step": 19725 }, { "epoch": 2.3391438396774578, "grad_norm": 0.8048938194411933, "learning_rate": 1.9397535221971907e-05, "loss": 0.1095, "step": 19726 }, { "epoch": 2.3392624214395825, "grad_norm": 0.859953989584921, "learning_rate": 1.9395195936300347e-05, "loss": 0.1189, "step": 19727 }, { "epoch": 2.3393810032017077, "grad_norm": 0.5923798689844896, "learning_rate": 1.9392856702298207e-05, "loss": 0.0905, "step": 19728 }, { "epoch": 2.3394995849638325, "grad_norm": 0.5321311125332476, "learning_rate": 1.9390517519987048e-05, "loss": 0.0754, "step": 19729 }, { "epoch": 2.3396181667259577, "grad_norm": 0.6100689098342109, "learning_rate": 1.938817838938844e-05, "loss": 0.0929, "step": 19730 }, { "epoch": 2.3397367484880824, "grad_norm": 0.5591611123443301, "learning_rate": 1.9385839310523942e-05, "loss": 0.0638, "step": 19731 }, { "epoch": 2.3398553302502076, "grad_norm": 0.645484209315893, "learning_rate": 1.938350028341513e-05, "loss": 0.102, "step": 19732 }, { "epoch": 2.3399739120123324, "grad_norm": 0.7001111475243336, "learning_rate": 1.9381161308083545e-05, "loss": 0.0969, "step": 19733 }, { "epoch": 2.3400924937744576, "grad_norm": 0.5013442041598899, "learning_rate": 1.9378822384550774e-05, "loss": 0.0751, "step": 19734 }, { "epoch": 2.3402110755365824, "grad_norm": 0.8394260205917882, "learning_rate": 1.9376483512838368e-05, "loss": 0.1273, "step": 19735 }, { "epoch": 2.3403296572987076, "grad_norm": 0.5343040232702672, "learning_rate": 1.9374144692967885e-05, "loss": 0.0664, "step": 19736 }, { "epoch": 2.3404482390608323, "grad_norm": 0.5831301773840358, "learning_rate": 1.937180592496088e-05, "loss": 0.0967, "step": 19737 }, { "epoch": 2.3405668208229575, "grad_norm": 0.7213953099544432, "learning_rate": 1.9369467208838936e-05, "loss": 0.1004, "step": 19738 }, { "epoch": 2.3406854025850823, "grad_norm": 1.1032907756567942, "learning_rate": 1.9367128544623597e-05, "loss": 0.1707, "step": 19739 }, { "epoch": 2.3408039843472075, "grad_norm": 0.6261687788453586, "learning_rate": 1.936478993233642e-05, "loss": 0.0891, "step": 19740 }, { "epoch": 2.3409225661093322, "grad_norm": 0.644738856512835, "learning_rate": 1.936245137199897e-05, "loss": 0.0839, "step": 19741 }, { "epoch": 2.3410411478714575, "grad_norm": 0.7120428435143825, "learning_rate": 1.9360112863632808e-05, "loss": 0.0944, "step": 19742 }, { "epoch": 2.341159729633582, "grad_norm": 0.9127382456328282, "learning_rate": 1.9357774407259487e-05, "loss": 0.1404, "step": 19743 }, { "epoch": 2.3412783113957074, "grad_norm": 0.5518385402521992, "learning_rate": 1.9355436002900572e-05, "loss": 0.0633, "step": 19744 }, { "epoch": 2.341396893157832, "grad_norm": 0.609748355163441, "learning_rate": 1.9353097650577605e-05, "loss": 0.0817, "step": 19745 }, { "epoch": 2.3415154749199574, "grad_norm": 0.6471446153383712, "learning_rate": 1.9350759350312165e-05, "loss": 0.0804, "step": 19746 }, { "epoch": 2.341634056682082, "grad_norm": 0.813520647194993, "learning_rate": 1.9348421102125794e-05, "loss": 0.1242, "step": 19747 }, { "epoch": 2.3417526384442073, "grad_norm": 0.9546953262479119, "learning_rate": 1.934608290604004e-05, "loss": 0.1617, "step": 19748 }, { "epoch": 2.341871220206332, "grad_norm": 0.5825445619721515, "learning_rate": 1.9343744762076477e-05, "loss": 0.0672, "step": 19749 }, { "epoch": 2.3419898019684573, "grad_norm": 0.5514580577342542, "learning_rate": 1.9341406670256647e-05, "loss": 0.0748, "step": 19750 }, { "epoch": 2.342108383730582, "grad_norm": 0.547867145504884, "learning_rate": 1.9339068630602112e-05, "loss": 0.0883, "step": 19751 }, { "epoch": 2.3422269654927073, "grad_norm": 0.6029595041376784, "learning_rate": 1.9336730643134417e-05, "loss": 0.0713, "step": 19752 }, { "epoch": 2.342345547254832, "grad_norm": 0.5367020330942217, "learning_rate": 1.9334392707875128e-05, "loss": 0.0804, "step": 19753 }, { "epoch": 2.342464129016957, "grad_norm": 0.738647823957988, "learning_rate": 1.933205482484579e-05, "loss": 0.0923, "step": 19754 }, { "epoch": 2.342582710779082, "grad_norm": 0.662668803125056, "learning_rate": 1.932971699406796e-05, "loss": 0.0729, "step": 19755 }, { "epoch": 2.342701292541207, "grad_norm": 0.7973397700803287, "learning_rate": 1.932737921556317e-05, "loss": 0.0827, "step": 19756 }, { "epoch": 2.3428198743033324, "grad_norm": 0.6275708490014971, "learning_rate": 1.9325041489352998e-05, "loss": 0.0724, "step": 19757 }, { "epoch": 2.342938456065457, "grad_norm": 0.47540294258275795, "learning_rate": 1.932270381545899e-05, "loss": 0.0572, "step": 19758 }, { "epoch": 2.343057037827582, "grad_norm": 0.9093494813092565, "learning_rate": 1.932036619390268e-05, "loss": 0.1197, "step": 19759 }, { "epoch": 2.343175619589707, "grad_norm": 0.5195766936470755, "learning_rate": 1.931802862470563e-05, "loss": 0.0536, "step": 19760 }, { "epoch": 2.3432942013518323, "grad_norm": 0.9041600548697046, "learning_rate": 1.9315691107889393e-05, "loss": 0.1294, "step": 19761 }, { "epoch": 2.343412783113957, "grad_norm": 0.6909154171215438, "learning_rate": 1.9313353643475513e-05, "loss": 0.106, "step": 19762 }, { "epoch": 2.343531364876082, "grad_norm": 0.8259139049507424, "learning_rate": 1.9311016231485528e-05, "loss": 0.1373, "step": 19763 }, { "epoch": 2.343649946638207, "grad_norm": 0.945278727795582, "learning_rate": 1.930867887194101e-05, "loss": 0.1414, "step": 19764 }, { "epoch": 2.3437685284003322, "grad_norm": 0.6710708204029491, "learning_rate": 1.9306341564863495e-05, "loss": 0.0958, "step": 19765 }, { "epoch": 2.343887110162457, "grad_norm": 0.846903222593842, "learning_rate": 1.930400431027453e-05, "loss": 0.1085, "step": 19766 }, { "epoch": 2.344005691924582, "grad_norm": 0.7893763158241576, "learning_rate": 1.9301667108195647e-05, "loss": 0.0969, "step": 19767 }, { "epoch": 2.344124273686707, "grad_norm": 0.9502660844568719, "learning_rate": 1.9299329958648416e-05, "loss": 0.1233, "step": 19768 }, { "epoch": 2.344242855448832, "grad_norm": 0.7930684079201124, "learning_rate": 1.9296992861654365e-05, "loss": 0.1049, "step": 19769 }, { "epoch": 2.344361437210957, "grad_norm": 0.7117666331400104, "learning_rate": 1.9294655817235054e-05, "loss": 0.0942, "step": 19770 }, { "epoch": 2.344480018973082, "grad_norm": 0.5352042000066355, "learning_rate": 1.9292318825412012e-05, "loss": 0.067, "step": 19771 }, { "epoch": 2.344598600735207, "grad_norm": 0.732324123265562, "learning_rate": 1.92899818862068e-05, "loss": 0.1188, "step": 19772 }, { "epoch": 2.344717182497332, "grad_norm": 0.8942760579746863, "learning_rate": 1.928764499964095e-05, "loss": 0.0925, "step": 19773 }, { "epoch": 2.344835764259457, "grad_norm": 0.8730917955815471, "learning_rate": 1.928530816573601e-05, "loss": 0.1165, "step": 19774 }, { "epoch": 2.344954346021582, "grad_norm": 0.6454134179398539, "learning_rate": 1.928297138451351e-05, "loss": 0.0952, "step": 19775 }, { "epoch": 2.345072927783707, "grad_norm": 1.0236629590657687, "learning_rate": 1.9280634655995016e-05, "loss": 0.143, "step": 19776 }, { "epoch": 2.345191509545832, "grad_norm": 0.5561109857553563, "learning_rate": 1.9278297980202054e-05, "loss": 0.0645, "step": 19777 }, { "epoch": 2.3453100913079568, "grad_norm": 0.7143221421012023, "learning_rate": 1.9275961357156158e-05, "loss": 0.0915, "step": 19778 }, { "epoch": 2.345428673070082, "grad_norm": 0.7362716418379507, "learning_rate": 1.927362478687889e-05, "loss": 0.1066, "step": 19779 }, { "epoch": 2.3455472548322067, "grad_norm": 0.5862018638967907, "learning_rate": 1.9271288269391775e-05, "loss": 0.0812, "step": 19780 }, { "epoch": 2.345665836594332, "grad_norm": 0.759167104771286, "learning_rate": 1.926895180471636e-05, "loss": 0.0819, "step": 19781 }, { "epoch": 2.3457844183564567, "grad_norm": 0.6340702208384057, "learning_rate": 1.9266615392874176e-05, "loss": 0.0865, "step": 19782 }, { "epoch": 2.345903000118582, "grad_norm": 0.9414069799285542, "learning_rate": 1.9264279033886773e-05, "loss": 0.1086, "step": 19783 }, { "epoch": 2.3460215818807066, "grad_norm": 0.6111695038203748, "learning_rate": 1.9261942727775686e-05, "loss": 0.0773, "step": 19784 }, { "epoch": 2.346140163642832, "grad_norm": 0.7739393484556246, "learning_rate": 1.925960647456245e-05, "loss": 0.1039, "step": 19785 }, { "epoch": 2.3462587454049566, "grad_norm": 0.8277164227650491, "learning_rate": 1.9257270274268595e-05, "loss": 0.1195, "step": 19786 }, { "epoch": 2.346377327167082, "grad_norm": 0.7543015721018771, "learning_rate": 1.9254934126915674e-05, "loss": 0.1224, "step": 19787 }, { "epoch": 2.3464959089292066, "grad_norm": 0.6314558329946334, "learning_rate": 1.9252598032525212e-05, "loss": 0.0779, "step": 19788 }, { "epoch": 2.3466144906913318, "grad_norm": 0.5411889510724461, "learning_rate": 1.925026199111874e-05, "loss": 0.0761, "step": 19789 }, { "epoch": 2.3467330724534565, "grad_norm": 0.5929264616699161, "learning_rate": 1.9247926002717814e-05, "loss": 0.0798, "step": 19790 }, { "epoch": 2.3468516542155817, "grad_norm": 0.638449023004693, "learning_rate": 1.924559006734395e-05, "loss": 0.0724, "step": 19791 }, { "epoch": 2.3469702359777065, "grad_norm": 0.6722579479339007, "learning_rate": 1.9243254185018694e-05, "loss": 0.1076, "step": 19792 }, { "epoch": 2.3470888177398317, "grad_norm": 0.6275003425221145, "learning_rate": 1.9240918355763565e-05, "loss": 0.0822, "step": 19793 }, { "epoch": 2.3472073995019564, "grad_norm": 0.8466869530758483, "learning_rate": 1.923858257960012e-05, "loss": 0.1189, "step": 19794 }, { "epoch": 2.3473259812640817, "grad_norm": 0.6323141594548182, "learning_rate": 1.9236246856549874e-05, "loss": 0.0909, "step": 19795 }, { "epoch": 2.3474445630262064, "grad_norm": 0.8831156396962699, "learning_rate": 1.9233911186634364e-05, "loss": 0.1338, "step": 19796 }, { "epoch": 2.3475631447883316, "grad_norm": 0.5984376097872689, "learning_rate": 1.9231575569875116e-05, "loss": 0.0673, "step": 19797 }, { "epoch": 2.3476817265504564, "grad_norm": 0.9409244213173198, "learning_rate": 1.9229240006293673e-05, "loss": 0.1048, "step": 19798 }, { "epoch": 2.3478003083125816, "grad_norm": 0.7626834185464162, "learning_rate": 1.9226904495911558e-05, "loss": 0.1179, "step": 19799 }, { "epoch": 2.3479188900747063, "grad_norm": 0.6421182570507952, "learning_rate": 1.9224569038750308e-05, "loss": 0.0967, "step": 19800 }, { "epoch": 2.3480374718368315, "grad_norm": 0.8978157066620671, "learning_rate": 1.922223363483144e-05, "loss": 0.1261, "step": 19801 }, { "epoch": 2.3481560535989563, "grad_norm": 0.7233825952185878, "learning_rate": 1.92198982841765e-05, "loss": 0.094, "step": 19802 }, { "epoch": 2.3482746353610815, "grad_norm": 0.6737318182095349, "learning_rate": 1.9217562986807014e-05, "loss": 0.103, "step": 19803 }, { "epoch": 2.3483932171232063, "grad_norm": 0.7747020458422441, "learning_rate": 1.9215227742744495e-05, "loss": 0.0921, "step": 19804 }, { "epoch": 2.3485117988853315, "grad_norm": 1.1471690218904318, "learning_rate": 1.9212892552010493e-05, "loss": 0.1526, "step": 19805 }, { "epoch": 2.3486303806474567, "grad_norm": 1.0830756717501433, "learning_rate": 1.9210557414626522e-05, "loss": 0.1654, "step": 19806 }, { "epoch": 2.3487489624095814, "grad_norm": 0.7177847027961992, "learning_rate": 1.9208222330614117e-05, "loss": 0.1086, "step": 19807 }, { "epoch": 2.348867544171706, "grad_norm": 0.8701836263846132, "learning_rate": 1.9205887299994783e-05, "loss": 0.1142, "step": 19808 }, { "epoch": 2.3489861259338314, "grad_norm": 0.5826198252567217, "learning_rate": 1.920355232279008e-05, "loss": 0.0864, "step": 19809 }, { "epoch": 2.3491047076959566, "grad_norm": 0.6251157946987146, "learning_rate": 1.920121739902151e-05, "loss": 0.0827, "step": 19810 }, { "epoch": 2.3492232894580813, "grad_norm": 0.7922254825335069, "learning_rate": 1.9198882528710608e-05, "loss": 0.136, "step": 19811 }, { "epoch": 2.349341871220206, "grad_norm": 0.7256369643690842, "learning_rate": 1.9196547711878883e-05, "loss": 0.1194, "step": 19812 }, { "epoch": 2.3494604529823313, "grad_norm": 0.5793194977530085, "learning_rate": 1.9194212948547884e-05, "loss": 0.0879, "step": 19813 }, { "epoch": 2.3495790347444565, "grad_norm": 0.548692633728102, "learning_rate": 1.919187823873912e-05, "loss": 0.0775, "step": 19814 }, { "epoch": 2.3496976165065813, "grad_norm": 0.6754916291897857, "learning_rate": 1.9189543582474123e-05, "loss": 0.1035, "step": 19815 }, { "epoch": 2.349816198268706, "grad_norm": 0.7762238902797676, "learning_rate": 1.918720897977439e-05, "loss": 0.1012, "step": 19816 }, { "epoch": 2.3499347800308312, "grad_norm": 0.6663830316645823, "learning_rate": 1.918487443066148e-05, "loss": 0.0795, "step": 19817 }, { "epoch": 2.3500533617929564, "grad_norm": 0.7375851298626243, "learning_rate": 1.9182539935156884e-05, "loss": 0.1027, "step": 19818 }, { "epoch": 2.350171943555081, "grad_norm": 0.705449094417613, "learning_rate": 1.9180205493282143e-05, "loss": 0.0747, "step": 19819 }, { "epoch": 2.3502905253172064, "grad_norm": 0.8659654096583117, "learning_rate": 1.9177871105058766e-05, "loss": 0.1162, "step": 19820 }, { "epoch": 2.350409107079331, "grad_norm": 0.6892590392475275, "learning_rate": 1.9175536770508286e-05, "loss": 0.1073, "step": 19821 }, { "epoch": 2.3505276888414564, "grad_norm": 0.5399384648640292, "learning_rate": 1.917320248965221e-05, "loss": 0.0741, "step": 19822 }, { "epoch": 2.350646270603581, "grad_norm": 0.7297137154513963, "learning_rate": 1.9170868262512057e-05, "loss": 0.1, "step": 19823 }, { "epoch": 2.3507648523657063, "grad_norm": 0.6302455281561586, "learning_rate": 1.9168534089109358e-05, "loss": 0.086, "step": 19824 }, { "epoch": 2.350883434127831, "grad_norm": 0.8790209207206711, "learning_rate": 1.9166199969465627e-05, "loss": 0.1324, "step": 19825 }, { "epoch": 2.3510020158899563, "grad_norm": 0.7489139956891488, "learning_rate": 1.9163865903602374e-05, "loss": 0.0976, "step": 19826 }, { "epoch": 2.351120597652081, "grad_norm": 0.7562583080799781, "learning_rate": 1.916153189154111e-05, "loss": 0.1228, "step": 19827 }, { "epoch": 2.3512391794142062, "grad_norm": 0.8174630110142237, "learning_rate": 1.9159197933303374e-05, "loss": 0.1127, "step": 19828 }, { "epoch": 2.351357761176331, "grad_norm": 0.7082858625607088, "learning_rate": 1.9156864028910665e-05, "loss": 0.104, "step": 19829 }, { "epoch": 2.351476342938456, "grad_norm": 0.7274738513368273, "learning_rate": 1.915453017838451e-05, "loss": 0.1214, "step": 19830 }, { "epoch": 2.351594924700581, "grad_norm": 0.726896386416814, "learning_rate": 1.915219638174641e-05, "loss": 0.0984, "step": 19831 }, { "epoch": 2.351713506462706, "grad_norm": 0.631438451060847, "learning_rate": 1.9149862639017895e-05, "loss": 0.0825, "step": 19832 }, { "epoch": 2.351832088224831, "grad_norm": 0.6246939140584256, "learning_rate": 1.914752895022048e-05, "loss": 0.0885, "step": 19833 }, { "epoch": 2.351950669986956, "grad_norm": 0.7910329437392917, "learning_rate": 1.9145195315375652e-05, "loss": 0.0886, "step": 19834 }, { "epoch": 2.352069251749081, "grad_norm": 0.7895658861988475, "learning_rate": 1.9142861734504958e-05, "loss": 0.1256, "step": 19835 }, { "epoch": 2.352187833511206, "grad_norm": 1.054074322324468, "learning_rate": 1.9140528207629897e-05, "loss": 0.1358, "step": 19836 }, { "epoch": 2.352306415273331, "grad_norm": 0.8596446818299028, "learning_rate": 1.913819473477198e-05, "loss": 0.101, "step": 19837 }, { "epoch": 2.352424997035456, "grad_norm": 0.6682199950058394, "learning_rate": 1.913586131595271e-05, "loss": 0.1105, "step": 19838 }, { "epoch": 2.352543578797581, "grad_norm": 0.8712253936077597, "learning_rate": 1.9133527951193615e-05, "loss": 0.1157, "step": 19839 }, { "epoch": 2.352662160559706, "grad_norm": 1.3984441475866805, "learning_rate": 1.9131194640516192e-05, "loss": 0.2518, "step": 19840 }, { "epoch": 2.3527807423218308, "grad_norm": 0.6505983405250783, "learning_rate": 1.9128861383941965e-05, "loss": 0.0825, "step": 19841 }, { "epoch": 2.352899324083956, "grad_norm": 0.8278307276182898, "learning_rate": 1.9126528181492425e-05, "loss": 0.1069, "step": 19842 }, { "epoch": 2.3530179058460807, "grad_norm": 0.9723643652949461, "learning_rate": 1.9124195033189108e-05, "loss": 0.1197, "step": 19843 }, { "epoch": 2.353136487608206, "grad_norm": 1.2826229192036434, "learning_rate": 1.9121861939053503e-05, "loss": 0.1549, "step": 19844 }, { "epoch": 2.3532550693703307, "grad_norm": 0.5026035557759794, "learning_rate": 1.9119528899107122e-05, "loss": 0.0873, "step": 19845 }, { "epoch": 2.353373651132456, "grad_norm": 0.6775614818952257, "learning_rate": 1.9117195913371467e-05, "loss": 0.0801, "step": 19846 }, { "epoch": 2.3534922328945806, "grad_norm": 0.5644852513466332, "learning_rate": 1.9114862981868058e-05, "loss": 0.0859, "step": 19847 }, { "epoch": 2.353610814656706, "grad_norm": 0.5274703675187851, "learning_rate": 1.9112530104618394e-05, "loss": 0.066, "step": 19848 }, { "epoch": 2.3537293964188306, "grad_norm": 0.6975473551743911, "learning_rate": 1.9110197281643978e-05, "loss": 0.0922, "step": 19849 }, { "epoch": 2.353847978180956, "grad_norm": 0.9034120340807436, "learning_rate": 1.9107864512966323e-05, "loss": 0.1101, "step": 19850 }, { "epoch": 2.3539665599430806, "grad_norm": 0.5871157366074711, "learning_rate": 1.910553179860694e-05, "loss": 0.0643, "step": 19851 }, { "epoch": 2.3540851417052058, "grad_norm": 0.669264411511396, "learning_rate": 1.910319913858732e-05, "loss": 0.093, "step": 19852 }, { "epoch": 2.3542037234673305, "grad_norm": 0.5815072177925263, "learning_rate": 1.9100866532928965e-05, "loss": 0.0661, "step": 19853 }, { "epoch": 2.3543223052294557, "grad_norm": 0.9551699858858056, "learning_rate": 1.9098533981653395e-05, "loss": 0.1476, "step": 19854 }, { "epoch": 2.354440886991581, "grad_norm": 0.7399535789505438, "learning_rate": 1.9096201484782107e-05, "loss": 0.1143, "step": 19855 }, { "epoch": 2.3545594687537057, "grad_norm": 0.6501666067495723, "learning_rate": 1.9093869042336603e-05, "loss": 0.0966, "step": 19856 }, { "epoch": 2.3546780505158305, "grad_norm": 0.7693990446451955, "learning_rate": 1.909153665433837e-05, "loss": 0.1361, "step": 19857 }, { "epoch": 2.3547966322779557, "grad_norm": 0.7379651079016216, "learning_rate": 1.908920432080894e-05, "loss": 0.0992, "step": 19858 }, { "epoch": 2.354915214040081, "grad_norm": 0.8159564627233838, "learning_rate": 1.908687204176979e-05, "loss": 0.096, "step": 19859 }, { "epoch": 2.3550337958022056, "grad_norm": 0.6483743967761181, "learning_rate": 1.908453981724243e-05, "loss": 0.0905, "step": 19860 }, { "epoch": 2.3551523775643304, "grad_norm": 0.7336448548410783, "learning_rate": 1.9082207647248352e-05, "loss": 0.0902, "step": 19861 }, { "epoch": 2.3552709593264556, "grad_norm": 0.9597187172715144, "learning_rate": 1.9079875531809072e-05, "loss": 0.1036, "step": 19862 }, { "epoch": 2.355389541088581, "grad_norm": 0.8663429031859886, "learning_rate": 1.9077543470946076e-05, "loss": 0.1202, "step": 19863 }, { "epoch": 2.3555081228507055, "grad_norm": 0.8742438180109534, "learning_rate": 1.9075211464680862e-05, "loss": 0.1135, "step": 19864 }, { "epoch": 2.3556267046128303, "grad_norm": 1.0394826477405401, "learning_rate": 1.907287951303494e-05, "loss": 0.1149, "step": 19865 }, { "epoch": 2.3557452863749555, "grad_norm": 0.7646873060114979, "learning_rate": 1.9070547616029802e-05, "loss": 0.1123, "step": 19866 }, { "epoch": 2.3558638681370807, "grad_norm": 0.6409418979347721, "learning_rate": 1.9068215773686944e-05, "loss": 0.0842, "step": 19867 }, { "epoch": 2.3559824498992055, "grad_norm": 0.562437137355907, "learning_rate": 1.9065883986027852e-05, "loss": 0.0786, "step": 19868 }, { "epoch": 2.3561010316613307, "grad_norm": 0.6335742049633799, "learning_rate": 1.9063552253074044e-05, "loss": 0.078, "step": 19869 }, { "epoch": 2.3562196134234554, "grad_norm": 0.7030561559825409, "learning_rate": 1.9061220574846998e-05, "loss": 0.1105, "step": 19870 }, { "epoch": 2.3563381951855806, "grad_norm": 0.9221840403760874, "learning_rate": 1.905888895136822e-05, "loss": 0.1432, "step": 19871 }, { "epoch": 2.3564567769477054, "grad_norm": 0.7220588075103385, "learning_rate": 1.905655738265919e-05, "loss": 0.1082, "step": 19872 }, { "epoch": 2.3565753587098306, "grad_norm": 0.7144417130030801, "learning_rate": 1.9054225868741426e-05, "loss": 0.104, "step": 19873 }, { "epoch": 2.3566939404719554, "grad_norm": 0.623889755175121, "learning_rate": 1.9051894409636406e-05, "loss": 0.0789, "step": 19874 }, { "epoch": 2.3568125222340806, "grad_norm": 0.6617505360199163, "learning_rate": 1.9049563005365615e-05, "loss": 0.0692, "step": 19875 }, { "epoch": 2.3569311039962053, "grad_norm": 0.9281244400222286, "learning_rate": 1.904723165595057e-05, "loss": 0.1109, "step": 19876 }, { "epoch": 2.3570496857583305, "grad_norm": 0.5593013847280809, "learning_rate": 1.9044900361412744e-05, "loss": 0.0618, "step": 19877 }, { "epoch": 2.3571682675204553, "grad_norm": 0.7553037364350648, "learning_rate": 1.904256912177363e-05, "loss": 0.1101, "step": 19878 }, { "epoch": 2.3572868492825805, "grad_norm": 0.8935580767507609, "learning_rate": 1.9040237937054724e-05, "loss": 0.1257, "step": 19879 }, { "epoch": 2.3574054310447052, "grad_norm": 0.6175700671350106, "learning_rate": 1.9037906807277516e-05, "loss": 0.087, "step": 19880 }, { "epoch": 2.3575240128068304, "grad_norm": 0.586312170015034, "learning_rate": 1.90355757324635e-05, "loss": 0.0933, "step": 19881 }, { "epoch": 2.357642594568955, "grad_norm": 0.5806703324516157, "learning_rate": 1.9033244712634165e-05, "loss": 0.0781, "step": 19882 }, { "epoch": 2.3577611763310804, "grad_norm": 0.9564470972189384, "learning_rate": 1.9030913747810986e-05, "loss": 0.1083, "step": 19883 }, { "epoch": 2.357879758093205, "grad_norm": 0.7116920857010154, "learning_rate": 1.9028582838015472e-05, "loss": 0.0813, "step": 19884 }, { "epoch": 2.3579983398553304, "grad_norm": 1.0127890265485777, "learning_rate": 1.90262519832691e-05, "loss": 0.1051, "step": 19885 }, { "epoch": 2.358116921617455, "grad_norm": 0.7004707639386736, "learning_rate": 1.902392118359336e-05, "loss": 0.0866, "step": 19886 }, { "epoch": 2.3582355033795803, "grad_norm": 0.7386486494071466, "learning_rate": 1.902159043900973e-05, "loss": 0.109, "step": 19887 }, { "epoch": 2.358354085141705, "grad_norm": 0.9755352516217708, "learning_rate": 1.901925974953971e-05, "loss": 0.1321, "step": 19888 }, { "epoch": 2.3584726669038303, "grad_norm": 0.7269959368807667, "learning_rate": 1.9016929115204782e-05, "loss": 0.1041, "step": 19889 }, { "epoch": 2.358591248665955, "grad_norm": 1.124987155974015, "learning_rate": 1.901459853602643e-05, "loss": 0.1433, "step": 19890 }, { "epoch": 2.3587098304280802, "grad_norm": 0.7173994222167089, "learning_rate": 1.9012268012026137e-05, "loss": 0.0789, "step": 19891 }, { "epoch": 2.358828412190205, "grad_norm": 1.1139010944296883, "learning_rate": 1.9009937543225394e-05, "loss": 0.1503, "step": 19892 }, { "epoch": 2.35894699395233, "grad_norm": 0.8242291058444831, "learning_rate": 1.9007607129645684e-05, "loss": 0.1034, "step": 19893 }, { "epoch": 2.359065575714455, "grad_norm": 0.9676347375556623, "learning_rate": 1.9005276771308476e-05, "loss": 0.1114, "step": 19894 }, { "epoch": 2.35918415747658, "grad_norm": 0.9390613014127689, "learning_rate": 1.9002946468235276e-05, "loss": 0.1384, "step": 19895 }, { "epoch": 2.359302739238705, "grad_norm": 0.6495182431153013, "learning_rate": 1.9000616220447558e-05, "loss": 0.1042, "step": 19896 }, { "epoch": 2.35942132100083, "grad_norm": 0.7104690312019263, "learning_rate": 1.8998286027966794e-05, "loss": 0.0958, "step": 19897 }, { "epoch": 2.359539902762955, "grad_norm": 0.5783462146022365, "learning_rate": 1.8995955890814474e-05, "loss": 0.0882, "step": 19898 }, { "epoch": 2.35965848452508, "grad_norm": 0.6487935321917006, "learning_rate": 1.8993625809012077e-05, "loss": 0.0907, "step": 19899 }, { "epoch": 2.359777066287205, "grad_norm": 0.7661267038295545, "learning_rate": 1.899129578258109e-05, "loss": 0.0838, "step": 19900 }, { "epoch": 2.35989564804933, "grad_norm": 0.5843039470981641, "learning_rate": 1.8988965811542987e-05, "loss": 0.0601, "step": 19901 }, { "epoch": 2.360014229811455, "grad_norm": 0.5158724456391116, "learning_rate": 1.8986635895919238e-05, "loss": 0.0677, "step": 19902 }, { "epoch": 2.36013281157358, "grad_norm": 0.7526477058401673, "learning_rate": 1.8984306035731343e-05, "loss": 0.1034, "step": 19903 }, { "epoch": 2.360251393335705, "grad_norm": 0.6892706222282654, "learning_rate": 1.898197623100077e-05, "loss": 0.0905, "step": 19904 }, { "epoch": 2.36036997509783, "grad_norm": 0.7696994708635592, "learning_rate": 1.8979646481748982e-05, "loss": 0.0899, "step": 19905 }, { "epoch": 2.3604885568599547, "grad_norm": 0.9435518134021565, "learning_rate": 1.8977316787997484e-05, "loss": 0.107, "step": 19906 }, { "epoch": 2.36060713862208, "grad_norm": 0.7430218755818324, "learning_rate": 1.8974987149767742e-05, "loss": 0.1123, "step": 19907 }, { "epoch": 2.360725720384205, "grad_norm": 0.7339742647781969, "learning_rate": 1.8972657567081224e-05, "loss": 0.107, "step": 19908 }, { "epoch": 2.36084430214633, "grad_norm": 0.5741792078095238, "learning_rate": 1.897032803995941e-05, "loss": 0.0621, "step": 19909 }, { "epoch": 2.3609628839084547, "grad_norm": 0.7839605335493782, "learning_rate": 1.8967998568423777e-05, "loss": 0.0934, "step": 19910 }, { "epoch": 2.36108146567058, "grad_norm": 0.8353336038019893, "learning_rate": 1.8965669152495806e-05, "loss": 0.1314, "step": 19911 }, { "epoch": 2.361200047432705, "grad_norm": 0.45598454802048627, "learning_rate": 1.8963339792196968e-05, "loss": 0.0662, "step": 19912 }, { "epoch": 2.36131862919483, "grad_norm": 0.6740446257299825, "learning_rate": 1.8961010487548723e-05, "loss": 0.1068, "step": 19913 }, { "epoch": 2.3614372109569546, "grad_norm": 0.8494219236681743, "learning_rate": 1.8958681238572566e-05, "loss": 0.13, "step": 19914 }, { "epoch": 2.36155579271908, "grad_norm": 0.6728658091237214, "learning_rate": 1.8956352045289956e-05, "loss": 0.0863, "step": 19915 }, { "epoch": 2.361674374481205, "grad_norm": 0.610201917025246, "learning_rate": 1.8954022907722375e-05, "loss": 0.0855, "step": 19916 }, { "epoch": 2.3617929562433297, "grad_norm": 0.8165771099835846, "learning_rate": 1.8951693825891277e-05, "loss": 0.1132, "step": 19917 }, { "epoch": 2.361911538005455, "grad_norm": 0.8269950070467138, "learning_rate": 1.8949364799818152e-05, "loss": 0.1029, "step": 19918 }, { "epoch": 2.3620301197675797, "grad_norm": 0.8217942785014746, "learning_rate": 1.894703582952446e-05, "loss": 0.0843, "step": 19919 }, { "epoch": 2.362148701529705, "grad_norm": 0.4771207143828343, "learning_rate": 1.8944706915031673e-05, "loss": 0.0779, "step": 19920 }, { "epoch": 2.3622672832918297, "grad_norm": 0.6878069183292139, "learning_rate": 1.8942378056361267e-05, "loss": 0.0894, "step": 19921 }, { "epoch": 2.362385865053955, "grad_norm": 0.7508953956717498, "learning_rate": 1.894004925353471e-05, "loss": 0.1135, "step": 19922 }, { "epoch": 2.3625044468160796, "grad_norm": 0.7423991021993497, "learning_rate": 1.893772050657347e-05, "loss": 0.0757, "step": 19923 }, { "epoch": 2.362623028578205, "grad_norm": 0.5857239107523646, "learning_rate": 1.8935391815499e-05, "loss": 0.0703, "step": 19924 }, { "epoch": 2.3627416103403296, "grad_norm": 0.7825219143585538, "learning_rate": 1.893306318033279e-05, "loss": 0.112, "step": 19925 }, { "epoch": 2.362860192102455, "grad_norm": 0.5749701813711697, "learning_rate": 1.8930734601096302e-05, "loss": 0.0773, "step": 19926 }, { "epoch": 2.3629787738645796, "grad_norm": 0.750079780636554, "learning_rate": 1.892840607781099e-05, "loss": 0.0961, "step": 19927 }, { "epoch": 2.3630973556267048, "grad_norm": 0.8772427984184082, "learning_rate": 1.8926077610498328e-05, "loss": 0.1434, "step": 19928 }, { "epoch": 2.3632159373888295, "grad_norm": 0.6890904001253891, "learning_rate": 1.892374919917978e-05, "loss": 0.0946, "step": 19929 }, { "epoch": 2.3633345191509547, "grad_norm": 0.7430910933224466, "learning_rate": 1.8921420843876824e-05, "loss": 0.1019, "step": 19930 }, { "epoch": 2.3634531009130795, "grad_norm": 0.6121896308552848, "learning_rate": 1.891909254461091e-05, "loss": 0.0691, "step": 19931 }, { "epoch": 2.3635716826752047, "grad_norm": 0.9919160581002853, "learning_rate": 1.8916764301403498e-05, "loss": 0.1491, "step": 19932 }, { "epoch": 2.3636902644373294, "grad_norm": 0.7910497561177082, "learning_rate": 1.891443611427607e-05, "loss": 0.1142, "step": 19933 }, { "epoch": 2.3638088461994546, "grad_norm": 0.8722898224173303, "learning_rate": 1.8912107983250073e-05, "loss": 0.1028, "step": 19934 }, { "epoch": 2.3639274279615794, "grad_norm": 0.7264868666385337, "learning_rate": 1.890977990834697e-05, "loss": 0.0969, "step": 19935 }, { "epoch": 2.3640460097237046, "grad_norm": 0.8822827991262268, "learning_rate": 1.8907451889588236e-05, "loss": 0.1029, "step": 19936 }, { "epoch": 2.3641645914858294, "grad_norm": 0.5721121777530824, "learning_rate": 1.8905123926995323e-05, "loss": 0.092, "step": 19937 }, { "epoch": 2.3642831732479546, "grad_norm": 1.2508364482372387, "learning_rate": 1.890279602058969e-05, "loss": 0.1622, "step": 19938 }, { "epoch": 2.3644017550100793, "grad_norm": 0.7250648440542052, "learning_rate": 1.8900468170392806e-05, "loss": 0.1054, "step": 19939 }, { "epoch": 2.3645203367722045, "grad_norm": 0.9006123805228402, "learning_rate": 1.8898140376426118e-05, "loss": 0.1269, "step": 19940 }, { "epoch": 2.3646389185343293, "grad_norm": 0.7791109758853495, "learning_rate": 1.8895812638711102e-05, "loss": 0.113, "step": 19941 }, { "epoch": 2.3647575002964545, "grad_norm": 0.7831450902538151, "learning_rate": 1.8893484957269207e-05, "loss": 0.1053, "step": 19942 }, { "epoch": 2.3648760820585792, "grad_norm": 0.8503348803455865, "learning_rate": 1.889115733212188e-05, "loss": 0.109, "step": 19943 }, { "epoch": 2.3649946638207044, "grad_norm": 0.6761577097189633, "learning_rate": 1.8888829763290605e-05, "loss": 0.0801, "step": 19944 }, { "epoch": 2.365113245582829, "grad_norm": 0.6971905259958479, "learning_rate": 1.888650225079682e-05, "loss": 0.1074, "step": 19945 }, { "epoch": 2.3652318273449544, "grad_norm": 0.7663806181912163, "learning_rate": 1.888417479466199e-05, "loss": 0.1111, "step": 19946 }, { "epoch": 2.365350409107079, "grad_norm": 0.8295301504607903, "learning_rate": 1.8881847394907564e-05, "loss": 0.1129, "step": 19947 }, { "epoch": 2.3654689908692044, "grad_norm": 0.6733879538178824, "learning_rate": 1.8879520051555005e-05, "loss": 0.1107, "step": 19948 }, { "epoch": 2.365587572631329, "grad_norm": 0.9754037879394877, "learning_rate": 1.8877192764625767e-05, "loss": 0.1304, "step": 19949 }, { "epoch": 2.3657061543934543, "grad_norm": 0.6060083266017028, "learning_rate": 1.8874865534141296e-05, "loss": 0.091, "step": 19950 }, { "epoch": 2.365824736155579, "grad_norm": 0.7782708785301196, "learning_rate": 1.8872538360123064e-05, "loss": 0.1197, "step": 19951 }, { "epoch": 2.3659433179177043, "grad_norm": 0.6181480422484958, "learning_rate": 1.8870211242592514e-05, "loss": 0.099, "step": 19952 }, { "epoch": 2.366061899679829, "grad_norm": 0.6494849007931369, "learning_rate": 1.88678841815711e-05, "loss": 0.0725, "step": 19953 }, { "epoch": 2.3661804814419543, "grad_norm": 0.9626175748259966, "learning_rate": 1.886555717708026e-05, "loss": 0.1358, "step": 19954 }, { "epoch": 2.366299063204079, "grad_norm": 0.907583915773801, "learning_rate": 1.8863230229141475e-05, "loss": 0.1463, "step": 19955 }, { "epoch": 2.366417644966204, "grad_norm": 0.6635840868681778, "learning_rate": 1.8860903337776183e-05, "loss": 0.1219, "step": 19956 }, { "epoch": 2.3665362267283294, "grad_norm": 0.6226300110420255, "learning_rate": 1.8858576503005827e-05, "loss": 0.0837, "step": 19957 }, { "epoch": 2.366654808490454, "grad_norm": 0.7989249326444906, "learning_rate": 1.885624972485186e-05, "loss": 0.1125, "step": 19958 }, { "epoch": 2.366773390252579, "grad_norm": 0.8603938491010702, "learning_rate": 1.8853923003335743e-05, "loss": 0.1051, "step": 19959 }, { "epoch": 2.366891972014704, "grad_norm": 0.5146153452773083, "learning_rate": 1.8851596338478923e-05, "loss": 0.0843, "step": 19960 }, { "epoch": 2.3670105537768293, "grad_norm": 0.7405172860890489, "learning_rate": 1.8849269730302833e-05, "loss": 0.0979, "step": 19961 }, { "epoch": 2.367129135538954, "grad_norm": 0.6124937029173426, "learning_rate": 1.8846943178828945e-05, "loss": 0.0981, "step": 19962 }, { "epoch": 2.367247717301079, "grad_norm": 0.775286751039157, "learning_rate": 1.8844616684078695e-05, "loss": 0.1241, "step": 19963 }, { "epoch": 2.367366299063204, "grad_norm": 0.7923354704395553, "learning_rate": 1.884229024607353e-05, "loss": 0.1004, "step": 19964 }, { "epoch": 2.3674848808253293, "grad_norm": 0.4930555783615494, "learning_rate": 1.8839963864834888e-05, "loss": 0.0853, "step": 19965 }, { "epoch": 2.367603462587454, "grad_norm": 0.5344725032971978, "learning_rate": 1.8837637540384237e-05, "loss": 0.077, "step": 19966 }, { "epoch": 2.3677220443495792, "grad_norm": 0.7722218934976957, "learning_rate": 1.883531127274301e-05, "loss": 0.0956, "step": 19967 }, { "epoch": 2.367840626111704, "grad_norm": 0.5752518975705441, "learning_rate": 1.8832985061932647e-05, "loss": 0.0822, "step": 19968 }, { "epoch": 2.367959207873829, "grad_norm": 0.5175758134146375, "learning_rate": 1.88306589079746e-05, "loss": 0.0672, "step": 19969 }, { "epoch": 2.368077789635954, "grad_norm": 0.6570221986468157, "learning_rate": 1.8828332810890314e-05, "loss": 0.1005, "step": 19970 }, { "epoch": 2.368196371398079, "grad_norm": 0.611769797492702, "learning_rate": 1.8826006770701234e-05, "loss": 0.0766, "step": 19971 }, { "epoch": 2.368314953160204, "grad_norm": 0.5617197751643379, "learning_rate": 1.8823680787428803e-05, "loss": 0.0755, "step": 19972 }, { "epoch": 2.368433534922329, "grad_norm": 0.7877788952640095, "learning_rate": 1.882135486109445e-05, "loss": 0.1043, "step": 19973 }, { "epoch": 2.368552116684454, "grad_norm": 0.9514509479428287, "learning_rate": 1.881902899171964e-05, "loss": 0.121, "step": 19974 }, { "epoch": 2.368670698446579, "grad_norm": 1.0356955371518892, "learning_rate": 1.88167031793258e-05, "loss": 0.0901, "step": 19975 }, { "epoch": 2.368789280208704, "grad_norm": 0.8150220276802356, "learning_rate": 1.8814377423934363e-05, "loss": 0.1124, "step": 19976 }, { "epoch": 2.368907861970829, "grad_norm": 0.8076262178988202, "learning_rate": 1.8812051725566798e-05, "loss": 0.1193, "step": 19977 }, { "epoch": 2.369026443732954, "grad_norm": 0.7509835040646926, "learning_rate": 1.8809726084244516e-05, "loss": 0.104, "step": 19978 }, { "epoch": 2.369145025495079, "grad_norm": 0.46853320952499655, "learning_rate": 1.880740049998898e-05, "loss": 0.059, "step": 19979 }, { "epoch": 2.3692636072572038, "grad_norm": 0.9098227811272487, "learning_rate": 1.88050749728216e-05, "loss": 0.1448, "step": 19980 }, { "epoch": 2.369382189019329, "grad_norm": 0.5427449191678437, "learning_rate": 1.8802749502763846e-05, "loss": 0.0734, "step": 19981 }, { "epoch": 2.3695007707814537, "grad_norm": 0.4884119532013986, "learning_rate": 1.8800424089837144e-05, "loss": 0.0688, "step": 19982 }, { "epoch": 2.369619352543579, "grad_norm": 0.8124719565471314, "learning_rate": 1.8798098734062926e-05, "loss": 0.1323, "step": 19983 }, { "epoch": 2.3697379343057037, "grad_norm": 0.6369246503169449, "learning_rate": 1.8795773435462623e-05, "loss": 0.0851, "step": 19984 }, { "epoch": 2.369856516067829, "grad_norm": 0.7712196186113499, "learning_rate": 1.879344819405769e-05, "loss": 0.1618, "step": 19985 }, { "epoch": 2.3699750978299536, "grad_norm": 0.7075111478268126, "learning_rate": 1.8791123009869557e-05, "loss": 0.0873, "step": 19986 }, { "epoch": 2.370093679592079, "grad_norm": 0.6642668229008403, "learning_rate": 1.878879788291965e-05, "loss": 0.0973, "step": 19987 }, { "epoch": 2.3702122613542036, "grad_norm": 0.7724202527018885, "learning_rate": 1.8786472813229408e-05, "loss": 0.0854, "step": 19988 }, { "epoch": 2.370330843116329, "grad_norm": 0.6479160696632843, "learning_rate": 1.8784147800820267e-05, "loss": 0.0813, "step": 19989 }, { "epoch": 2.3704494248784536, "grad_norm": 0.5513009510332562, "learning_rate": 1.878182284571367e-05, "loss": 0.0868, "step": 19990 }, { "epoch": 2.3705680066405788, "grad_norm": 0.4597233408655208, "learning_rate": 1.8779497947931028e-05, "loss": 0.0607, "step": 19991 }, { "epoch": 2.3706865884027035, "grad_norm": 0.6797456376956849, "learning_rate": 1.8777173107493793e-05, "loss": 0.0865, "step": 19992 }, { "epoch": 2.3708051701648287, "grad_norm": 0.6627717403628137, "learning_rate": 1.8774848324423397e-05, "loss": 0.0996, "step": 19993 }, { "epoch": 2.3709237519269535, "grad_norm": 0.4259367205894566, "learning_rate": 1.8772523598741264e-05, "loss": 0.0698, "step": 19994 }, { "epoch": 2.3710423336890787, "grad_norm": 0.7483280847053476, "learning_rate": 1.8770198930468816e-05, "loss": 0.0812, "step": 19995 }, { "epoch": 2.3711609154512034, "grad_norm": 0.8425786695993075, "learning_rate": 1.8767874319627506e-05, "loss": 0.1266, "step": 19996 }, { "epoch": 2.3712794972133286, "grad_norm": 0.6904726069700745, "learning_rate": 1.8765549766238753e-05, "loss": 0.0985, "step": 19997 }, { "epoch": 2.3713980789754534, "grad_norm": 0.7439175748092133, "learning_rate": 1.8763225270323977e-05, "loss": 0.0903, "step": 19998 }, { "epoch": 2.3715166607375786, "grad_norm": 0.6079903772508615, "learning_rate": 1.876090083190462e-05, "loss": 0.1002, "step": 19999 }, { "epoch": 2.3716352424997034, "grad_norm": 0.5023677329147301, "learning_rate": 1.87585764510021e-05, "loss": 0.0686, "step": 20000 }, { "epoch": 2.3717538242618286, "grad_norm": 0.6833155940781851, "learning_rate": 1.8756252127637864e-05, "loss": 0.1008, "step": 20001 }, { "epoch": 2.3718724060239533, "grad_norm": 0.6187885239704627, "learning_rate": 1.8753927861833324e-05, "loss": 0.1041, "step": 20002 }, { "epoch": 2.3719909877860785, "grad_norm": 0.7107883772514625, "learning_rate": 1.8751603653609897e-05, "loss": 0.115, "step": 20003 }, { "epoch": 2.3721095695482033, "grad_norm": 0.7619485316360244, "learning_rate": 1.8749279502989038e-05, "loss": 0.1139, "step": 20004 }, { "epoch": 2.3722281513103285, "grad_norm": 0.6603883485460309, "learning_rate": 1.874695540999215e-05, "loss": 0.096, "step": 20005 }, { "epoch": 2.3723467330724537, "grad_norm": 0.7199818399211823, "learning_rate": 1.8744631374640658e-05, "loss": 0.1054, "step": 20006 }, { "epoch": 2.3724653148345785, "grad_norm": 0.5125007184360268, "learning_rate": 1.8742307396956005e-05, "loss": 0.0682, "step": 20007 }, { "epoch": 2.372583896596703, "grad_norm": 0.8531313601882597, "learning_rate": 1.87399834769596e-05, "loss": 0.137, "step": 20008 }, { "epoch": 2.3727024783588284, "grad_norm": 0.8474209415371391, "learning_rate": 1.8737659614672877e-05, "loss": 0.1373, "step": 20009 }, { "epoch": 2.3728210601209536, "grad_norm": 0.6745005106913177, "learning_rate": 1.8735335810117238e-05, "loss": 0.0788, "step": 20010 }, { "epoch": 2.3729396418830784, "grad_norm": 0.9650746860494496, "learning_rate": 1.8733012063314132e-05, "loss": 0.1137, "step": 20011 }, { "epoch": 2.373058223645203, "grad_norm": 0.6071916149519339, "learning_rate": 1.873068837428497e-05, "loss": 0.0817, "step": 20012 }, { "epoch": 2.3731768054073283, "grad_norm": 0.6490907511035657, "learning_rate": 1.8728364743051176e-05, "loss": 0.1016, "step": 20013 }, { "epoch": 2.3732953871694535, "grad_norm": 0.618994327912844, "learning_rate": 1.8726041169634152e-05, "loss": 0.0996, "step": 20014 }, { "epoch": 2.3734139689315783, "grad_norm": 0.7522258510340007, "learning_rate": 1.872371765405535e-05, "loss": 0.11, "step": 20015 }, { "epoch": 2.3735325506937035, "grad_norm": 0.7696799759593597, "learning_rate": 1.8721394196336173e-05, "loss": 0.0949, "step": 20016 }, { "epoch": 2.3736511324558283, "grad_norm": 0.8883080679971082, "learning_rate": 1.871907079649804e-05, "loss": 0.125, "step": 20017 }, { "epoch": 2.3737697142179535, "grad_norm": 0.6872628592380868, "learning_rate": 1.871674745456237e-05, "loss": 0.1013, "step": 20018 }, { "epoch": 2.3738882959800782, "grad_norm": 0.7921745092339323, "learning_rate": 1.8714424170550583e-05, "loss": 0.1244, "step": 20019 }, { "epoch": 2.3740068777422034, "grad_norm": 0.7793545829117099, "learning_rate": 1.87121009444841e-05, "loss": 0.0878, "step": 20020 }, { "epoch": 2.374125459504328, "grad_norm": 0.8684786676302539, "learning_rate": 1.870977777638433e-05, "loss": 0.1199, "step": 20021 }, { "epoch": 2.3742440412664534, "grad_norm": 0.6407039448041054, "learning_rate": 1.8707454666272702e-05, "loss": 0.0742, "step": 20022 }, { "epoch": 2.374362623028578, "grad_norm": 0.4610225208020165, "learning_rate": 1.870513161417063e-05, "loss": 0.0629, "step": 20023 }, { "epoch": 2.3744812047907033, "grad_norm": 0.7673068371138112, "learning_rate": 1.870280862009952e-05, "loss": 0.1029, "step": 20024 }, { "epoch": 2.374599786552828, "grad_norm": 1.0252714015606006, "learning_rate": 1.8700485684080783e-05, "loss": 0.1533, "step": 20025 }, { "epoch": 2.3747183683149533, "grad_norm": 0.45361727481259223, "learning_rate": 1.8698162806135855e-05, "loss": 0.0745, "step": 20026 }, { "epoch": 2.374836950077078, "grad_norm": 0.7798144307738056, "learning_rate": 1.8695839986286134e-05, "loss": 0.1067, "step": 20027 }, { "epoch": 2.3749555318392033, "grad_norm": 0.7710058113181061, "learning_rate": 1.8693517224553042e-05, "loss": 0.1056, "step": 20028 }, { "epoch": 2.375074113601328, "grad_norm": 0.9960954006215434, "learning_rate": 1.8691194520957977e-05, "loss": 0.1222, "step": 20029 }, { "epoch": 2.3751926953634532, "grad_norm": 0.8253639992451972, "learning_rate": 1.868887187552237e-05, "loss": 0.1041, "step": 20030 }, { "epoch": 2.375311277125578, "grad_norm": 0.5603990856004272, "learning_rate": 1.868654928826763e-05, "loss": 0.0811, "step": 20031 }, { "epoch": 2.375429858887703, "grad_norm": 0.7315301939088297, "learning_rate": 1.8684226759215147e-05, "loss": 0.0941, "step": 20032 }, { "epoch": 2.375548440649828, "grad_norm": 0.6786204561647634, "learning_rate": 1.8681904288386366e-05, "loss": 0.0872, "step": 20033 }, { "epoch": 2.375667022411953, "grad_norm": 0.5846913753469047, "learning_rate": 1.8679581875802673e-05, "loss": 0.0891, "step": 20034 }, { "epoch": 2.375785604174078, "grad_norm": 0.8852418012716938, "learning_rate": 1.867725952148549e-05, "loss": 0.1002, "step": 20035 }, { "epoch": 2.375904185936203, "grad_norm": 0.5460127583227511, "learning_rate": 1.8674937225456207e-05, "loss": 0.0797, "step": 20036 }, { "epoch": 2.376022767698328, "grad_norm": 0.7141657086561574, "learning_rate": 1.8672614987736256e-05, "loss": 0.0756, "step": 20037 }, { "epoch": 2.376141349460453, "grad_norm": 0.6768208917454803, "learning_rate": 1.8670292808347033e-05, "loss": 0.0904, "step": 20038 }, { "epoch": 2.376259931222578, "grad_norm": 0.6809921578826068, "learning_rate": 1.8667970687309953e-05, "loss": 0.1285, "step": 20039 }, { "epoch": 2.376378512984703, "grad_norm": 0.5420894548196419, "learning_rate": 1.8665648624646406e-05, "loss": 0.0645, "step": 20040 }, { "epoch": 2.376497094746828, "grad_norm": 0.7304202582181731, "learning_rate": 1.8663326620377826e-05, "loss": 0.1126, "step": 20041 }, { "epoch": 2.376615676508953, "grad_norm": 0.6503655707257104, "learning_rate": 1.86610046745256e-05, "loss": 0.0821, "step": 20042 }, { "epoch": 2.3767342582710778, "grad_norm": 0.6129648842904731, "learning_rate": 1.865868278711114e-05, "loss": 0.0705, "step": 20043 }, { "epoch": 2.376852840033203, "grad_norm": 0.5945242063665328, "learning_rate": 1.8656360958155834e-05, "loss": 0.092, "step": 20044 }, { "epoch": 2.3769714217953277, "grad_norm": 0.4965356396674816, "learning_rate": 1.8654039187681117e-05, "loss": 0.0709, "step": 20045 }, { "epoch": 2.377090003557453, "grad_norm": 0.7081069488984314, "learning_rate": 1.865171747570837e-05, "loss": 0.0757, "step": 20046 }, { "epoch": 2.3772085853195777, "grad_norm": 0.824380649449185, "learning_rate": 1.8649395822258996e-05, "loss": 0.115, "step": 20047 }, { "epoch": 2.377327167081703, "grad_norm": 0.7254372060162212, "learning_rate": 1.8647074227354416e-05, "loss": 0.0995, "step": 20048 }, { "epoch": 2.3774457488438276, "grad_norm": 0.9314634499926518, "learning_rate": 1.8644752691016014e-05, "loss": 0.1129, "step": 20049 }, { "epoch": 2.377564330605953, "grad_norm": 0.671753283320527, "learning_rate": 1.8642431213265208e-05, "loss": 0.0881, "step": 20050 }, { "epoch": 2.3776829123680776, "grad_norm": 1.1805712281715173, "learning_rate": 1.864010979412338e-05, "loss": 0.1617, "step": 20051 }, { "epoch": 2.377801494130203, "grad_norm": 0.8086816647047935, "learning_rate": 1.8637788433611948e-05, "loss": 0.1072, "step": 20052 }, { "epoch": 2.3779200758923276, "grad_norm": 0.9277793351333308, "learning_rate": 1.8635467131752303e-05, "loss": 0.123, "step": 20053 }, { "epoch": 2.3780386576544528, "grad_norm": 0.936608419437902, "learning_rate": 1.8633145888565852e-05, "loss": 0.1445, "step": 20054 }, { "epoch": 2.378157239416578, "grad_norm": 0.6109312966984188, "learning_rate": 1.8630824704073972e-05, "loss": 0.081, "step": 20055 }, { "epoch": 2.3782758211787027, "grad_norm": 0.6592282112881449, "learning_rate": 1.862850357829809e-05, "loss": 0.0877, "step": 20056 }, { "epoch": 2.3783944029408275, "grad_norm": 0.6752436648790341, "learning_rate": 1.862618251125959e-05, "loss": 0.0819, "step": 20057 }, { "epoch": 2.3785129847029527, "grad_norm": 1.0437211360078944, "learning_rate": 1.8623861502979873e-05, "loss": 0.1431, "step": 20058 }, { "epoch": 2.378631566465078, "grad_norm": 0.6676588929151782, "learning_rate": 1.862154055348032e-05, "loss": 0.0908, "step": 20059 }, { "epoch": 2.3787501482272027, "grad_norm": 0.5702793301403799, "learning_rate": 1.8619219662782354e-05, "loss": 0.0693, "step": 20060 }, { "epoch": 2.3788687299893274, "grad_norm": 0.7336422261197367, "learning_rate": 1.8616898830907363e-05, "loss": 0.1041, "step": 20061 }, { "epoch": 2.3789873117514526, "grad_norm": 0.41698981607663144, "learning_rate": 1.8614578057876724e-05, "loss": 0.0696, "step": 20062 }, { "epoch": 2.379105893513578, "grad_norm": 0.8740310957967917, "learning_rate": 1.8612257343711855e-05, "loss": 0.0984, "step": 20063 }, { "epoch": 2.3792244752757026, "grad_norm": 0.6751166075426867, "learning_rate": 1.8609936688434144e-05, "loss": 0.0925, "step": 20064 }, { "epoch": 2.3793430570378273, "grad_norm": 0.7952344454621917, "learning_rate": 1.8607616092064975e-05, "loss": 0.1003, "step": 20065 }, { "epoch": 2.3794616387999525, "grad_norm": 1.154005013267378, "learning_rate": 1.8605295554625738e-05, "loss": 0.1292, "step": 20066 }, { "epoch": 2.3795802205620777, "grad_norm": 1.0289451292869782, "learning_rate": 1.8602975076137846e-05, "loss": 0.1543, "step": 20067 }, { "epoch": 2.3796988023242025, "grad_norm": 0.7788258309354711, "learning_rate": 1.8600654656622672e-05, "loss": 0.0962, "step": 20068 }, { "epoch": 2.3798173840863277, "grad_norm": 0.6317958394057012, "learning_rate": 1.859833429610162e-05, "loss": 0.0963, "step": 20069 }, { "epoch": 2.3799359658484525, "grad_norm": 0.6946637552595233, "learning_rate": 1.8596013994596066e-05, "loss": 0.087, "step": 20070 }, { "epoch": 2.3800545476105777, "grad_norm": 0.8769487903987615, "learning_rate": 1.8593693752127424e-05, "loss": 0.1369, "step": 20071 }, { "epoch": 2.3801731293727024, "grad_norm": 0.9252076348023556, "learning_rate": 1.8591373568717063e-05, "loss": 0.1562, "step": 20072 }, { "epoch": 2.3802917111348276, "grad_norm": 0.6480870293235775, "learning_rate": 1.8589053444386386e-05, "loss": 0.0985, "step": 20073 }, { "epoch": 2.3804102928969524, "grad_norm": 0.846661432174298, "learning_rate": 1.8586733379156758e-05, "loss": 0.1321, "step": 20074 }, { "epoch": 2.3805288746590776, "grad_norm": 0.7394596556416534, "learning_rate": 1.8584413373049597e-05, "loss": 0.114, "step": 20075 }, { "epoch": 2.3806474564212023, "grad_norm": 1.0010875859540005, "learning_rate": 1.8582093426086277e-05, "loss": 0.1298, "step": 20076 }, { "epoch": 2.3807660381833275, "grad_norm": 0.7308638998002007, "learning_rate": 1.8579773538288175e-05, "loss": 0.0967, "step": 20077 }, { "epoch": 2.3808846199454523, "grad_norm": 0.714591273044114, "learning_rate": 1.8577453709676693e-05, "loss": 0.0989, "step": 20078 }, { "epoch": 2.3810032017075775, "grad_norm": 0.7902299195135827, "learning_rate": 1.8575133940273214e-05, "loss": 0.1005, "step": 20079 }, { "epoch": 2.3811217834697023, "grad_norm": 0.9537961211179414, "learning_rate": 1.8572814230099127e-05, "loss": 0.1363, "step": 20080 }, { "epoch": 2.3812403652318275, "grad_norm": 0.5100236929041791, "learning_rate": 1.8570494579175798e-05, "loss": 0.0636, "step": 20081 }, { "epoch": 2.3813589469939522, "grad_norm": 0.9179879961126634, "learning_rate": 1.8568174987524636e-05, "loss": 0.1027, "step": 20082 }, { "epoch": 2.3814775287560774, "grad_norm": 0.7645832329311963, "learning_rate": 1.8565855455167013e-05, "loss": 0.0884, "step": 20083 }, { "epoch": 2.381596110518202, "grad_norm": 0.6962922121066605, "learning_rate": 1.8563535982124313e-05, "loss": 0.1011, "step": 20084 }, { "epoch": 2.3817146922803274, "grad_norm": 0.7865002424786374, "learning_rate": 1.8561216568417907e-05, "loss": 0.0939, "step": 20085 }, { "epoch": 2.381833274042452, "grad_norm": 0.7489400939392612, "learning_rate": 1.8558897214069198e-05, "loss": 0.1028, "step": 20086 }, { "epoch": 2.3819518558045774, "grad_norm": 0.5382695584284976, "learning_rate": 1.8556577919099556e-05, "loss": 0.073, "step": 20087 }, { "epoch": 2.382070437566702, "grad_norm": 0.7400212902354133, "learning_rate": 1.855425868353037e-05, "loss": 0.0956, "step": 20088 }, { "epoch": 2.3821890193288273, "grad_norm": 0.49158317711938265, "learning_rate": 1.8551939507383e-05, "loss": 0.0621, "step": 20089 }, { "epoch": 2.382307601090952, "grad_norm": 0.6803653029980141, "learning_rate": 1.854962039067885e-05, "loss": 0.0862, "step": 20090 }, { "epoch": 2.3824261828530773, "grad_norm": 0.606567152581531, "learning_rate": 1.8547301333439297e-05, "loss": 0.0797, "step": 20091 }, { "epoch": 2.382544764615202, "grad_norm": 0.6799834855839283, "learning_rate": 1.8544982335685696e-05, "loss": 0.0927, "step": 20092 }, { "epoch": 2.3826633463773272, "grad_norm": 0.6452006948029182, "learning_rate": 1.8542663397439456e-05, "loss": 0.1055, "step": 20093 }, { "epoch": 2.382781928139452, "grad_norm": 0.715877539285693, "learning_rate": 1.8540344518721943e-05, "loss": 0.1044, "step": 20094 }, { "epoch": 2.382900509901577, "grad_norm": 0.6146348137781142, "learning_rate": 1.8538025699554526e-05, "loss": 0.0773, "step": 20095 }, { "epoch": 2.383019091663702, "grad_norm": 0.7561514380298094, "learning_rate": 1.853570693995858e-05, "loss": 0.1019, "step": 20096 }, { "epoch": 2.383137673425827, "grad_norm": 0.8490290911103311, "learning_rate": 1.85333882399555e-05, "loss": 0.0785, "step": 20097 }, { "epoch": 2.383256255187952, "grad_norm": 0.6031138654592563, "learning_rate": 1.853106959956665e-05, "loss": 0.0729, "step": 20098 }, { "epoch": 2.383374836950077, "grad_norm": 0.5083160176213388, "learning_rate": 1.8528751018813402e-05, "loss": 0.0597, "step": 20099 }, { "epoch": 2.383493418712202, "grad_norm": 0.5828003995040271, "learning_rate": 1.852643249771713e-05, "loss": 0.086, "step": 20100 }, { "epoch": 2.383612000474327, "grad_norm": 0.8730734543627151, "learning_rate": 1.8524114036299217e-05, "loss": 0.1217, "step": 20101 }, { "epoch": 2.383730582236452, "grad_norm": 0.8862285314833737, "learning_rate": 1.8521795634581034e-05, "loss": 0.1263, "step": 20102 }, { "epoch": 2.383849163998577, "grad_norm": 0.547920223294931, "learning_rate": 1.8519477292583952e-05, "loss": 0.07, "step": 20103 }, { "epoch": 2.3839677457607023, "grad_norm": 0.5784012338015486, "learning_rate": 1.8517159010329334e-05, "loss": 0.0858, "step": 20104 }, { "epoch": 2.384086327522827, "grad_norm": 0.6928142497916311, "learning_rate": 1.8514840787838565e-05, "loss": 0.0909, "step": 20105 }, { "epoch": 2.3842049092849518, "grad_norm": 0.7270575658989316, "learning_rate": 1.8512522625133012e-05, "loss": 0.1023, "step": 20106 }, { "epoch": 2.384323491047077, "grad_norm": 0.7665738236075885, "learning_rate": 1.851020452223404e-05, "loss": 0.0845, "step": 20107 }, { "epoch": 2.384442072809202, "grad_norm": 0.571308994529564, "learning_rate": 1.850788647916303e-05, "loss": 0.0697, "step": 20108 }, { "epoch": 2.384560654571327, "grad_norm": 1.0036755923705976, "learning_rate": 1.8505568495941345e-05, "loss": 0.1415, "step": 20109 }, { "epoch": 2.3846792363334517, "grad_norm": 0.6579777939156916, "learning_rate": 1.850325057259035e-05, "loss": 0.0845, "step": 20110 }, { "epoch": 2.384797818095577, "grad_norm": 0.7550888607147503, "learning_rate": 1.8500932709131418e-05, "loss": 0.1041, "step": 20111 }, { "epoch": 2.384916399857702, "grad_norm": 0.7802694622606636, "learning_rate": 1.8498614905585924e-05, "loss": 0.1114, "step": 20112 }, { "epoch": 2.385034981619827, "grad_norm": 0.8830622364728647, "learning_rate": 1.8496297161975227e-05, "loss": 0.148, "step": 20113 }, { "epoch": 2.3851535633819516, "grad_norm": 0.9390721423182212, "learning_rate": 1.8493979478320693e-05, "loss": 0.1088, "step": 20114 }, { "epoch": 2.385272145144077, "grad_norm": 0.7759337102611471, "learning_rate": 1.8491661854643682e-05, "loss": 0.1073, "step": 20115 }, { "epoch": 2.385390726906202, "grad_norm": 0.633293861692498, "learning_rate": 1.8489344290965578e-05, "loss": 0.094, "step": 20116 }, { "epoch": 2.3855093086683268, "grad_norm": 1.069678331844954, "learning_rate": 1.8487026787307728e-05, "loss": 0.1248, "step": 20117 }, { "epoch": 2.385627890430452, "grad_norm": 0.8635212812352847, "learning_rate": 1.848470934369151e-05, "loss": 0.1101, "step": 20118 }, { "epoch": 2.3857464721925767, "grad_norm": 0.6179964084225661, "learning_rate": 1.8482391960138276e-05, "loss": 0.0898, "step": 20119 }, { "epoch": 2.385865053954702, "grad_norm": 0.6034043302271764, "learning_rate": 1.8480074636669403e-05, "loss": 0.0848, "step": 20120 }, { "epoch": 2.3859836357168267, "grad_norm": 0.7443436211880056, "learning_rate": 1.8477757373306248e-05, "loss": 0.0879, "step": 20121 }, { "epoch": 2.386102217478952, "grad_norm": 0.7370646218346185, "learning_rate": 1.847544017007016e-05, "loss": 0.1, "step": 20122 }, { "epoch": 2.3862207992410767, "grad_norm": 0.76969332414105, "learning_rate": 1.8473123026982527e-05, "loss": 0.0908, "step": 20123 }, { "epoch": 2.386339381003202, "grad_norm": 0.8140302772481701, "learning_rate": 1.847080594406469e-05, "loss": 0.1097, "step": 20124 }, { "epoch": 2.3864579627653266, "grad_norm": 0.7572864845930481, "learning_rate": 1.8468488921338016e-05, "loss": 0.1133, "step": 20125 }, { "epoch": 2.386576544527452, "grad_norm": 0.6819373369272999, "learning_rate": 1.8466171958823858e-05, "loss": 0.0957, "step": 20126 }, { "epoch": 2.3866951262895766, "grad_norm": 0.7114992538091188, "learning_rate": 1.846385505654359e-05, "loss": 0.1, "step": 20127 }, { "epoch": 2.386813708051702, "grad_norm": 0.8720320332960965, "learning_rate": 1.8461538214518558e-05, "loss": 0.1153, "step": 20128 }, { "epoch": 2.3869322898138265, "grad_norm": 0.5325852206775019, "learning_rate": 1.8459221432770134e-05, "loss": 0.084, "step": 20129 }, { "epoch": 2.3870508715759517, "grad_norm": 0.8559706578497095, "learning_rate": 1.8456904711319655e-05, "loss": 0.1091, "step": 20130 }, { "epoch": 2.3871694533380765, "grad_norm": 0.8900713036895793, "learning_rate": 1.84545880501885e-05, "loss": 0.1096, "step": 20131 }, { "epoch": 2.3872880351002017, "grad_norm": 0.7227316517801932, "learning_rate": 1.8452271449398016e-05, "loss": 0.0876, "step": 20132 }, { "epoch": 2.3874066168623265, "grad_norm": 0.6601361169528078, "learning_rate": 1.8449954908969554e-05, "loss": 0.0884, "step": 20133 }, { "epoch": 2.3875251986244517, "grad_norm": 0.7769074136119168, "learning_rate": 1.8447638428924484e-05, "loss": 0.1076, "step": 20134 }, { "epoch": 2.3876437803865764, "grad_norm": 0.7546398331725148, "learning_rate": 1.8445322009284153e-05, "loss": 0.0726, "step": 20135 }, { "epoch": 2.3877623621487016, "grad_norm": 0.7113727449694359, "learning_rate": 1.844300565006991e-05, "loss": 0.1104, "step": 20136 }, { "epoch": 2.3878809439108264, "grad_norm": 0.6342722037012878, "learning_rate": 1.844068935130311e-05, "loss": 0.075, "step": 20137 }, { "epoch": 2.3879995256729516, "grad_norm": 0.8984795034638305, "learning_rate": 1.8438373113005114e-05, "loss": 0.0923, "step": 20138 }, { "epoch": 2.3881181074350764, "grad_norm": 0.6127748659194642, "learning_rate": 1.8436056935197277e-05, "loss": 0.0672, "step": 20139 }, { "epoch": 2.3882366891972016, "grad_norm": 0.8484250993793856, "learning_rate": 1.8433740817900945e-05, "loss": 0.1018, "step": 20140 }, { "epoch": 2.3883552709593263, "grad_norm": 0.7660280322166166, "learning_rate": 1.843142476113746e-05, "loss": 0.117, "step": 20141 }, { "epoch": 2.3884738527214515, "grad_norm": 0.9378746180522559, "learning_rate": 1.8429108764928195e-05, "loss": 0.1417, "step": 20142 }, { "epoch": 2.3885924344835763, "grad_norm": 0.7199514271976312, "learning_rate": 1.842679282929449e-05, "loss": 0.098, "step": 20143 }, { "epoch": 2.3887110162457015, "grad_norm": 0.6814133727203312, "learning_rate": 1.8424476954257697e-05, "loss": 0.0888, "step": 20144 }, { "epoch": 2.3888295980078262, "grad_norm": 0.6722072537453876, "learning_rate": 1.8422161139839147e-05, "loss": 0.1106, "step": 20145 }, { "epoch": 2.3889481797699514, "grad_norm": 0.9524938679253532, "learning_rate": 1.841984538606022e-05, "loss": 0.1293, "step": 20146 }, { "epoch": 2.389066761532076, "grad_norm": 0.7070416198910837, "learning_rate": 1.841752969294224e-05, "loss": 0.1159, "step": 20147 }, { "epoch": 2.3891853432942014, "grad_norm": 0.7464184682673104, "learning_rate": 1.8415214060506566e-05, "loss": 0.091, "step": 20148 }, { "epoch": 2.389303925056326, "grad_norm": 0.6283072751324671, "learning_rate": 1.8412898488774544e-05, "loss": 0.0866, "step": 20149 }, { "epoch": 2.3894225068184514, "grad_norm": 0.5021124399646364, "learning_rate": 1.8410582977767526e-05, "loss": 0.0641, "step": 20150 }, { "epoch": 2.389541088580576, "grad_norm": 0.6896894174073899, "learning_rate": 1.8408267527506852e-05, "loss": 0.1122, "step": 20151 }, { "epoch": 2.3896596703427013, "grad_norm": 0.8128090666106595, "learning_rate": 1.8405952138013856e-05, "loss": 0.1066, "step": 20152 }, { "epoch": 2.389778252104826, "grad_norm": 0.6807739981026396, "learning_rate": 1.8403636809309908e-05, "loss": 0.0846, "step": 20153 }, { "epoch": 2.3898968338669513, "grad_norm": 0.6607749850341189, "learning_rate": 1.8401321541416338e-05, "loss": 0.0895, "step": 20154 }, { "epoch": 2.390015415629076, "grad_norm": 1.0606973886080773, "learning_rate": 1.8399006334354487e-05, "loss": 0.1386, "step": 20155 }, { "epoch": 2.3901339973912012, "grad_norm": 0.5479365974117776, "learning_rate": 1.8396691188145704e-05, "loss": 0.0654, "step": 20156 }, { "epoch": 2.3902525791533265, "grad_norm": 1.1454007980736631, "learning_rate": 1.8394376102811327e-05, "loss": 0.165, "step": 20157 }, { "epoch": 2.390371160915451, "grad_norm": 0.7050448707201571, "learning_rate": 1.8392061078372712e-05, "loss": 0.1016, "step": 20158 }, { "epoch": 2.390489742677576, "grad_norm": 0.733314181156609, "learning_rate": 1.8389746114851186e-05, "loss": 0.1158, "step": 20159 }, { "epoch": 2.390608324439701, "grad_norm": 0.7058487377720404, "learning_rate": 1.8387431212268087e-05, "loss": 0.0818, "step": 20160 }, { "epoch": 2.3907269062018264, "grad_norm": 0.8188609207654292, "learning_rate": 1.8385116370644777e-05, "loss": 0.1267, "step": 20161 }, { "epoch": 2.390845487963951, "grad_norm": 0.7790449431345251, "learning_rate": 1.8382801590002577e-05, "loss": 0.1062, "step": 20162 }, { "epoch": 2.390964069726076, "grad_norm": 0.7622456388919014, "learning_rate": 1.8380486870362823e-05, "loss": 0.0921, "step": 20163 }, { "epoch": 2.391082651488201, "grad_norm": 0.6218911706626589, "learning_rate": 1.8378172211746876e-05, "loss": 0.0874, "step": 20164 }, { "epoch": 2.3912012332503263, "grad_norm": 0.8106494735629298, "learning_rate": 1.8375857614176055e-05, "loss": 0.0989, "step": 20165 }, { "epoch": 2.391319815012451, "grad_norm": 0.8403933660181092, "learning_rate": 1.8373543077671705e-05, "loss": 0.0999, "step": 20166 }, { "epoch": 2.3914383967745763, "grad_norm": 0.8096917198541487, "learning_rate": 1.8371228602255158e-05, "loss": 0.1061, "step": 20167 }, { "epoch": 2.391556978536701, "grad_norm": 0.8539334635748179, "learning_rate": 1.8368914187947756e-05, "loss": 0.1041, "step": 20168 }, { "epoch": 2.391675560298826, "grad_norm": 0.6872406873429642, "learning_rate": 1.8366599834770836e-05, "loss": 0.1005, "step": 20169 }, { "epoch": 2.391794142060951, "grad_norm": 0.6998399206428075, "learning_rate": 1.8364285542745735e-05, "loss": 0.0907, "step": 20170 }, { "epoch": 2.391912723823076, "grad_norm": 0.7878944488121976, "learning_rate": 1.8361971311893773e-05, "loss": 0.1133, "step": 20171 }, { "epoch": 2.392031305585201, "grad_norm": 0.8912595980965038, "learning_rate": 1.8359657142236302e-05, "loss": 0.1126, "step": 20172 }, { "epoch": 2.392149887347326, "grad_norm": 0.6450963853434746, "learning_rate": 1.8357343033794655e-05, "loss": 0.0815, "step": 20173 }, { "epoch": 2.392268469109451, "grad_norm": 0.7305521201649157, "learning_rate": 1.8355028986590156e-05, "loss": 0.105, "step": 20174 }, { "epoch": 2.392387050871576, "grad_norm": 0.9125933819244482, "learning_rate": 1.835271500064413e-05, "loss": 0.1096, "step": 20175 }, { "epoch": 2.392505632633701, "grad_norm": 0.4820707428904465, "learning_rate": 1.8350401075977933e-05, "loss": 0.069, "step": 20176 }, { "epoch": 2.392624214395826, "grad_norm": 0.8702795776093413, "learning_rate": 1.834808721261288e-05, "loss": 0.1304, "step": 20177 }, { "epoch": 2.392742796157951, "grad_norm": 0.852161136214693, "learning_rate": 1.83457734105703e-05, "loss": 0.0977, "step": 20178 }, { "epoch": 2.392861377920076, "grad_norm": 0.6986128126588067, "learning_rate": 1.8343459669871533e-05, "loss": 0.0888, "step": 20179 }, { "epoch": 2.392979959682201, "grad_norm": 0.5752810955632451, "learning_rate": 1.8341145990537912e-05, "loss": 0.0728, "step": 20180 }, { "epoch": 2.393098541444326, "grad_norm": 1.079436702713155, "learning_rate": 1.8338832372590754e-05, "loss": 0.1676, "step": 20181 }, { "epoch": 2.3932171232064507, "grad_norm": 0.7425203041594929, "learning_rate": 1.8336518816051385e-05, "loss": 0.096, "step": 20182 }, { "epoch": 2.393335704968576, "grad_norm": 0.7616232576644861, "learning_rate": 1.833420532094115e-05, "loss": 0.1142, "step": 20183 }, { "epoch": 2.3934542867307007, "grad_norm": 0.6964163997229822, "learning_rate": 1.8331891887281373e-05, "loss": 0.0931, "step": 20184 }, { "epoch": 2.393572868492826, "grad_norm": 0.6110957423807838, "learning_rate": 1.8329578515093367e-05, "loss": 0.0821, "step": 20185 }, { "epoch": 2.3936914502549507, "grad_norm": 0.8454678092906918, "learning_rate": 1.832726520439847e-05, "loss": 0.118, "step": 20186 }, { "epoch": 2.393810032017076, "grad_norm": 0.8189225310664142, "learning_rate": 1.8324951955218002e-05, "loss": 0.1067, "step": 20187 }, { "epoch": 2.3939286137792006, "grad_norm": 0.5771304777291457, "learning_rate": 1.83226387675733e-05, "loss": 0.0698, "step": 20188 }, { "epoch": 2.394047195541326, "grad_norm": 0.6142344193327101, "learning_rate": 1.8320325641485678e-05, "loss": 0.1012, "step": 20189 }, { "epoch": 2.3941657773034506, "grad_norm": 0.6959814582365136, "learning_rate": 1.831801257697645e-05, "loss": 0.0746, "step": 20190 }, { "epoch": 2.394284359065576, "grad_norm": 0.9340565941007684, "learning_rate": 1.8315699574066967e-05, "loss": 0.1338, "step": 20191 }, { "epoch": 2.3944029408277006, "grad_norm": 0.720978300244913, "learning_rate": 1.8313386632778533e-05, "loss": 0.0976, "step": 20192 }, { "epoch": 2.3945215225898258, "grad_norm": 0.8015433482908784, "learning_rate": 1.8311073753132468e-05, "loss": 0.1069, "step": 20193 }, { "epoch": 2.3946401043519505, "grad_norm": 0.9327580341810934, "learning_rate": 1.8308760935150107e-05, "loss": 0.131, "step": 20194 }, { "epoch": 2.3947586861140757, "grad_norm": 0.9035398903111694, "learning_rate": 1.8306448178852768e-05, "loss": 0.1395, "step": 20195 }, { "epoch": 2.3948772678762005, "grad_norm": 0.8906762321184629, "learning_rate": 1.8304135484261762e-05, "loss": 0.1255, "step": 20196 }, { "epoch": 2.3949958496383257, "grad_norm": 0.6518388451696009, "learning_rate": 1.8301822851398414e-05, "loss": 0.0974, "step": 20197 }, { "epoch": 2.3951144314004504, "grad_norm": 0.7914938004378994, "learning_rate": 1.829951028028405e-05, "loss": 0.1105, "step": 20198 }, { "epoch": 2.3952330131625756, "grad_norm": 0.5831025011964127, "learning_rate": 1.8297197770939986e-05, "loss": 0.0865, "step": 20199 }, { "epoch": 2.3953515949247004, "grad_norm": 0.6483507161309409, "learning_rate": 1.829488532338754e-05, "loss": 0.1092, "step": 20200 }, { "epoch": 2.3954701766868256, "grad_norm": 0.7720660789455756, "learning_rate": 1.829257293764802e-05, "loss": 0.108, "step": 20201 }, { "epoch": 2.3955887584489504, "grad_norm": 0.608105796589993, "learning_rate": 1.829026061374276e-05, "loss": 0.0695, "step": 20202 }, { "epoch": 2.3957073402110756, "grad_norm": 0.6322678578394265, "learning_rate": 1.828794835169307e-05, "loss": 0.0904, "step": 20203 }, { "epoch": 2.3958259219732003, "grad_norm": 0.5383261242506496, "learning_rate": 1.8285636151520254e-05, "loss": 0.0875, "step": 20204 }, { "epoch": 2.3959445037353255, "grad_norm": 0.608096844237224, "learning_rate": 1.828332401324565e-05, "loss": 0.0673, "step": 20205 }, { "epoch": 2.3960630854974507, "grad_norm": 0.6930222564636366, "learning_rate": 1.8281011936890566e-05, "loss": 0.0992, "step": 20206 }, { "epoch": 2.3961816672595755, "grad_norm": 0.9193628072341307, "learning_rate": 1.8278699922476304e-05, "loss": 0.1314, "step": 20207 }, { "epoch": 2.3963002490217002, "grad_norm": 1.2028738665327825, "learning_rate": 1.8276387970024186e-05, "loss": 0.1753, "step": 20208 }, { "epoch": 2.3964188307838254, "grad_norm": 1.2614226362823981, "learning_rate": 1.8274076079555526e-05, "loss": 0.0881, "step": 20209 }, { "epoch": 2.3965374125459507, "grad_norm": 0.9380052023510161, "learning_rate": 1.8271764251091643e-05, "loss": 0.1376, "step": 20210 }, { "epoch": 2.3966559943080754, "grad_norm": 0.5306159867315311, "learning_rate": 1.8269452484653842e-05, "loss": 0.0669, "step": 20211 }, { "epoch": 2.3967745760702, "grad_norm": 0.7831651722768345, "learning_rate": 1.8267140780263427e-05, "loss": 0.0973, "step": 20212 }, { "epoch": 2.3968931578323254, "grad_norm": 0.9719080156719679, "learning_rate": 1.8264829137941725e-05, "loss": 0.1298, "step": 20213 }, { "epoch": 2.3970117395944506, "grad_norm": 0.5944239320543169, "learning_rate": 1.826251755771004e-05, "loss": 0.0892, "step": 20214 }, { "epoch": 2.3971303213565753, "grad_norm": 0.6146054451290544, "learning_rate": 1.8260206039589678e-05, "loss": 0.0729, "step": 20215 }, { "epoch": 2.3972489031187005, "grad_norm": 0.6886656749621559, "learning_rate": 1.825789458360195e-05, "loss": 0.0646, "step": 20216 }, { "epoch": 2.3973674848808253, "grad_norm": 0.6470472843153998, "learning_rate": 1.8255583189768166e-05, "loss": 0.0694, "step": 20217 }, { "epoch": 2.3974860666429505, "grad_norm": 0.7780576826309862, "learning_rate": 1.8253271858109642e-05, "loss": 0.1146, "step": 20218 }, { "epoch": 2.3976046484050753, "grad_norm": 0.5343725792360327, "learning_rate": 1.8250960588647663e-05, "loss": 0.0899, "step": 20219 }, { "epoch": 2.3977232301672005, "grad_norm": 0.42819331639618413, "learning_rate": 1.8248649381403565e-05, "loss": 0.0657, "step": 20220 }, { "epoch": 2.397841811929325, "grad_norm": 0.5697335403239753, "learning_rate": 1.824633823639864e-05, "loss": 0.0963, "step": 20221 }, { "epoch": 2.3979603936914504, "grad_norm": 0.8334343324950962, "learning_rate": 1.8244027153654196e-05, "loss": 0.1495, "step": 20222 }, { "epoch": 2.398078975453575, "grad_norm": 0.6392587644905395, "learning_rate": 1.8241716133191522e-05, "loss": 0.0838, "step": 20223 }, { "epoch": 2.3981975572157004, "grad_norm": 0.7595413064720618, "learning_rate": 1.8239405175031953e-05, "loss": 0.0953, "step": 20224 }, { "epoch": 2.398316138977825, "grad_norm": 0.5863932203033548, "learning_rate": 1.8237094279196776e-05, "loss": 0.0962, "step": 20225 }, { "epoch": 2.3984347207399503, "grad_norm": 0.9070849128170728, "learning_rate": 1.8234783445707294e-05, "loss": 0.1509, "step": 20226 }, { "epoch": 2.398553302502075, "grad_norm": 0.5121760376522431, "learning_rate": 1.8232472674584804e-05, "loss": 0.073, "step": 20227 }, { "epoch": 2.3986718842642003, "grad_norm": 0.7619461116100906, "learning_rate": 1.8230161965850626e-05, "loss": 0.1249, "step": 20228 }, { "epoch": 2.398790466026325, "grad_norm": 0.7895902224923023, "learning_rate": 1.8227851319526057e-05, "loss": 0.1039, "step": 20229 }, { "epoch": 2.3989090477884503, "grad_norm": 0.6155987870310066, "learning_rate": 1.822554073563239e-05, "loss": 0.105, "step": 20230 }, { "epoch": 2.399027629550575, "grad_norm": 0.6428010846888255, "learning_rate": 1.8223230214190923e-05, "loss": 0.0956, "step": 20231 }, { "epoch": 2.3991462113127002, "grad_norm": 0.5540420363552103, "learning_rate": 1.822091975522297e-05, "loss": 0.0602, "step": 20232 }, { "epoch": 2.399264793074825, "grad_norm": 0.7667660368351557, "learning_rate": 1.8218609358749827e-05, "loss": 0.09, "step": 20233 }, { "epoch": 2.39938337483695, "grad_norm": 0.7680139255877487, "learning_rate": 1.821629902479278e-05, "loss": 0.0906, "step": 20234 }, { "epoch": 2.399501956599075, "grad_norm": 0.7585755502828114, "learning_rate": 1.8213988753373146e-05, "loss": 0.0969, "step": 20235 }, { "epoch": 2.3996205383612, "grad_norm": 0.71150019056051, "learning_rate": 1.821167854451221e-05, "loss": 0.0817, "step": 20236 }, { "epoch": 2.399739120123325, "grad_norm": 0.6523919549730853, "learning_rate": 1.8209368398231278e-05, "loss": 0.0838, "step": 20237 }, { "epoch": 2.39985770188545, "grad_norm": 0.5185640564691125, "learning_rate": 1.820705831455163e-05, "loss": 0.0608, "step": 20238 }, { "epoch": 2.399976283647575, "grad_norm": 0.9012230812888348, "learning_rate": 1.820474829349459e-05, "loss": 0.1518, "step": 20239 }, { "epoch": 2.4000948654097, "grad_norm": 0.8628843375187636, "learning_rate": 1.820243833508143e-05, "loss": 0.0947, "step": 20240 }, { "epoch": 2.400213447171825, "grad_norm": 0.7848530143345541, "learning_rate": 1.8200128439333457e-05, "loss": 0.1137, "step": 20241 }, { "epoch": 2.40033202893395, "grad_norm": 0.395766160262144, "learning_rate": 1.8197818606271953e-05, "loss": 0.0534, "step": 20242 }, { "epoch": 2.400450610696075, "grad_norm": 0.7554848909622699, "learning_rate": 1.819550883591823e-05, "loss": 0.0953, "step": 20243 }, { "epoch": 2.4005691924582, "grad_norm": 0.5696078136193001, "learning_rate": 1.819319912829357e-05, "loss": 0.0783, "step": 20244 }, { "epoch": 2.4006877742203248, "grad_norm": 0.8611194799947217, "learning_rate": 1.819088948341926e-05, "loss": 0.109, "step": 20245 }, { "epoch": 2.40080635598245, "grad_norm": 0.808389714861101, "learning_rate": 1.81885799013166e-05, "loss": 0.1086, "step": 20246 }, { "epoch": 2.4009249377445747, "grad_norm": 0.9093187969641613, "learning_rate": 1.8186270382006882e-05, "loss": 0.1309, "step": 20247 }, { "epoch": 2.4010435195067, "grad_norm": 0.6545186402923558, "learning_rate": 1.81839609255114e-05, "loss": 0.0952, "step": 20248 }, { "epoch": 2.4011621012688247, "grad_norm": 0.4448987227150256, "learning_rate": 1.818165153185143e-05, "loss": 0.0697, "step": 20249 }, { "epoch": 2.40128068303095, "grad_norm": 0.6612335969842862, "learning_rate": 1.817934220104828e-05, "loss": 0.105, "step": 20250 }, { "epoch": 2.4013992647930746, "grad_norm": 0.6239384863150024, "learning_rate": 1.8177032933123235e-05, "loss": 0.0811, "step": 20251 }, { "epoch": 2.4015178465552, "grad_norm": 0.7528620249478584, "learning_rate": 1.8174723728097575e-05, "loss": 0.0889, "step": 20252 }, { "epoch": 2.4016364283173246, "grad_norm": 0.8704089126696227, "learning_rate": 1.8172414585992583e-05, "loss": 0.1259, "step": 20253 }, { "epoch": 2.40175501007945, "grad_norm": 0.8046961026245355, "learning_rate": 1.8170105506829565e-05, "loss": 0.1042, "step": 20254 }, { "epoch": 2.401873591841575, "grad_norm": 0.6143015146758154, "learning_rate": 1.8167796490629803e-05, "loss": 0.0867, "step": 20255 }, { "epoch": 2.4019921736036998, "grad_norm": 0.451299354282776, "learning_rate": 1.816548753741457e-05, "loss": 0.0576, "step": 20256 }, { "epoch": 2.4021107553658245, "grad_norm": 0.6569404228745713, "learning_rate": 1.8163178647205165e-05, "loss": 0.0865, "step": 20257 }, { "epoch": 2.4022293371279497, "grad_norm": 0.8051712221091767, "learning_rate": 1.816086982002287e-05, "loss": 0.0843, "step": 20258 }, { "epoch": 2.402347918890075, "grad_norm": 0.9204449173894176, "learning_rate": 1.8158561055888968e-05, "loss": 0.1215, "step": 20259 }, { "epoch": 2.4024665006521997, "grad_norm": 0.6151187472691754, "learning_rate": 1.8156252354824747e-05, "loss": 0.0808, "step": 20260 }, { "epoch": 2.4025850824143244, "grad_norm": 0.6195039663614463, "learning_rate": 1.8153943716851474e-05, "loss": 0.0937, "step": 20261 }, { "epoch": 2.4027036641764496, "grad_norm": 0.8179649549813086, "learning_rate": 1.8151635141990457e-05, "loss": 0.1129, "step": 20262 }, { "epoch": 2.402822245938575, "grad_norm": 0.5767144441753485, "learning_rate": 1.8149326630262966e-05, "loss": 0.0569, "step": 20263 }, { "epoch": 2.4029408277006996, "grad_norm": 0.862229218455208, "learning_rate": 1.814701818169027e-05, "loss": 0.1202, "step": 20264 }, { "epoch": 2.4030594094628244, "grad_norm": 0.7929866410648235, "learning_rate": 1.814470979629368e-05, "loss": 0.117, "step": 20265 }, { "epoch": 2.4031779912249496, "grad_norm": 0.7246571599252207, "learning_rate": 1.8142401474094448e-05, "loss": 0.0956, "step": 20266 }, { "epoch": 2.4032965729870748, "grad_norm": 0.8485486527499116, "learning_rate": 1.814009321511387e-05, "loss": 0.1348, "step": 20267 }, { "epoch": 2.4034151547491995, "grad_norm": 0.7095158743623139, "learning_rate": 1.8137785019373212e-05, "loss": 0.0962, "step": 20268 }, { "epoch": 2.4035337365113247, "grad_norm": 0.7311967612706811, "learning_rate": 1.813547688689377e-05, "loss": 0.1142, "step": 20269 }, { "epoch": 2.4036523182734495, "grad_norm": 0.889555435554713, "learning_rate": 1.8133168817696815e-05, "loss": 0.1197, "step": 20270 }, { "epoch": 2.4037709000355747, "grad_norm": 0.7499216462521076, "learning_rate": 1.8130860811803624e-05, "loss": 0.1083, "step": 20271 }, { "epoch": 2.4038894817976995, "grad_norm": 0.4865696765007904, "learning_rate": 1.812855286923546e-05, "loss": 0.0691, "step": 20272 }, { "epoch": 2.4040080635598247, "grad_norm": 0.7936154334613709, "learning_rate": 1.8126244990013623e-05, "loss": 0.1218, "step": 20273 }, { "epoch": 2.4041266453219494, "grad_norm": 0.49068779666559714, "learning_rate": 1.812393717415938e-05, "loss": 0.0714, "step": 20274 }, { "epoch": 2.4042452270840746, "grad_norm": 0.5555146549581277, "learning_rate": 1.8121629421693996e-05, "loss": 0.0668, "step": 20275 }, { "epoch": 2.4043638088461994, "grad_norm": 0.46195251749620236, "learning_rate": 1.8119321732638754e-05, "loss": 0.0653, "step": 20276 }, { "epoch": 2.4044823906083246, "grad_norm": 0.5497777448237029, "learning_rate": 1.811701410701493e-05, "loss": 0.062, "step": 20277 }, { "epoch": 2.4046009723704493, "grad_norm": 0.9721381634825974, "learning_rate": 1.81147065448438e-05, "loss": 0.1451, "step": 20278 }, { "epoch": 2.4047195541325745, "grad_norm": 1.205398533873662, "learning_rate": 1.8112399046146623e-05, "loss": 0.156, "step": 20279 }, { "epoch": 2.4048381358946993, "grad_norm": 0.7470030294455692, "learning_rate": 1.811009161094469e-05, "loss": 0.0946, "step": 20280 }, { "epoch": 2.4049567176568245, "grad_norm": 0.6455839224126199, "learning_rate": 1.810778423925926e-05, "loss": 0.089, "step": 20281 }, { "epoch": 2.4050752994189493, "grad_norm": 0.5713770791967856, "learning_rate": 1.8105476931111612e-05, "loss": 0.0932, "step": 20282 }, { "epoch": 2.4051938811810745, "grad_norm": 0.7432208347462781, "learning_rate": 1.8103169686523e-05, "loss": 0.0812, "step": 20283 }, { "epoch": 2.4053124629431992, "grad_norm": 1.1168451610322907, "learning_rate": 1.8100862505514715e-05, "loss": 0.1337, "step": 20284 }, { "epoch": 2.4054310447053244, "grad_norm": 0.7296503454066819, "learning_rate": 1.8098555388108016e-05, "loss": 0.0824, "step": 20285 }, { "epoch": 2.405549626467449, "grad_norm": 1.0157457081905075, "learning_rate": 1.8096248334324175e-05, "loss": 0.1222, "step": 20286 }, { "epoch": 2.4056682082295744, "grad_norm": 0.5496706056616437, "learning_rate": 1.809394134418445e-05, "loss": 0.0773, "step": 20287 }, { "epoch": 2.405786789991699, "grad_norm": 0.9326010145920656, "learning_rate": 1.8091634417710128e-05, "loss": 0.1277, "step": 20288 }, { "epoch": 2.4059053717538244, "grad_norm": 0.5526212424774384, "learning_rate": 1.8089327554922465e-05, "loss": 0.0738, "step": 20289 }, { "epoch": 2.406023953515949, "grad_norm": 0.832048241824183, "learning_rate": 1.8087020755842715e-05, "loss": 0.1142, "step": 20290 }, { "epoch": 2.4061425352780743, "grad_norm": 0.7039024507934304, "learning_rate": 1.8084714020492165e-05, "loss": 0.0744, "step": 20291 }, { "epoch": 2.406261117040199, "grad_norm": 0.8461714939991171, "learning_rate": 1.808240734889208e-05, "loss": 0.1027, "step": 20292 }, { "epoch": 2.4063796988023243, "grad_norm": 0.664733199278252, "learning_rate": 1.8080100741063708e-05, "loss": 0.0984, "step": 20293 }, { "epoch": 2.406498280564449, "grad_norm": 0.5455124217545693, "learning_rate": 1.8077794197028315e-05, "loss": 0.0897, "step": 20294 }, { "epoch": 2.4066168623265742, "grad_norm": 0.6665608524809564, "learning_rate": 1.8075487716807176e-05, "loss": 0.0962, "step": 20295 }, { "epoch": 2.406735444088699, "grad_norm": 0.7377360885691243, "learning_rate": 1.807318130042155e-05, "loss": 0.0819, "step": 20296 }, { "epoch": 2.406854025850824, "grad_norm": 0.5665908120815473, "learning_rate": 1.8070874947892703e-05, "loss": 0.0731, "step": 20297 }, { "epoch": 2.406972607612949, "grad_norm": 0.44184085929687356, "learning_rate": 1.8068568659241878e-05, "loss": 0.0558, "step": 20298 }, { "epoch": 2.407091189375074, "grad_norm": 0.728125369270018, "learning_rate": 1.806626243449036e-05, "loss": 0.0915, "step": 20299 }, { "epoch": 2.407209771137199, "grad_norm": 0.5531013929079277, "learning_rate": 1.8063956273659406e-05, "loss": 0.0792, "step": 20300 }, { "epoch": 2.407328352899324, "grad_norm": 1.0796998474877508, "learning_rate": 1.806165017677026e-05, "loss": 0.1309, "step": 20301 }, { "epoch": 2.407446934661449, "grad_norm": 0.6900631384691506, "learning_rate": 1.805934414384419e-05, "loss": 0.0896, "step": 20302 }, { "epoch": 2.407565516423574, "grad_norm": 0.5823068681293946, "learning_rate": 1.8057038174902462e-05, "loss": 0.093, "step": 20303 }, { "epoch": 2.4076840981856993, "grad_norm": 0.7142500215869957, "learning_rate": 1.805473226996633e-05, "loss": 0.0836, "step": 20304 }, { "epoch": 2.407802679947824, "grad_norm": 0.6384902967100908, "learning_rate": 1.8052426429057036e-05, "loss": 0.1004, "step": 20305 }, { "epoch": 2.407921261709949, "grad_norm": 0.7859206484258509, "learning_rate": 1.8050120652195864e-05, "loss": 0.1007, "step": 20306 }, { "epoch": 2.408039843472074, "grad_norm": 0.6133774415717719, "learning_rate": 1.8047814939404047e-05, "loss": 0.0709, "step": 20307 }, { "epoch": 2.408158425234199, "grad_norm": 0.7922114813952226, "learning_rate": 1.8045509290702863e-05, "loss": 0.1136, "step": 20308 }, { "epoch": 2.408277006996324, "grad_norm": 0.6222953194908614, "learning_rate": 1.8043203706113538e-05, "loss": 0.0952, "step": 20309 }, { "epoch": 2.4083955887584487, "grad_norm": 0.7691824160959038, "learning_rate": 1.804089818565736e-05, "loss": 0.1142, "step": 20310 }, { "epoch": 2.408514170520574, "grad_norm": 0.7043058293773815, "learning_rate": 1.8038592729355563e-05, "loss": 0.083, "step": 20311 }, { "epoch": 2.408632752282699, "grad_norm": 0.9097585254286246, "learning_rate": 1.8036287337229407e-05, "loss": 0.1158, "step": 20312 }, { "epoch": 2.408751334044824, "grad_norm": 0.7029099799155933, "learning_rate": 1.803398200930013e-05, "loss": 0.0804, "step": 20313 }, { "epoch": 2.4088699158069486, "grad_norm": 0.641806891222451, "learning_rate": 1.803167674558901e-05, "loss": 0.0884, "step": 20314 }, { "epoch": 2.408988497569074, "grad_norm": 0.5998150379625741, "learning_rate": 1.8029371546117274e-05, "loss": 0.0913, "step": 20315 }, { "epoch": 2.409107079331199, "grad_norm": 0.47997620134672975, "learning_rate": 1.8027066410906195e-05, "loss": 0.0512, "step": 20316 }, { "epoch": 2.409225661093324, "grad_norm": 0.7131111061415353, "learning_rate": 1.8024761339977e-05, "loss": 0.0933, "step": 20317 }, { "epoch": 2.409344242855449, "grad_norm": 0.8215834974774233, "learning_rate": 1.8022456333350964e-05, "loss": 0.1027, "step": 20318 }, { "epoch": 2.4094628246175738, "grad_norm": 0.685318405419804, "learning_rate": 1.802015139104932e-05, "loss": 0.0828, "step": 20319 }, { "epoch": 2.409581406379699, "grad_norm": 0.691091199619396, "learning_rate": 1.8017846513093316e-05, "loss": 0.0781, "step": 20320 }, { "epoch": 2.4096999881418237, "grad_norm": 0.6891089505263236, "learning_rate": 1.8015541699504215e-05, "loss": 0.0764, "step": 20321 }, { "epoch": 2.409818569903949, "grad_norm": 0.79587171835386, "learning_rate": 1.8013236950303252e-05, "loss": 0.1178, "step": 20322 }, { "epoch": 2.4099371516660737, "grad_norm": 0.8249123459010869, "learning_rate": 1.8010932265511678e-05, "loss": 0.0771, "step": 20323 }, { "epoch": 2.410055733428199, "grad_norm": 0.5841417314090225, "learning_rate": 1.8008627645150728e-05, "loss": 0.0835, "step": 20324 }, { "epoch": 2.4101743151903237, "grad_norm": 0.8205867546376542, "learning_rate": 1.8006323089241667e-05, "loss": 0.1062, "step": 20325 }, { "epoch": 2.410292896952449, "grad_norm": 0.6887145093461642, "learning_rate": 1.800401859780573e-05, "loss": 0.0783, "step": 20326 }, { "epoch": 2.4104114787145736, "grad_norm": 1.1034546289688203, "learning_rate": 1.800171417086416e-05, "loss": 0.1503, "step": 20327 }, { "epoch": 2.410530060476699, "grad_norm": 0.5540826900876453, "learning_rate": 1.7999409808438193e-05, "loss": 0.0754, "step": 20328 }, { "epoch": 2.4106486422388236, "grad_norm": 0.8487158204589229, "learning_rate": 1.79971055105491e-05, "loss": 0.1134, "step": 20329 }, { "epoch": 2.410767224000949, "grad_norm": 1.4232341533663748, "learning_rate": 1.7994801277218104e-05, "loss": 0.1521, "step": 20330 }, { "epoch": 2.4108858057630735, "grad_norm": 0.6553119424182533, "learning_rate": 1.799249710846645e-05, "loss": 0.0847, "step": 20331 }, { "epoch": 2.4110043875251987, "grad_norm": 0.7234114381848568, "learning_rate": 1.799019300431537e-05, "loss": 0.0934, "step": 20332 }, { "epoch": 2.4111229692873235, "grad_norm": 0.5889199244837947, "learning_rate": 1.798788896478612e-05, "loss": 0.0697, "step": 20333 }, { "epoch": 2.4112415510494487, "grad_norm": 0.6658008510574517, "learning_rate": 1.798558498989994e-05, "loss": 0.0909, "step": 20334 }, { "epoch": 2.4113601328115735, "grad_norm": 0.5523731498855026, "learning_rate": 1.798328107967805e-05, "loss": 0.07, "step": 20335 }, { "epoch": 2.4114787145736987, "grad_norm": 0.842829892505244, "learning_rate": 1.7980977234141723e-05, "loss": 0.0907, "step": 20336 }, { "epoch": 2.4115972963358234, "grad_norm": 0.7290979108624341, "learning_rate": 1.7978673453312163e-05, "loss": 0.1044, "step": 20337 }, { "epoch": 2.4117158780979486, "grad_norm": 0.8632725002381179, "learning_rate": 1.7976369737210634e-05, "loss": 0.0785, "step": 20338 }, { "epoch": 2.4118344598600734, "grad_norm": 0.877089042745193, "learning_rate": 1.7974066085858354e-05, "loss": 0.129, "step": 20339 }, { "epoch": 2.4119530416221986, "grad_norm": 0.9413547980998684, "learning_rate": 1.7971762499276577e-05, "loss": 0.1345, "step": 20340 }, { "epoch": 2.4120716233843233, "grad_norm": 0.6403470033201398, "learning_rate": 1.796945897748653e-05, "loss": 0.1115, "step": 20341 }, { "epoch": 2.4121902051464486, "grad_norm": 0.68543951834747, "learning_rate": 1.7967155520509454e-05, "loss": 0.1042, "step": 20342 }, { "epoch": 2.4123087869085733, "grad_norm": 0.721709613048537, "learning_rate": 1.7964852128366566e-05, "loss": 0.1137, "step": 20343 }, { "epoch": 2.4124273686706985, "grad_norm": 0.7114750916648644, "learning_rate": 1.7962548801079127e-05, "loss": 0.0806, "step": 20344 }, { "epoch": 2.4125459504328233, "grad_norm": 0.7116440082954338, "learning_rate": 1.796024553866835e-05, "loss": 0.105, "step": 20345 }, { "epoch": 2.4126645321949485, "grad_norm": 0.6440824914983375, "learning_rate": 1.795794234115548e-05, "loss": 0.0795, "step": 20346 }, { "epoch": 2.4127831139570732, "grad_norm": 0.7659315024717536, "learning_rate": 1.7955639208561743e-05, "loss": 0.1256, "step": 20347 }, { "epoch": 2.4129016957191984, "grad_norm": 1.1032284105133852, "learning_rate": 1.7953336140908378e-05, "loss": 0.1564, "step": 20348 }, { "epoch": 2.413020277481323, "grad_norm": 0.9906175694673709, "learning_rate": 1.7951033138216615e-05, "loss": 0.1369, "step": 20349 }, { "epoch": 2.4131388592434484, "grad_norm": 0.4968447384434471, "learning_rate": 1.7948730200507673e-05, "loss": 0.0719, "step": 20350 }, { "epoch": 2.413257441005573, "grad_norm": 0.6794086311751871, "learning_rate": 1.79464273278028e-05, "loss": 0.103, "step": 20351 }, { "epoch": 2.4133760227676984, "grad_norm": 0.6852265704928223, "learning_rate": 1.794412452012322e-05, "loss": 0.0947, "step": 20352 }, { "epoch": 2.4134946045298236, "grad_norm": 0.7242891244087203, "learning_rate": 1.794182177749016e-05, "loss": 0.1058, "step": 20353 }, { "epoch": 2.4136131862919483, "grad_norm": 0.8661195022626802, "learning_rate": 1.7939519099924834e-05, "loss": 0.0897, "step": 20354 }, { "epoch": 2.413731768054073, "grad_norm": 0.8544060886565612, "learning_rate": 1.7937216487448493e-05, "loss": 0.1093, "step": 20355 }, { "epoch": 2.4138503498161983, "grad_norm": 0.763056819559556, "learning_rate": 1.7934913940082355e-05, "loss": 0.0823, "step": 20356 }, { "epoch": 2.4139689315783235, "grad_norm": 0.5577270620659378, "learning_rate": 1.793261145784765e-05, "loss": 0.0645, "step": 20357 }, { "epoch": 2.4140875133404482, "grad_norm": 0.8426332042798265, "learning_rate": 1.7930309040765586e-05, "loss": 0.0959, "step": 20358 }, { "epoch": 2.414206095102573, "grad_norm": 0.9039182241560071, "learning_rate": 1.792800668885742e-05, "loss": 0.1041, "step": 20359 }, { "epoch": 2.414324676864698, "grad_norm": 0.5275264617815396, "learning_rate": 1.7925704402144356e-05, "loss": 0.0767, "step": 20360 }, { "epoch": 2.4144432586268234, "grad_norm": 0.4930812196812944, "learning_rate": 1.7923402180647615e-05, "loss": 0.0857, "step": 20361 }, { "epoch": 2.414561840388948, "grad_norm": 0.6781076066669152, "learning_rate": 1.7921100024388434e-05, "loss": 0.0791, "step": 20362 }, { "epoch": 2.414680422151073, "grad_norm": 0.6943628233344732, "learning_rate": 1.7918797933388035e-05, "loss": 0.1052, "step": 20363 }, { "epoch": 2.414799003913198, "grad_norm": 0.7777562089045689, "learning_rate": 1.7916495907667628e-05, "loss": 0.104, "step": 20364 }, { "epoch": 2.4149175856753233, "grad_norm": 0.9008085768548023, "learning_rate": 1.7914193947248444e-05, "loss": 0.1238, "step": 20365 }, { "epoch": 2.415036167437448, "grad_norm": 0.6889801752137291, "learning_rate": 1.7911892052151695e-05, "loss": 0.108, "step": 20366 }, { "epoch": 2.4151547491995733, "grad_norm": 0.6335280096932957, "learning_rate": 1.7909590222398625e-05, "loss": 0.0872, "step": 20367 }, { "epoch": 2.415273330961698, "grad_norm": 0.6755504505486465, "learning_rate": 1.790728845801043e-05, "loss": 0.0895, "step": 20368 }, { "epoch": 2.4153919127238233, "grad_norm": 0.589022565588989, "learning_rate": 1.7904986759008335e-05, "loss": 0.0826, "step": 20369 }, { "epoch": 2.415510494485948, "grad_norm": 1.0294705222915697, "learning_rate": 1.7902685125413565e-05, "loss": 0.1218, "step": 20370 }, { "epoch": 2.415629076248073, "grad_norm": 0.6921235899973825, "learning_rate": 1.7900383557247342e-05, "loss": 0.0929, "step": 20371 }, { "epoch": 2.415747658010198, "grad_norm": 0.6715519429136312, "learning_rate": 1.789808205453087e-05, "loss": 0.0685, "step": 20372 }, { "epoch": 2.415866239772323, "grad_norm": 0.7600351953488707, "learning_rate": 1.7895780617285365e-05, "loss": 0.1299, "step": 20373 }, { "epoch": 2.415984821534448, "grad_norm": 0.7894240052347854, "learning_rate": 1.7893479245532063e-05, "loss": 0.1247, "step": 20374 }, { "epoch": 2.416103403296573, "grad_norm": 1.0226253125109874, "learning_rate": 1.7891177939292158e-05, "loss": 0.1343, "step": 20375 }, { "epoch": 2.416221985058698, "grad_norm": 0.710551923654119, "learning_rate": 1.7888876698586878e-05, "loss": 0.113, "step": 20376 }, { "epoch": 2.416340566820823, "grad_norm": 0.6688259011651109, "learning_rate": 1.7886575523437434e-05, "loss": 0.0836, "step": 20377 }, { "epoch": 2.416459148582948, "grad_norm": 0.42604752737069523, "learning_rate": 1.7884274413865045e-05, "loss": 0.056, "step": 20378 }, { "epoch": 2.416577730345073, "grad_norm": 0.5550142514747911, "learning_rate": 1.788197336989092e-05, "loss": 0.0887, "step": 20379 }, { "epoch": 2.416696312107198, "grad_norm": 0.4296732299265416, "learning_rate": 1.7879672391536265e-05, "loss": 0.0591, "step": 20380 }, { "epoch": 2.416814893869323, "grad_norm": 0.7010178675539561, "learning_rate": 1.7877371478822307e-05, "loss": 0.0969, "step": 20381 }, { "epoch": 2.416933475631448, "grad_norm": 0.7438365413252666, "learning_rate": 1.787507063177025e-05, "loss": 0.1063, "step": 20382 }, { "epoch": 2.417052057393573, "grad_norm": 0.7171978055889504, "learning_rate": 1.7872769850401304e-05, "loss": 0.1006, "step": 20383 }, { "epoch": 2.4171706391556977, "grad_norm": 0.6490827457707088, "learning_rate": 1.7870469134736667e-05, "loss": 0.0819, "step": 20384 }, { "epoch": 2.417289220917823, "grad_norm": 0.6544473572133686, "learning_rate": 1.7868168484797575e-05, "loss": 0.0819, "step": 20385 }, { "epoch": 2.4174078026799477, "grad_norm": 0.8495328525011101, "learning_rate": 1.786586790060522e-05, "loss": 0.1459, "step": 20386 }, { "epoch": 2.417526384442073, "grad_norm": 0.7150343998791518, "learning_rate": 1.7863567382180818e-05, "loss": 0.0945, "step": 20387 }, { "epoch": 2.4176449662041977, "grad_norm": 0.7351382920192172, "learning_rate": 1.7861266929545562e-05, "loss": 0.1095, "step": 20388 }, { "epoch": 2.417763547966323, "grad_norm": 0.7400104880740804, "learning_rate": 1.7858966542720684e-05, "loss": 0.078, "step": 20389 }, { "epoch": 2.4178821297284476, "grad_norm": 0.7961161399815287, "learning_rate": 1.7856666221727376e-05, "loss": 0.0862, "step": 20390 }, { "epoch": 2.418000711490573, "grad_norm": 0.6925610593302615, "learning_rate": 1.7854365966586834e-05, "loss": 0.0818, "step": 20391 }, { "epoch": 2.4181192932526976, "grad_norm": 0.650273287667905, "learning_rate": 1.7852065777320288e-05, "loss": 0.0999, "step": 20392 }, { "epoch": 2.418237875014823, "grad_norm": 0.8336432049026081, "learning_rate": 1.784976565394893e-05, "loss": 0.1372, "step": 20393 }, { "epoch": 2.4183564567769475, "grad_norm": 0.5410252484916681, "learning_rate": 1.7847465596493957e-05, "loss": 0.0783, "step": 20394 }, { "epoch": 2.4184750385390728, "grad_norm": 0.9507257596046369, "learning_rate": 1.7845165604976583e-05, "loss": 0.1223, "step": 20395 }, { "epoch": 2.4185936203011975, "grad_norm": 0.510126933971923, "learning_rate": 1.7842865679418008e-05, "loss": 0.0662, "step": 20396 }, { "epoch": 2.4187122020633227, "grad_norm": 0.8270115937971692, "learning_rate": 1.784056581983944e-05, "loss": 0.1383, "step": 20397 }, { "epoch": 2.4188307838254475, "grad_norm": 0.9823908801673128, "learning_rate": 1.7838266026262072e-05, "loss": 0.136, "step": 20398 }, { "epoch": 2.4189493655875727, "grad_norm": 0.6114626084487321, "learning_rate": 1.78359662987071e-05, "loss": 0.0786, "step": 20399 }, { "epoch": 2.4190679473496974, "grad_norm": 0.5774529712456424, "learning_rate": 1.7833666637195746e-05, "loss": 0.075, "step": 20400 }, { "epoch": 2.4191865291118226, "grad_norm": 0.701019160058869, "learning_rate": 1.7831367041749197e-05, "loss": 0.1063, "step": 20401 }, { "epoch": 2.4193051108739474, "grad_norm": 0.81391867151255, "learning_rate": 1.7829067512388653e-05, "loss": 0.104, "step": 20402 }, { "epoch": 2.4194236926360726, "grad_norm": 0.63205301895864, "learning_rate": 1.78267680491353e-05, "loss": 0.0773, "step": 20403 }, { "epoch": 2.4195422743981974, "grad_norm": 0.6703247130382289, "learning_rate": 1.7824468652010365e-05, "loss": 0.0861, "step": 20404 }, { "epoch": 2.4196608561603226, "grad_norm": 0.959164178360623, "learning_rate": 1.782216932103502e-05, "loss": 0.1269, "step": 20405 }, { "epoch": 2.4197794379224478, "grad_norm": 0.6592435798864569, "learning_rate": 1.7819870056230475e-05, "loss": 0.0994, "step": 20406 }, { "epoch": 2.4198980196845725, "grad_norm": 0.7560067435760086, "learning_rate": 1.781757085761792e-05, "loss": 0.0895, "step": 20407 }, { "epoch": 2.4200166014466973, "grad_norm": 0.536022058205427, "learning_rate": 1.7815271725218563e-05, "loss": 0.0671, "step": 20408 }, { "epoch": 2.4201351832088225, "grad_norm": 0.3719572601311168, "learning_rate": 1.7812972659053585e-05, "loss": 0.0566, "step": 20409 }, { "epoch": 2.4202537649709477, "grad_norm": 0.4961670031828941, "learning_rate": 1.7810673659144182e-05, "loss": 0.0639, "step": 20410 }, { "epoch": 2.4203723467330724, "grad_norm": 0.5275788035966217, "learning_rate": 1.7808374725511556e-05, "loss": 0.0691, "step": 20411 }, { "epoch": 2.420490928495197, "grad_norm": 0.9006486932335451, "learning_rate": 1.7806075858176903e-05, "loss": 0.1316, "step": 20412 }, { "epoch": 2.4206095102573224, "grad_norm": 0.7974682847932089, "learning_rate": 1.7803777057161396e-05, "loss": 0.1226, "step": 20413 }, { "epoch": 2.4207280920194476, "grad_norm": 0.5429000692507847, "learning_rate": 1.780147832248624e-05, "loss": 0.0699, "step": 20414 }, { "epoch": 2.4208466737815724, "grad_norm": 0.837050318534204, "learning_rate": 1.7799179654172632e-05, "loss": 0.1157, "step": 20415 }, { "epoch": 2.4209652555436976, "grad_norm": 0.869746107241255, "learning_rate": 1.779688105224176e-05, "loss": 0.1337, "step": 20416 }, { "epoch": 2.4210838373058223, "grad_norm": 0.9459285242669578, "learning_rate": 1.7794582516714812e-05, "loss": 0.1177, "step": 20417 }, { "epoch": 2.4212024190679475, "grad_norm": 1.2760387717024477, "learning_rate": 1.7792284047612967e-05, "loss": 0.1526, "step": 20418 }, { "epoch": 2.4213210008300723, "grad_norm": 0.6554226744419813, "learning_rate": 1.7789985644957435e-05, "loss": 0.081, "step": 20419 }, { "epoch": 2.4214395825921975, "grad_norm": 0.6248936978811822, "learning_rate": 1.7787687308769395e-05, "loss": 0.1108, "step": 20420 }, { "epoch": 2.4215581643543223, "grad_norm": 0.6061225149734762, "learning_rate": 1.7785389039070022e-05, "loss": 0.0865, "step": 20421 }, { "epoch": 2.4216767461164475, "grad_norm": 0.5961794824075508, "learning_rate": 1.7783090835880524e-05, "loss": 0.0655, "step": 20422 }, { "epoch": 2.421795327878572, "grad_norm": 0.5790243447593155, "learning_rate": 1.778079269922208e-05, "loss": 0.0857, "step": 20423 }, { "epoch": 2.4219139096406974, "grad_norm": 0.7490582063957336, "learning_rate": 1.777849462911587e-05, "loss": 0.1062, "step": 20424 }, { "epoch": 2.422032491402822, "grad_norm": 0.7254507227861589, "learning_rate": 1.777619662558308e-05, "loss": 0.1081, "step": 20425 }, { "epoch": 2.4221510731649474, "grad_norm": 0.716258434765107, "learning_rate": 1.7773898688644902e-05, "loss": 0.0937, "step": 20426 }, { "epoch": 2.422269654927072, "grad_norm": 1.0323222081850865, "learning_rate": 1.7771600818322523e-05, "loss": 0.1547, "step": 20427 }, { "epoch": 2.4223882366891973, "grad_norm": 1.001040233467527, "learning_rate": 1.7769303014637117e-05, "loss": 0.1148, "step": 20428 }, { "epoch": 2.422506818451322, "grad_norm": 0.7394296218509101, "learning_rate": 1.7767005277609863e-05, "loss": 0.0876, "step": 20429 }, { "epoch": 2.4226254002134473, "grad_norm": 0.7419737569404186, "learning_rate": 1.776470760726196e-05, "loss": 0.0987, "step": 20430 }, { "epoch": 2.422743981975572, "grad_norm": 0.8988042889422243, "learning_rate": 1.7762410003614583e-05, "loss": 0.1217, "step": 20431 }, { "epoch": 2.4228625637376973, "grad_norm": 0.6520278300568709, "learning_rate": 1.7760112466688905e-05, "loss": 0.0924, "step": 20432 }, { "epoch": 2.422981145499822, "grad_norm": 0.5610814896983861, "learning_rate": 1.7757814996506107e-05, "loss": 0.0676, "step": 20433 }, { "epoch": 2.4230997272619472, "grad_norm": 0.7734763896543018, "learning_rate": 1.775551759308738e-05, "loss": 0.122, "step": 20434 }, { "epoch": 2.423218309024072, "grad_norm": 0.6767679885070514, "learning_rate": 1.7753220256453895e-05, "loss": 0.0949, "step": 20435 }, { "epoch": 2.423336890786197, "grad_norm": 0.5738823060329283, "learning_rate": 1.7750922986626826e-05, "loss": 0.0798, "step": 20436 }, { "epoch": 2.423455472548322, "grad_norm": 0.7380471497712895, "learning_rate": 1.7748625783627364e-05, "loss": 0.0846, "step": 20437 }, { "epoch": 2.423574054310447, "grad_norm": 0.46494998978635366, "learning_rate": 1.7746328647476684e-05, "loss": 0.0575, "step": 20438 }, { "epoch": 2.423692636072572, "grad_norm": 1.014720570954471, "learning_rate": 1.7744031578195962e-05, "loss": 0.1361, "step": 20439 }, { "epoch": 2.423811217834697, "grad_norm": 0.5878083072812218, "learning_rate": 1.7741734575806356e-05, "loss": 0.0823, "step": 20440 }, { "epoch": 2.423929799596822, "grad_norm": 0.7053203856125371, "learning_rate": 1.7739437640329067e-05, "loss": 0.0882, "step": 20441 }, { "epoch": 2.424048381358947, "grad_norm": 0.5040214809223516, "learning_rate": 1.773714077178526e-05, "loss": 0.0653, "step": 20442 }, { "epoch": 2.424166963121072, "grad_norm": 0.5864368943995412, "learning_rate": 1.7734843970196108e-05, "loss": 0.0846, "step": 20443 }, { "epoch": 2.424285544883197, "grad_norm": 0.5665577715240618, "learning_rate": 1.7732547235582782e-05, "loss": 0.0675, "step": 20444 }, { "epoch": 2.424404126645322, "grad_norm": 0.9231916136463244, "learning_rate": 1.7730250567966456e-05, "loss": 0.1022, "step": 20445 }, { "epoch": 2.424522708407447, "grad_norm": 0.6333689132726998, "learning_rate": 1.7727953967368314e-05, "loss": 0.0825, "step": 20446 }, { "epoch": 2.4246412901695718, "grad_norm": 0.8066715878000948, "learning_rate": 1.7725657433809507e-05, "loss": 0.1134, "step": 20447 }, { "epoch": 2.424759871931697, "grad_norm": 0.9953450639603674, "learning_rate": 1.7723360967311227e-05, "loss": 0.15, "step": 20448 }, { "epoch": 2.4248784536938217, "grad_norm": 0.6018688472092898, "learning_rate": 1.772106456789464e-05, "loss": 0.0949, "step": 20449 }, { "epoch": 2.424997035455947, "grad_norm": 0.5345112968172905, "learning_rate": 1.7718768235580906e-05, "loss": 0.0672, "step": 20450 }, { "epoch": 2.4251156172180717, "grad_norm": 0.8309287743637392, "learning_rate": 1.771647197039119e-05, "loss": 0.1042, "step": 20451 }, { "epoch": 2.425234198980197, "grad_norm": 0.8281976748814304, "learning_rate": 1.7714175772346687e-05, "loss": 0.1002, "step": 20452 }, { "epoch": 2.4253527807423216, "grad_norm": 0.8822818981024819, "learning_rate": 1.7711879641468546e-05, "loss": 0.1465, "step": 20453 }, { "epoch": 2.425471362504447, "grad_norm": 0.5486536342817294, "learning_rate": 1.7709583577777932e-05, "loss": 0.0728, "step": 20454 }, { "epoch": 2.425589944266572, "grad_norm": 0.6644891326080791, "learning_rate": 1.7707287581296018e-05, "loss": 0.0889, "step": 20455 }, { "epoch": 2.425708526028697, "grad_norm": 0.8015843491333265, "learning_rate": 1.7704991652043967e-05, "loss": 0.0921, "step": 20456 }, { "epoch": 2.4258271077908216, "grad_norm": 1.0718291054538691, "learning_rate": 1.7702695790042957e-05, "loss": 0.1599, "step": 20457 }, { "epoch": 2.4259456895529468, "grad_norm": 0.5103521097160069, "learning_rate": 1.7700399995314137e-05, "loss": 0.071, "step": 20458 }, { "epoch": 2.426064271315072, "grad_norm": 0.6056492201556413, "learning_rate": 1.769810426787867e-05, "loss": 0.0839, "step": 20459 }, { "epoch": 2.4261828530771967, "grad_norm": 0.6689255210713124, "learning_rate": 1.769580860775774e-05, "loss": 0.0749, "step": 20460 }, { "epoch": 2.4263014348393215, "grad_norm": 0.7484535945669374, "learning_rate": 1.7693513014972495e-05, "loss": 0.116, "step": 20461 }, { "epoch": 2.4264200166014467, "grad_norm": 0.802917210262137, "learning_rate": 1.7691217489544092e-05, "loss": 0.1292, "step": 20462 }, { "epoch": 2.426538598363572, "grad_norm": 0.6574222088049687, "learning_rate": 1.768892203149371e-05, "loss": 0.0821, "step": 20463 }, { "epoch": 2.4266571801256966, "grad_norm": 0.42480374180660097, "learning_rate": 1.76866266408425e-05, "loss": 0.0601, "step": 20464 }, { "epoch": 2.426775761887822, "grad_norm": 0.8407009588273316, "learning_rate": 1.7684331317611623e-05, "loss": 0.0972, "step": 20465 }, { "epoch": 2.4268943436499466, "grad_norm": 0.4296580665971884, "learning_rate": 1.7682036061822237e-05, "loss": 0.0708, "step": 20466 }, { "epoch": 2.427012925412072, "grad_norm": 0.6027649520813165, "learning_rate": 1.7679740873495506e-05, "loss": 0.0793, "step": 20467 }, { "epoch": 2.4271315071741966, "grad_norm": 0.7801262814863572, "learning_rate": 1.7677445752652593e-05, "loss": 0.0975, "step": 20468 }, { "epoch": 2.4272500889363218, "grad_norm": 0.8135428835759627, "learning_rate": 1.767515069931465e-05, "loss": 0.1385, "step": 20469 }, { "epoch": 2.4273686706984465, "grad_norm": 0.7244333243267554, "learning_rate": 1.7672855713502823e-05, "loss": 0.0979, "step": 20470 }, { "epoch": 2.4274872524605717, "grad_norm": 0.8135420065650913, "learning_rate": 1.7670560795238296e-05, "loss": 0.0737, "step": 20471 }, { "epoch": 2.4276058342226965, "grad_norm": 0.5129902178920598, "learning_rate": 1.7668265944542207e-05, "loss": 0.0624, "step": 20472 }, { "epoch": 2.4277244159848217, "grad_norm": 0.8828272208090617, "learning_rate": 1.7665971161435712e-05, "loss": 0.1125, "step": 20473 }, { "epoch": 2.4278429977469465, "grad_norm": 0.5847147172305629, "learning_rate": 1.7663676445939966e-05, "loss": 0.0834, "step": 20474 }, { "epoch": 2.4279615795090717, "grad_norm": 0.6890617088399857, "learning_rate": 1.7661381798076128e-05, "loss": 0.0738, "step": 20475 }, { "epoch": 2.4280801612711964, "grad_norm": 0.5123387749273092, "learning_rate": 1.7659087217865357e-05, "loss": 0.0614, "step": 20476 }, { "epoch": 2.4281987430333216, "grad_norm": 0.6846570578830027, "learning_rate": 1.765679270532879e-05, "loss": 0.0995, "step": 20477 }, { "epoch": 2.4283173247954464, "grad_norm": 0.5804695092191787, "learning_rate": 1.76544982604876e-05, "loss": 0.0859, "step": 20478 }, { "epoch": 2.4284359065575716, "grad_norm": 0.4520663363328892, "learning_rate": 1.7652203883362927e-05, "loss": 0.0708, "step": 20479 }, { "epoch": 2.4285544883196963, "grad_norm": 0.9911754141011777, "learning_rate": 1.7649909573975925e-05, "loss": 0.1221, "step": 20480 }, { "epoch": 2.4286730700818215, "grad_norm": 0.5980720432339427, "learning_rate": 1.7647615332347734e-05, "loss": 0.091, "step": 20481 }, { "epoch": 2.4287916518439463, "grad_norm": 0.61412097770083, "learning_rate": 1.764532115849952e-05, "loss": 0.0723, "step": 20482 }, { "epoch": 2.4289102336060715, "grad_norm": 0.9661384126435733, "learning_rate": 1.7643027052452428e-05, "loss": 0.127, "step": 20483 }, { "epoch": 2.4290288153681963, "grad_norm": 0.909030466057467, "learning_rate": 1.76407330142276e-05, "loss": 0.1314, "step": 20484 }, { "epoch": 2.4291473971303215, "grad_norm": 0.7729020310182478, "learning_rate": 1.763843904384619e-05, "loss": 0.1083, "step": 20485 }, { "epoch": 2.429265978892446, "grad_norm": 0.5057066437562823, "learning_rate": 1.763614514132934e-05, "loss": 0.0889, "step": 20486 }, { "epoch": 2.4293845606545714, "grad_norm": 0.8305859150582953, "learning_rate": 1.7633851306698213e-05, "loss": 0.0848, "step": 20487 }, { "epoch": 2.429503142416696, "grad_norm": 0.6251824861875199, "learning_rate": 1.763155753997394e-05, "loss": 0.0789, "step": 20488 }, { "epoch": 2.4296217241788214, "grad_norm": 0.7973353018506113, "learning_rate": 1.7629263841177662e-05, "loss": 0.0783, "step": 20489 }, { "epoch": 2.429740305940946, "grad_norm": 0.8737040074358955, "learning_rate": 1.7626970210330546e-05, "loss": 0.1137, "step": 20490 }, { "epoch": 2.4298588877030713, "grad_norm": 0.848734323375219, "learning_rate": 1.7624676647453718e-05, "loss": 0.113, "step": 20491 }, { "epoch": 2.429977469465196, "grad_norm": 0.8180492089311855, "learning_rate": 1.762238315256832e-05, "loss": 0.1131, "step": 20492 }, { "epoch": 2.4300960512273213, "grad_norm": 0.6015019708668995, "learning_rate": 1.762008972569551e-05, "loss": 0.0621, "step": 20493 }, { "epoch": 2.430214632989446, "grad_norm": 0.9486168885582937, "learning_rate": 1.7617796366856422e-05, "loss": 0.1049, "step": 20494 }, { "epoch": 2.4303332147515713, "grad_norm": 0.6805806427266702, "learning_rate": 1.7615503076072198e-05, "loss": 0.1174, "step": 20495 }, { "epoch": 2.430451796513696, "grad_norm": 0.6817426364293354, "learning_rate": 1.7613209853363974e-05, "loss": 0.0755, "step": 20496 }, { "epoch": 2.4305703782758212, "grad_norm": 0.576965789304253, "learning_rate": 1.7610916698752906e-05, "loss": 0.098, "step": 20497 }, { "epoch": 2.430688960037946, "grad_norm": 1.0476580197457392, "learning_rate": 1.7608623612260126e-05, "loss": 0.1469, "step": 20498 }, { "epoch": 2.430807541800071, "grad_norm": 0.750791201144994, "learning_rate": 1.7606330593906773e-05, "loss": 0.0788, "step": 20499 }, { "epoch": 2.430926123562196, "grad_norm": 0.7888014268210428, "learning_rate": 1.760403764371397e-05, "loss": 0.0959, "step": 20500 }, { "epoch": 2.431044705324321, "grad_norm": 0.5323138821912876, "learning_rate": 1.7601744761702887e-05, "loss": 0.0921, "step": 20501 }, { "epoch": 2.431163287086446, "grad_norm": 0.6290232211007444, "learning_rate": 1.759945194789464e-05, "loss": 0.0774, "step": 20502 }, { "epoch": 2.431281868848571, "grad_norm": 0.5787941775963371, "learning_rate": 1.759715920231037e-05, "loss": 0.0794, "step": 20503 }, { "epoch": 2.4314004506106963, "grad_norm": 0.7025650212228934, "learning_rate": 1.7594866524971214e-05, "loss": 0.0894, "step": 20504 }, { "epoch": 2.431519032372821, "grad_norm": 0.6856215901444034, "learning_rate": 1.7592573915898302e-05, "loss": 0.0939, "step": 20505 }, { "epoch": 2.431637614134946, "grad_norm": 0.741660921528089, "learning_rate": 1.7590281375112787e-05, "loss": 0.1008, "step": 20506 }, { "epoch": 2.431756195897071, "grad_norm": 0.4849562069104137, "learning_rate": 1.7587988902635777e-05, "loss": 0.0723, "step": 20507 }, { "epoch": 2.4318747776591962, "grad_norm": 0.4608767050801377, "learning_rate": 1.758569649848843e-05, "loss": 0.0609, "step": 20508 }, { "epoch": 2.431993359421321, "grad_norm": 0.7666960686737548, "learning_rate": 1.7583404162691874e-05, "loss": 0.1115, "step": 20509 }, { "epoch": 2.4321119411834458, "grad_norm": 0.6653772629164727, "learning_rate": 1.7581111895267232e-05, "loss": 0.0944, "step": 20510 }, { "epoch": 2.432230522945571, "grad_norm": 0.5733993081499409, "learning_rate": 1.757881969623563e-05, "loss": 0.086, "step": 20511 }, { "epoch": 2.432349104707696, "grad_norm": 0.6918144156262968, "learning_rate": 1.757652756561822e-05, "loss": 0.1013, "step": 20512 }, { "epoch": 2.432467686469821, "grad_norm": 0.7400317373177974, "learning_rate": 1.757423550343613e-05, "loss": 0.0991, "step": 20513 }, { "epoch": 2.4325862682319457, "grad_norm": 0.7823815261385417, "learning_rate": 1.757194350971047e-05, "loss": 0.1197, "step": 20514 }, { "epoch": 2.432704849994071, "grad_norm": 0.5187802642625817, "learning_rate": 1.7569651584462385e-05, "loss": 0.0696, "step": 20515 }, { "epoch": 2.432823431756196, "grad_norm": 0.656669837566061, "learning_rate": 1.7567359727713e-05, "loss": 0.1069, "step": 20516 }, { "epoch": 2.432942013518321, "grad_norm": 0.688817851343126, "learning_rate": 1.7565067939483448e-05, "loss": 0.0875, "step": 20517 }, { "epoch": 2.433060595280446, "grad_norm": 0.8741790709733156, "learning_rate": 1.7562776219794853e-05, "loss": 0.1056, "step": 20518 }, { "epoch": 2.433179177042571, "grad_norm": 0.7489178695141648, "learning_rate": 1.7560484568668334e-05, "loss": 0.1067, "step": 20519 }, { "epoch": 2.433297758804696, "grad_norm": 0.5786204640239, "learning_rate": 1.755819298612503e-05, "loss": 0.0617, "step": 20520 }, { "epoch": 2.4334163405668208, "grad_norm": 0.49346179493446485, "learning_rate": 1.7555901472186064e-05, "loss": 0.0649, "step": 20521 }, { "epoch": 2.433534922328946, "grad_norm": 0.7167050619812333, "learning_rate": 1.7553610026872545e-05, "loss": 0.0904, "step": 20522 }, { "epoch": 2.4336535040910707, "grad_norm": 1.0807429972725695, "learning_rate": 1.7551318650205623e-05, "loss": 0.1276, "step": 20523 }, { "epoch": 2.433772085853196, "grad_norm": 1.0062568187412955, "learning_rate": 1.7549027342206398e-05, "loss": 0.1017, "step": 20524 }, { "epoch": 2.4338906676153207, "grad_norm": 0.822369275188511, "learning_rate": 1.7546736102896015e-05, "loss": 0.0751, "step": 20525 }, { "epoch": 2.434009249377446, "grad_norm": 0.7792357420657303, "learning_rate": 1.7544444932295573e-05, "loss": 0.0866, "step": 20526 }, { "epoch": 2.4341278311395707, "grad_norm": 0.6984320947151539, "learning_rate": 1.7542153830426217e-05, "loss": 0.0876, "step": 20527 }, { "epoch": 2.434246412901696, "grad_norm": 0.8503929164411931, "learning_rate": 1.7539862797309058e-05, "loss": 0.1072, "step": 20528 }, { "epoch": 2.4343649946638206, "grad_norm": 0.6294120927796336, "learning_rate": 1.7537571832965218e-05, "loss": 0.0661, "step": 20529 }, { "epoch": 2.434483576425946, "grad_norm": 0.9407869390453607, "learning_rate": 1.7535280937415798e-05, "loss": 0.1302, "step": 20530 }, { "epoch": 2.4346021581880706, "grad_norm": 0.5431809596140349, "learning_rate": 1.7532990110681947e-05, "loss": 0.074, "step": 20531 }, { "epoch": 2.434720739950196, "grad_norm": 0.8229096006936102, "learning_rate": 1.7530699352784773e-05, "loss": 0.1034, "step": 20532 }, { "epoch": 2.4348393217123205, "grad_norm": 1.1503698958257544, "learning_rate": 1.7528408663745377e-05, "loss": 0.1546, "step": 20533 }, { "epoch": 2.4349579034744457, "grad_norm": 0.8134469088032726, "learning_rate": 1.7526118043584904e-05, "loss": 0.1209, "step": 20534 }, { "epoch": 2.4350764852365705, "grad_norm": 0.9644072780464764, "learning_rate": 1.752382749232445e-05, "loss": 0.1571, "step": 20535 }, { "epoch": 2.4351950669986957, "grad_norm": 0.9621637097537685, "learning_rate": 1.7521537009985146e-05, "loss": 0.116, "step": 20536 }, { "epoch": 2.4353136487608205, "grad_norm": 0.4411337036976172, "learning_rate": 1.751924659658809e-05, "loss": 0.0675, "step": 20537 }, { "epoch": 2.4354322305229457, "grad_norm": 0.60748575790993, "learning_rate": 1.7516956252154415e-05, "loss": 0.0871, "step": 20538 }, { "epoch": 2.4355508122850704, "grad_norm": 0.3796155094161408, "learning_rate": 1.751466597670523e-05, "loss": 0.0529, "step": 20539 }, { "epoch": 2.4356693940471956, "grad_norm": 0.8656278199888006, "learning_rate": 1.7512375770261645e-05, "loss": 0.1186, "step": 20540 }, { "epoch": 2.4357879758093204, "grad_norm": 0.6476835203426963, "learning_rate": 1.7510085632844762e-05, "loss": 0.0884, "step": 20541 }, { "epoch": 2.4359065575714456, "grad_norm": 0.6457363257656736, "learning_rate": 1.7507795564475714e-05, "loss": 0.0696, "step": 20542 }, { "epoch": 2.4360251393335703, "grad_norm": 0.7323743649166528, "learning_rate": 1.75055055651756e-05, "loss": 0.1081, "step": 20543 }, { "epoch": 2.4361437210956955, "grad_norm": 0.733474936875559, "learning_rate": 1.750321563496554e-05, "loss": 0.0808, "step": 20544 }, { "epoch": 2.4362623028578203, "grad_norm": 0.5113663111178807, "learning_rate": 1.7500925773866626e-05, "loss": 0.0652, "step": 20545 }, { "epoch": 2.4363808846199455, "grad_norm": 0.6876968259736314, "learning_rate": 1.749863598189999e-05, "loss": 0.0966, "step": 20546 }, { "epoch": 2.4364994663820703, "grad_norm": 0.6425109730468664, "learning_rate": 1.7496346259086734e-05, "loss": 0.0814, "step": 20547 }, { "epoch": 2.4366180481441955, "grad_norm": 0.692502053040945, "learning_rate": 1.7494056605447955e-05, "loss": 0.1036, "step": 20548 }, { "epoch": 2.4367366299063202, "grad_norm": 0.7954179455385406, "learning_rate": 1.7491767021004778e-05, "loss": 0.1196, "step": 20549 }, { "epoch": 2.4368552116684454, "grad_norm": 0.7454411856236889, "learning_rate": 1.74894775057783e-05, "loss": 0.0917, "step": 20550 }, { "epoch": 2.43697379343057, "grad_norm": 0.574152852879181, "learning_rate": 1.7487188059789635e-05, "loss": 0.0765, "step": 20551 }, { "epoch": 2.4370923751926954, "grad_norm": 0.6320028906352021, "learning_rate": 1.7484898683059868e-05, "loss": 0.0946, "step": 20552 }, { "epoch": 2.4372109569548206, "grad_norm": 0.8191241413469318, "learning_rate": 1.7482609375610132e-05, "loss": 0.1379, "step": 20553 }, { "epoch": 2.4373295387169454, "grad_norm": 0.9545684611458045, "learning_rate": 1.7480320137461508e-05, "loss": 0.1284, "step": 20554 }, { "epoch": 2.43744812047907, "grad_norm": 0.7373664593727889, "learning_rate": 1.747803096863512e-05, "loss": 0.0897, "step": 20555 }, { "epoch": 2.4375667022411953, "grad_norm": 0.6350567357986692, "learning_rate": 1.7475741869152056e-05, "loss": 0.0776, "step": 20556 }, { "epoch": 2.4376852840033205, "grad_norm": 0.694073078652184, "learning_rate": 1.7473452839033433e-05, "loss": 0.0864, "step": 20557 }, { "epoch": 2.4378038657654453, "grad_norm": 0.6783527151499834, "learning_rate": 1.7471163878300344e-05, "loss": 0.0917, "step": 20558 }, { "epoch": 2.43792244752757, "grad_norm": 0.724953337734948, "learning_rate": 1.7468874986973893e-05, "loss": 0.0878, "step": 20559 }, { "epoch": 2.4380410292896952, "grad_norm": 0.9561467363894501, "learning_rate": 1.7466586165075173e-05, "loss": 0.1182, "step": 20560 }, { "epoch": 2.4381596110518204, "grad_norm": 0.8194012346468562, "learning_rate": 1.7464297412625293e-05, "loss": 0.1325, "step": 20561 }, { "epoch": 2.438278192813945, "grad_norm": 0.5521671270851761, "learning_rate": 1.7462008729645353e-05, "loss": 0.0687, "step": 20562 }, { "epoch": 2.43839677457607, "grad_norm": 0.965076548062945, "learning_rate": 1.7459720116156443e-05, "loss": 0.1453, "step": 20563 }, { "epoch": 2.438515356338195, "grad_norm": 0.6882994900578039, "learning_rate": 1.745743157217967e-05, "loss": 0.1061, "step": 20564 }, { "epoch": 2.4386339381003204, "grad_norm": 0.6510997544278733, "learning_rate": 1.7455143097736126e-05, "loss": 0.0907, "step": 20565 }, { "epoch": 2.438752519862445, "grad_norm": 0.9685684366849682, "learning_rate": 1.7452854692846918e-05, "loss": 0.1175, "step": 20566 }, { "epoch": 2.4388711016245703, "grad_norm": 0.7505913100916941, "learning_rate": 1.745056635753312e-05, "loss": 0.1029, "step": 20567 }, { "epoch": 2.438989683386695, "grad_norm": 0.7138473802661519, "learning_rate": 1.7448278091815858e-05, "loss": 0.1007, "step": 20568 }, { "epoch": 2.4391082651488203, "grad_norm": 0.8301486893118868, "learning_rate": 1.744598989571621e-05, "loss": 0.141, "step": 20569 }, { "epoch": 2.439226846910945, "grad_norm": 0.9714382462273952, "learning_rate": 1.744370176925527e-05, "loss": 0.1164, "step": 20570 }, { "epoch": 2.4393454286730702, "grad_norm": 0.9530837484299615, "learning_rate": 1.7441413712454124e-05, "loss": 0.0903, "step": 20571 }, { "epoch": 2.439464010435195, "grad_norm": 0.4806813767034375, "learning_rate": 1.7439125725333885e-05, "loss": 0.0778, "step": 20572 }, { "epoch": 2.43958259219732, "grad_norm": 0.4636226355894945, "learning_rate": 1.7436837807915627e-05, "loss": 0.0748, "step": 20573 }, { "epoch": 2.439701173959445, "grad_norm": 0.7838810637228693, "learning_rate": 1.743454996022046e-05, "loss": 0.1138, "step": 20574 }, { "epoch": 2.43981975572157, "grad_norm": 0.5349520643310081, "learning_rate": 1.7432262182269454e-05, "loss": 0.0806, "step": 20575 }, { "epoch": 2.439938337483695, "grad_norm": 0.6204787594560555, "learning_rate": 1.7429974474083717e-05, "loss": 0.0944, "step": 20576 }, { "epoch": 2.44005691924582, "grad_norm": 0.4864649398804172, "learning_rate": 1.7427686835684336e-05, "loss": 0.0744, "step": 20577 }, { "epoch": 2.440175501007945, "grad_norm": 0.8285263800624789, "learning_rate": 1.7425399267092383e-05, "loss": 0.1042, "step": 20578 }, { "epoch": 2.44029408277007, "grad_norm": 0.45467112433212414, "learning_rate": 1.7423111768328976e-05, "loss": 0.0535, "step": 20579 }, { "epoch": 2.440412664532195, "grad_norm": 0.5045304192691862, "learning_rate": 1.7420824339415182e-05, "loss": 0.063, "step": 20580 }, { "epoch": 2.44053124629432, "grad_norm": 0.6872786500252062, "learning_rate": 1.7418536980372096e-05, "loss": 0.0994, "step": 20581 }, { "epoch": 2.440649828056445, "grad_norm": 0.756411679807771, "learning_rate": 1.7416249691220795e-05, "loss": 0.0913, "step": 20582 }, { "epoch": 2.44076840981857, "grad_norm": 1.0215751956453878, "learning_rate": 1.7413962471982377e-05, "loss": 0.1168, "step": 20583 }, { "epoch": 2.4408869915806948, "grad_norm": 1.09017099306803, "learning_rate": 1.7411675322677916e-05, "loss": 0.1204, "step": 20584 }, { "epoch": 2.44100557334282, "grad_norm": 0.9983386427865891, "learning_rate": 1.740938824332851e-05, "loss": 0.1346, "step": 20585 }, { "epoch": 2.4411241551049447, "grad_norm": 0.7741768644450451, "learning_rate": 1.7407101233955224e-05, "loss": 0.0913, "step": 20586 }, { "epoch": 2.44124273686707, "grad_norm": 0.8552167980676849, "learning_rate": 1.7404814294579168e-05, "loss": 0.1068, "step": 20587 }, { "epoch": 2.4413613186291947, "grad_norm": 0.7288657026876763, "learning_rate": 1.740252742522141e-05, "loss": 0.0799, "step": 20588 }, { "epoch": 2.44147990039132, "grad_norm": 0.5147913576744787, "learning_rate": 1.740024062590303e-05, "loss": 0.0765, "step": 20589 }, { "epoch": 2.4415984821534447, "grad_norm": 0.8236188261629301, "learning_rate": 1.7397953896645103e-05, "loss": 0.1191, "step": 20590 }, { "epoch": 2.44171706391557, "grad_norm": 0.7285016085895992, "learning_rate": 1.7395667237468733e-05, "loss": 0.1045, "step": 20591 }, { "epoch": 2.4418356456776946, "grad_norm": 0.8460424537233397, "learning_rate": 1.739338064839498e-05, "loss": 0.1027, "step": 20592 }, { "epoch": 2.44195422743982, "grad_norm": 0.6394736315218411, "learning_rate": 1.739109412944492e-05, "loss": 0.0882, "step": 20593 }, { "epoch": 2.4420728092019446, "grad_norm": 0.6872383614108158, "learning_rate": 1.7388807680639655e-05, "loss": 0.0952, "step": 20594 }, { "epoch": 2.44219139096407, "grad_norm": 0.7518910190054401, "learning_rate": 1.738652130200024e-05, "loss": 0.1235, "step": 20595 }, { "epoch": 2.4423099727261945, "grad_norm": 1.1416730695277892, "learning_rate": 1.738423499354777e-05, "loss": 0.1209, "step": 20596 }, { "epoch": 2.4424285544883197, "grad_norm": 0.6893737552999919, "learning_rate": 1.7381948755303306e-05, "loss": 0.0786, "step": 20597 }, { "epoch": 2.4425471362504445, "grad_norm": 0.7130118751058202, "learning_rate": 1.737966258728794e-05, "loss": 0.1089, "step": 20598 }, { "epoch": 2.4426657180125697, "grad_norm": 0.6488331081266713, "learning_rate": 1.737737648952274e-05, "loss": 0.0988, "step": 20599 }, { "epoch": 2.4427842997746945, "grad_norm": 0.8242305527172274, "learning_rate": 1.7375090462028787e-05, "loss": 0.1227, "step": 20600 }, { "epoch": 2.4429028815368197, "grad_norm": 0.6852341620811824, "learning_rate": 1.7372804504827132e-05, "loss": 0.1078, "step": 20601 }, { "epoch": 2.4430214632989444, "grad_norm": 0.4990574898999918, "learning_rate": 1.737051861793888e-05, "loss": 0.0582, "step": 20602 }, { "epoch": 2.4431400450610696, "grad_norm": 0.650274925896552, "learning_rate": 1.7368232801385086e-05, "loss": 0.1148, "step": 20603 }, { "epoch": 2.4432586268231944, "grad_norm": 0.6379775134057803, "learning_rate": 1.7365947055186827e-05, "loss": 0.0792, "step": 20604 }, { "epoch": 2.4433772085853196, "grad_norm": 1.0406083264737265, "learning_rate": 1.7363661379365174e-05, "loss": 0.1179, "step": 20605 }, { "epoch": 2.443495790347445, "grad_norm": 0.48944762221820415, "learning_rate": 1.73613757739412e-05, "loss": 0.0628, "step": 20606 }, { "epoch": 2.4436143721095696, "grad_norm": 0.6507145661079988, "learning_rate": 1.735909023893598e-05, "loss": 0.086, "step": 20607 }, { "epoch": 2.4437329538716943, "grad_norm": 0.7364688983701472, "learning_rate": 1.7356804774370565e-05, "loss": 0.0924, "step": 20608 }, { "epoch": 2.4438515356338195, "grad_norm": 0.805729154334331, "learning_rate": 1.735451938026605e-05, "loss": 0.1342, "step": 20609 }, { "epoch": 2.4439701173959447, "grad_norm": 0.928480013438251, "learning_rate": 1.7352234056643488e-05, "loss": 0.1338, "step": 20610 }, { "epoch": 2.4440886991580695, "grad_norm": 0.9266314716924047, "learning_rate": 1.734994880352395e-05, "loss": 0.1294, "step": 20611 }, { "epoch": 2.4442072809201942, "grad_norm": 0.5453040668964139, "learning_rate": 1.7347663620928495e-05, "loss": 0.0722, "step": 20612 }, { "epoch": 2.4443258626823194, "grad_norm": 0.8332706076822329, "learning_rate": 1.7345378508878206e-05, "loss": 0.1402, "step": 20613 }, { "epoch": 2.4444444444444446, "grad_norm": 0.8407623236069776, "learning_rate": 1.7343093467394133e-05, "loss": 0.1106, "step": 20614 }, { "epoch": 2.4445630262065694, "grad_norm": 0.6323529820958378, "learning_rate": 1.7340808496497358e-05, "loss": 0.09, "step": 20615 }, { "epoch": 2.4446816079686946, "grad_norm": 0.6042538023486035, "learning_rate": 1.733852359620892e-05, "loss": 0.0795, "step": 20616 }, { "epoch": 2.4448001897308194, "grad_norm": 0.7231274228070389, "learning_rate": 1.733623876654992e-05, "loss": 0.0901, "step": 20617 }, { "epoch": 2.4449187714929446, "grad_norm": 0.5799592583649538, "learning_rate": 1.7333954007541393e-05, "loss": 0.0709, "step": 20618 }, { "epoch": 2.4450373532550693, "grad_norm": 0.7638795715868576, "learning_rate": 1.73316693192044e-05, "loss": 0.1257, "step": 20619 }, { "epoch": 2.4451559350171945, "grad_norm": 0.5556023119603624, "learning_rate": 1.7329384701560023e-05, "loss": 0.0796, "step": 20620 }, { "epoch": 2.4452745167793193, "grad_norm": 0.7641301363635972, "learning_rate": 1.7327100154629312e-05, "loss": 0.1167, "step": 20621 }, { "epoch": 2.4453930985414445, "grad_norm": 1.0633401766103183, "learning_rate": 1.7324815678433324e-05, "loss": 0.1134, "step": 20622 }, { "epoch": 2.4455116803035692, "grad_norm": 0.8603164762713452, "learning_rate": 1.7322531272993125e-05, "loss": 0.1225, "step": 20623 }, { "epoch": 2.4456302620656944, "grad_norm": 0.8806634849411635, "learning_rate": 1.732024693832977e-05, "loss": 0.0989, "step": 20624 }, { "epoch": 2.445748843827819, "grad_norm": 0.6601042775963958, "learning_rate": 1.7317962674464328e-05, "loss": 0.0948, "step": 20625 }, { "epoch": 2.4458674255899444, "grad_norm": 0.5277571994543706, "learning_rate": 1.7315678481417848e-05, "loss": 0.0722, "step": 20626 }, { "epoch": 2.445986007352069, "grad_norm": 0.6957240548938082, "learning_rate": 1.7313394359211382e-05, "loss": 0.1074, "step": 20627 }, { "epoch": 2.4461045891141944, "grad_norm": 0.7079549865104884, "learning_rate": 1.7311110307866002e-05, "loss": 0.0872, "step": 20628 }, { "epoch": 2.446223170876319, "grad_norm": 0.682266693292401, "learning_rate": 1.7308826327402755e-05, "loss": 0.0804, "step": 20629 }, { "epoch": 2.4463417526384443, "grad_norm": 0.8045841707327871, "learning_rate": 1.7306542417842704e-05, "loss": 0.1145, "step": 20630 }, { "epoch": 2.446460334400569, "grad_norm": 0.6111302203005289, "learning_rate": 1.730425857920688e-05, "loss": 0.096, "step": 20631 }, { "epoch": 2.4465789161626943, "grad_norm": 0.6780752106039678, "learning_rate": 1.7301974811516368e-05, "loss": 0.087, "step": 20632 }, { "epoch": 2.446697497924819, "grad_norm": 0.5410577719577452, "learning_rate": 1.7299691114792204e-05, "loss": 0.0757, "step": 20633 }, { "epoch": 2.4468160796869443, "grad_norm": 0.7660098513061135, "learning_rate": 1.7297407489055445e-05, "loss": 0.0803, "step": 20634 }, { "epoch": 2.446934661449069, "grad_norm": 0.6969447459121716, "learning_rate": 1.729512393432714e-05, "loss": 0.089, "step": 20635 }, { "epoch": 2.447053243211194, "grad_norm": 0.5030663985695636, "learning_rate": 1.7292840450628353e-05, "loss": 0.0726, "step": 20636 }, { "epoch": 2.447171824973319, "grad_norm": 0.959201114598613, "learning_rate": 1.7290557037980126e-05, "loss": 0.123, "step": 20637 }, { "epoch": 2.447290406735444, "grad_norm": 0.841351637113285, "learning_rate": 1.7288273696403494e-05, "loss": 0.13, "step": 20638 }, { "epoch": 2.447408988497569, "grad_norm": 0.7420667933554135, "learning_rate": 1.7285990425919536e-05, "loss": 0.1147, "step": 20639 }, { "epoch": 2.447527570259694, "grad_norm": 0.666337515076891, "learning_rate": 1.728370722654929e-05, "loss": 0.1003, "step": 20640 }, { "epoch": 2.447646152021819, "grad_norm": 0.6292501415945561, "learning_rate": 1.7281424098313795e-05, "loss": 0.078, "step": 20641 }, { "epoch": 2.447764733783944, "grad_norm": 0.6043837541234782, "learning_rate": 1.7279141041234097e-05, "loss": 0.071, "step": 20642 }, { "epoch": 2.447883315546069, "grad_norm": 0.6909210683816364, "learning_rate": 1.727685805533126e-05, "loss": 0.1009, "step": 20643 }, { "epoch": 2.448001897308194, "grad_norm": 0.545552949209228, "learning_rate": 1.7274575140626318e-05, "loss": 0.0698, "step": 20644 }, { "epoch": 2.448120479070319, "grad_norm": 0.8094543423372235, "learning_rate": 1.7272292297140322e-05, "loss": 0.0913, "step": 20645 }, { "epoch": 2.448239060832444, "grad_norm": 0.8169079042288704, "learning_rate": 1.7270009524894303e-05, "loss": 0.0967, "step": 20646 }, { "epoch": 2.448357642594569, "grad_norm": 0.7202884643729462, "learning_rate": 1.7267726823909333e-05, "loss": 0.0992, "step": 20647 }, { "epoch": 2.448476224356694, "grad_norm": 0.6731865720893271, "learning_rate": 1.7265444194206436e-05, "loss": 0.0918, "step": 20648 }, { "epoch": 2.4485948061188187, "grad_norm": 0.5727225174476387, "learning_rate": 1.726316163580665e-05, "loss": 0.0636, "step": 20649 }, { "epoch": 2.448713387880944, "grad_norm": 0.6190379343259996, "learning_rate": 1.7260879148731036e-05, "loss": 0.0763, "step": 20650 }, { "epoch": 2.4488319696430687, "grad_norm": 0.6585955819196773, "learning_rate": 1.7258596733000625e-05, "loss": 0.0925, "step": 20651 }, { "epoch": 2.448950551405194, "grad_norm": 0.7032723098575442, "learning_rate": 1.7256314388636454e-05, "loss": 0.0737, "step": 20652 }, { "epoch": 2.4490691331673187, "grad_norm": 0.7510088915659506, "learning_rate": 1.725403211565957e-05, "loss": 0.1061, "step": 20653 }, { "epoch": 2.449187714929444, "grad_norm": 0.48384843116692244, "learning_rate": 1.7251749914091008e-05, "loss": 0.0895, "step": 20654 }, { "epoch": 2.449306296691569, "grad_norm": 0.5672097329875889, "learning_rate": 1.724946778395182e-05, "loss": 0.0791, "step": 20655 }, { "epoch": 2.449424878453694, "grad_norm": 0.7311053300686302, "learning_rate": 1.7247185725263032e-05, "loss": 0.0919, "step": 20656 }, { "epoch": 2.4495434602158186, "grad_norm": 0.6491733702485836, "learning_rate": 1.7244903738045675e-05, "loss": 0.0838, "step": 20657 }, { "epoch": 2.449662041977944, "grad_norm": 0.6016747332409366, "learning_rate": 1.7242621822320802e-05, "loss": 0.075, "step": 20658 }, { "epoch": 2.449780623740069, "grad_norm": 0.45238387191227286, "learning_rate": 1.7240339978109448e-05, "loss": 0.0649, "step": 20659 }, { "epoch": 2.4498992055021938, "grad_norm": 0.5458136648623211, "learning_rate": 1.7238058205432644e-05, "loss": 0.0735, "step": 20660 }, { "epoch": 2.4500177872643185, "grad_norm": 0.6241162072549651, "learning_rate": 1.7235776504311412e-05, "loss": 0.0905, "step": 20661 }, { "epoch": 2.4501363690264437, "grad_norm": 0.5172845980238713, "learning_rate": 1.723349487476681e-05, "loss": 0.0778, "step": 20662 }, { "epoch": 2.450254950788569, "grad_norm": 0.7687110034584835, "learning_rate": 1.7231213316819854e-05, "loss": 0.1529, "step": 20663 }, { "epoch": 2.4503735325506937, "grad_norm": 1.027448874490979, "learning_rate": 1.722893183049159e-05, "loss": 0.1371, "step": 20664 }, { "epoch": 2.450492114312819, "grad_norm": 0.829382107827657, "learning_rate": 1.722665041580304e-05, "loss": 0.0923, "step": 20665 }, { "epoch": 2.4506106960749436, "grad_norm": 0.6521298261420704, "learning_rate": 1.722436907277525e-05, "loss": 0.0981, "step": 20666 }, { "epoch": 2.450729277837069, "grad_norm": 0.6739328104033177, "learning_rate": 1.7222087801429242e-05, "loss": 0.0826, "step": 20667 }, { "epoch": 2.4508478595991936, "grad_norm": 0.6710648357440095, "learning_rate": 1.7219806601786033e-05, "loss": 0.0746, "step": 20668 }, { "epoch": 2.450966441361319, "grad_norm": 0.5415076284243004, "learning_rate": 1.721752547386668e-05, "loss": 0.0744, "step": 20669 }, { "epoch": 2.4510850231234436, "grad_norm": 0.9078999517583739, "learning_rate": 1.72152444176922e-05, "loss": 0.1229, "step": 20670 }, { "epoch": 2.4512036048855688, "grad_norm": 0.7716367283922257, "learning_rate": 1.721296343328361e-05, "loss": 0.1037, "step": 20671 }, { "epoch": 2.4513221866476935, "grad_norm": 0.6293445284241302, "learning_rate": 1.721068252066195e-05, "loss": 0.0863, "step": 20672 }, { "epoch": 2.4514407684098187, "grad_norm": 0.5510232692174492, "learning_rate": 1.7208401679848246e-05, "loss": 0.0903, "step": 20673 }, { "epoch": 2.4515593501719435, "grad_norm": 0.7636611264392492, "learning_rate": 1.7206120910863527e-05, "loss": 0.0729, "step": 20674 }, { "epoch": 2.4516779319340687, "grad_norm": 0.6618452472523966, "learning_rate": 1.7203840213728817e-05, "loss": 0.0896, "step": 20675 }, { "epoch": 2.4517965136961934, "grad_norm": 0.7589325594404099, "learning_rate": 1.720155958846513e-05, "loss": 0.0952, "step": 20676 }, { "epoch": 2.4519150954583186, "grad_norm": 1.225435835852238, "learning_rate": 1.7199279035093513e-05, "loss": 0.1987, "step": 20677 }, { "epoch": 2.4520336772204434, "grad_norm": 0.5960674518193757, "learning_rate": 1.7196998553634974e-05, "loss": 0.087, "step": 20678 }, { "epoch": 2.4521522589825686, "grad_norm": 0.8508331027558069, "learning_rate": 1.719471814411053e-05, "loss": 0.1155, "step": 20679 }, { "epoch": 2.4522708407446934, "grad_norm": 0.5399054810817019, "learning_rate": 1.7192437806541224e-05, "loss": 0.0748, "step": 20680 }, { "epoch": 2.4523894225068186, "grad_norm": 0.778903838751472, "learning_rate": 1.7190157540948067e-05, "loss": 0.1036, "step": 20681 }, { "epoch": 2.4525080042689433, "grad_norm": 0.7838523505778842, "learning_rate": 1.7187877347352073e-05, "loss": 0.0946, "step": 20682 }, { "epoch": 2.4526265860310685, "grad_norm": 0.4658373898373317, "learning_rate": 1.7185597225774276e-05, "loss": 0.0549, "step": 20683 }, { "epoch": 2.4527451677931933, "grad_norm": 0.8030462255876556, "learning_rate": 1.7183317176235686e-05, "loss": 0.1209, "step": 20684 }, { "epoch": 2.4528637495553185, "grad_norm": 0.725086099998036, "learning_rate": 1.718103719875733e-05, "loss": 0.1069, "step": 20685 }, { "epoch": 2.4529823313174433, "grad_norm": 0.5637888978602948, "learning_rate": 1.7178757293360227e-05, "loss": 0.078, "step": 20686 }, { "epoch": 2.4531009130795685, "grad_norm": 0.9298620444114336, "learning_rate": 1.7176477460065377e-05, "loss": 0.1109, "step": 20687 }, { "epoch": 2.453219494841693, "grad_norm": 0.6242542919415361, "learning_rate": 1.717419769889382e-05, "loss": 0.0816, "step": 20688 }, { "epoch": 2.4533380766038184, "grad_norm": 0.810179826049842, "learning_rate": 1.7171918009866565e-05, "loss": 0.1125, "step": 20689 }, { "epoch": 2.453456658365943, "grad_norm": 0.6804402373606038, "learning_rate": 1.7169638393004615e-05, "loss": 0.0833, "step": 20690 }, { "epoch": 2.4535752401280684, "grad_norm": 1.0331740420827813, "learning_rate": 1.7167358848329012e-05, "loss": 0.1428, "step": 20691 }, { "epoch": 2.453693821890193, "grad_norm": 0.806913346781553, "learning_rate": 1.7165079375860755e-05, "loss": 0.0995, "step": 20692 }, { "epoch": 2.4538124036523183, "grad_norm": 0.6699461398483338, "learning_rate": 1.7162799975620848e-05, "loss": 0.0857, "step": 20693 }, { "epoch": 2.453930985414443, "grad_norm": 0.8506504365843588, "learning_rate": 1.716052064763032e-05, "loss": 0.1066, "step": 20694 }, { "epoch": 2.4540495671765683, "grad_norm": 0.7536798769216085, "learning_rate": 1.7158241391910174e-05, "loss": 0.0886, "step": 20695 }, { "epoch": 2.454168148938693, "grad_norm": 0.6007489884333291, "learning_rate": 1.715596220848143e-05, "loss": 0.0891, "step": 20696 }, { "epoch": 2.4542867307008183, "grad_norm": 0.6970013884414722, "learning_rate": 1.7153683097365097e-05, "loss": 0.0884, "step": 20697 }, { "epoch": 2.454405312462943, "grad_norm": 0.7896184770952923, "learning_rate": 1.715140405858218e-05, "loss": 0.1114, "step": 20698 }, { "epoch": 2.4545238942250682, "grad_norm": 0.5907643080150148, "learning_rate": 1.7149125092153695e-05, "loss": 0.0533, "step": 20699 }, { "epoch": 2.454642475987193, "grad_norm": 0.838773836410216, "learning_rate": 1.7146846198100653e-05, "loss": 0.1141, "step": 20700 }, { "epoch": 2.454761057749318, "grad_norm": 0.8718974771349209, "learning_rate": 1.7144567376444052e-05, "loss": 0.0988, "step": 20701 }, { "epoch": 2.454879639511443, "grad_norm": 1.0893218566129321, "learning_rate": 1.714228862720491e-05, "loss": 0.1552, "step": 20702 }, { "epoch": 2.454998221273568, "grad_norm": 0.8373746260281485, "learning_rate": 1.7140009950404228e-05, "loss": 0.1312, "step": 20703 }, { "epoch": 2.4551168030356934, "grad_norm": 1.0272575885875217, "learning_rate": 1.7137731346063024e-05, "loss": 0.1395, "step": 20704 }, { "epoch": 2.455235384797818, "grad_norm": 0.7064733980300377, "learning_rate": 1.713545281420228e-05, "loss": 0.084, "step": 20705 }, { "epoch": 2.455353966559943, "grad_norm": 0.48064404090402707, "learning_rate": 1.7133174354843033e-05, "loss": 0.074, "step": 20706 }, { "epoch": 2.455472548322068, "grad_norm": 0.983688647780766, "learning_rate": 1.7130895968006266e-05, "loss": 0.1494, "step": 20707 }, { "epoch": 2.4555911300841933, "grad_norm": 0.5682623779642088, "learning_rate": 1.7128617653712995e-05, "loss": 0.0894, "step": 20708 }, { "epoch": 2.455709711846318, "grad_norm": 0.92987816949919, "learning_rate": 1.71263394119842e-05, "loss": 0.1369, "step": 20709 }, { "epoch": 2.455828293608443, "grad_norm": 1.114728635468689, "learning_rate": 1.7124061242840915e-05, "loss": 0.1171, "step": 20710 }, { "epoch": 2.455946875370568, "grad_norm": 0.75004729641078, "learning_rate": 1.7121783146304128e-05, "loss": 0.1105, "step": 20711 }, { "epoch": 2.456065457132693, "grad_norm": 0.7090513495845456, "learning_rate": 1.711950512239483e-05, "loss": 0.0957, "step": 20712 }, { "epoch": 2.456184038894818, "grad_norm": 0.5673411041180091, "learning_rate": 1.7117227171134032e-05, "loss": 0.0843, "step": 20713 }, { "epoch": 2.4563026206569427, "grad_norm": 0.7534423195457624, "learning_rate": 1.7114949292542735e-05, "loss": 0.1053, "step": 20714 }, { "epoch": 2.456421202419068, "grad_norm": 0.6386549525004825, "learning_rate": 1.711267148664194e-05, "loss": 0.0931, "step": 20715 }, { "epoch": 2.456539784181193, "grad_norm": 0.8179496571374062, "learning_rate": 1.711039375345264e-05, "loss": 0.1125, "step": 20716 }, { "epoch": 2.456658365943318, "grad_norm": 0.590395900467978, "learning_rate": 1.710811609299583e-05, "loss": 0.1005, "step": 20717 }, { "epoch": 2.456776947705443, "grad_norm": 0.43932923915635047, "learning_rate": 1.710583850529252e-05, "loss": 0.0643, "step": 20718 }, { "epoch": 2.456895529467568, "grad_norm": 0.6971032617480657, "learning_rate": 1.7103560990363696e-05, "loss": 0.0955, "step": 20719 }, { "epoch": 2.457014111229693, "grad_norm": 0.4761993358266804, "learning_rate": 1.7101283548230347e-05, "loss": 0.0694, "step": 20720 }, { "epoch": 2.457132692991818, "grad_norm": 0.5690947823584435, "learning_rate": 1.709900617891349e-05, "loss": 0.0732, "step": 20721 }, { "epoch": 2.457251274753943, "grad_norm": 0.6194894518129641, "learning_rate": 1.7096728882434105e-05, "loss": 0.0796, "step": 20722 }, { "epoch": 2.4573698565160678, "grad_norm": 0.5692458567364709, "learning_rate": 1.7094451658813183e-05, "loss": 0.0793, "step": 20723 }, { "epoch": 2.457488438278193, "grad_norm": 0.6118870772239535, "learning_rate": 1.7092174508071725e-05, "loss": 0.1035, "step": 20724 }, { "epoch": 2.4576070200403177, "grad_norm": 0.7833486133996022, "learning_rate": 1.7089897430230716e-05, "loss": 0.1085, "step": 20725 }, { "epoch": 2.457725601802443, "grad_norm": 0.7123978399525839, "learning_rate": 1.7087620425311164e-05, "loss": 0.1068, "step": 20726 }, { "epoch": 2.4578441835645677, "grad_norm": 0.5545706573656838, "learning_rate": 1.7085343493334043e-05, "loss": 0.0853, "step": 20727 }, { "epoch": 2.457962765326693, "grad_norm": 0.69181583901534, "learning_rate": 1.7083066634320344e-05, "loss": 0.0935, "step": 20728 }, { "epoch": 2.4580813470888176, "grad_norm": 0.6367548304137194, "learning_rate": 1.7080789848291066e-05, "loss": 0.0869, "step": 20729 }, { "epoch": 2.458199928850943, "grad_norm": 0.6287653160504295, "learning_rate": 1.70785131352672e-05, "loss": 0.0842, "step": 20730 }, { "epoch": 2.4583185106130676, "grad_norm": 0.4747450642971999, "learning_rate": 1.7076236495269725e-05, "loss": 0.0786, "step": 20731 }, { "epoch": 2.458437092375193, "grad_norm": 0.4435758192105646, "learning_rate": 1.7073959928319627e-05, "loss": 0.0621, "step": 20732 }, { "epoch": 2.4585556741373176, "grad_norm": 0.8595675776563956, "learning_rate": 1.70716834344379e-05, "loss": 0.1254, "step": 20733 }, { "epoch": 2.4586742558994428, "grad_norm": 0.5049559956172341, "learning_rate": 1.706940701364554e-05, "loss": 0.0705, "step": 20734 }, { "epoch": 2.4587928376615675, "grad_norm": 0.5755230054484719, "learning_rate": 1.706713066596351e-05, "loss": 0.1004, "step": 20735 }, { "epoch": 2.4589114194236927, "grad_norm": 0.928125280869315, "learning_rate": 1.7064854391412814e-05, "loss": 0.101, "step": 20736 }, { "epoch": 2.4590300011858175, "grad_norm": 0.8798653726755944, "learning_rate": 1.7062578190014435e-05, "loss": 0.1032, "step": 20737 }, { "epoch": 2.4591485829479427, "grad_norm": 0.7391205434172601, "learning_rate": 1.7060302061789345e-05, "loss": 0.1158, "step": 20738 }, { "epoch": 2.4592671647100675, "grad_norm": 0.6241218180898043, "learning_rate": 1.7058026006758526e-05, "loss": 0.0816, "step": 20739 }, { "epoch": 2.4593857464721927, "grad_norm": 0.9129901370021706, "learning_rate": 1.705575002494298e-05, "loss": 0.1269, "step": 20740 }, { "epoch": 2.4595043282343174, "grad_norm": 0.7853614965000809, "learning_rate": 1.7053474116363676e-05, "loss": 0.1131, "step": 20741 }, { "epoch": 2.4596229099964426, "grad_norm": 0.6805143473698542, "learning_rate": 1.7051198281041593e-05, "loss": 0.0821, "step": 20742 }, { "epoch": 2.4597414917585674, "grad_norm": 0.6933708523138653, "learning_rate": 1.7048922518997713e-05, "loss": 0.0891, "step": 20743 }, { "epoch": 2.4598600735206926, "grad_norm": 0.8001773645776676, "learning_rate": 1.7046646830253017e-05, "loss": 0.1288, "step": 20744 }, { "epoch": 2.4599786552828173, "grad_norm": 1.1201929494605851, "learning_rate": 1.7044371214828487e-05, "loss": 0.1192, "step": 20745 }, { "epoch": 2.4600972370449425, "grad_norm": 0.9153553407158945, "learning_rate": 1.70420956727451e-05, "loss": 0.1244, "step": 20746 }, { "epoch": 2.4602158188070673, "grad_norm": 0.7389488851208383, "learning_rate": 1.703982020402382e-05, "loss": 0.1127, "step": 20747 }, { "epoch": 2.4603344005691925, "grad_norm": 1.1523753301387447, "learning_rate": 1.7037544808685653e-05, "loss": 0.1479, "step": 20748 }, { "epoch": 2.4604529823313173, "grad_norm": 0.6671207068770151, "learning_rate": 1.7035269486751554e-05, "loss": 0.0994, "step": 20749 }, { "epoch": 2.4605715640934425, "grad_norm": 0.8258596313317011, "learning_rate": 1.7032994238242492e-05, "loss": 0.0902, "step": 20750 }, { "epoch": 2.4606901458555672, "grad_norm": 0.799154387052367, "learning_rate": 1.7030719063179464e-05, "loss": 0.1457, "step": 20751 }, { "epoch": 2.4608087276176924, "grad_norm": 0.6779122176254897, "learning_rate": 1.702844396158343e-05, "loss": 0.0999, "step": 20752 }, { "epoch": 2.4609273093798176, "grad_norm": 0.9056489379806298, "learning_rate": 1.7026168933475372e-05, "loss": 0.105, "step": 20753 }, { "epoch": 2.4610458911419424, "grad_norm": 0.5827249285188854, "learning_rate": 1.702389397887625e-05, "loss": 0.088, "step": 20754 }, { "epoch": 2.461164472904067, "grad_norm": 0.6170775452447961, "learning_rate": 1.7021619097807052e-05, "loss": 0.1038, "step": 20755 }, { "epoch": 2.4612830546661923, "grad_norm": 0.7667511235444905, "learning_rate": 1.7019344290288744e-05, "loss": 0.1067, "step": 20756 }, { "epoch": 2.4614016364283176, "grad_norm": 0.6275994452482672, "learning_rate": 1.7017069556342294e-05, "loss": 0.0716, "step": 20757 }, { "epoch": 2.4615202181904423, "grad_norm": 0.7299637542168926, "learning_rate": 1.7014794895988662e-05, "loss": 0.1157, "step": 20758 }, { "epoch": 2.461638799952567, "grad_norm": 0.6221920224885246, "learning_rate": 1.701252030924884e-05, "loss": 0.0588, "step": 20759 }, { "epoch": 2.4617573817146923, "grad_norm": 0.6430015647547059, "learning_rate": 1.7010245796143785e-05, "loss": 0.0886, "step": 20760 }, { "epoch": 2.4618759634768175, "grad_norm": 0.6116693458371941, "learning_rate": 1.7007971356694466e-05, "loss": 0.0763, "step": 20761 }, { "epoch": 2.4619945452389422, "grad_norm": 0.566986709936935, "learning_rate": 1.700569699092185e-05, "loss": 0.0763, "step": 20762 }, { "epoch": 2.462113127001067, "grad_norm": 1.1885859149895452, "learning_rate": 1.70034226988469e-05, "loss": 0.1636, "step": 20763 }, { "epoch": 2.462231708763192, "grad_norm": 0.7382774916257784, "learning_rate": 1.7001148480490593e-05, "loss": 0.1049, "step": 20764 }, { "epoch": 2.4623502905253174, "grad_norm": 0.7122169619600097, "learning_rate": 1.6998874335873876e-05, "loss": 0.1031, "step": 20765 }, { "epoch": 2.462468872287442, "grad_norm": 0.8568723934270331, "learning_rate": 1.699660026501774e-05, "loss": 0.1362, "step": 20766 }, { "epoch": 2.4625874540495674, "grad_norm": 0.9129848078809151, "learning_rate": 1.6994326267943134e-05, "loss": 0.1301, "step": 20767 }, { "epoch": 2.462706035811692, "grad_norm": 1.004682991546434, "learning_rate": 1.6992052344671023e-05, "loss": 0.1407, "step": 20768 }, { "epoch": 2.4628246175738173, "grad_norm": 0.5274869455322904, "learning_rate": 1.6989778495222357e-05, "loss": 0.0768, "step": 20769 }, { "epoch": 2.462943199335942, "grad_norm": 0.981989817381269, "learning_rate": 1.6987504719618122e-05, "loss": 0.1373, "step": 20770 }, { "epoch": 2.4630617810980673, "grad_norm": 1.6364125518893593, "learning_rate": 1.698523101787927e-05, "loss": 0.192, "step": 20771 }, { "epoch": 2.463180362860192, "grad_norm": 0.7375696201122908, "learning_rate": 1.6982957390026748e-05, "loss": 0.089, "step": 20772 }, { "epoch": 2.4632989446223172, "grad_norm": 0.6379653717319274, "learning_rate": 1.6980683836081535e-05, "loss": 0.098, "step": 20773 }, { "epoch": 2.463417526384442, "grad_norm": 0.823009970682833, "learning_rate": 1.697841035606458e-05, "loss": 0.138, "step": 20774 }, { "epoch": 2.463536108146567, "grad_norm": 0.43252702956795147, "learning_rate": 1.6976136949996853e-05, "loss": 0.0577, "step": 20775 }, { "epoch": 2.463654689908692, "grad_norm": 0.5038853493531708, "learning_rate": 1.697386361789929e-05, "loss": 0.077, "step": 20776 }, { "epoch": 2.463773271670817, "grad_norm": 0.60103834317059, "learning_rate": 1.697159035979287e-05, "loss": 0.0886, "step": 20777 }, { "epoch": 2.463891853432942, "grad_norm": 0.5737636061698748, "learning_rate": 1.6969317175698547e-05, "loss": 0.0888, "step": 20778 }, { "epoch": 2.464010435195067, "grad_norm": 0.870653423814872, "learning_rate": 1.6967044065637273e-05, "loss": 0.1389, "step": 20779 }, { "epoch": 2.464129016957192, "grad_norm": 0.6397697773338261, "learning_rate": 1.6964771029629986e-05, "loss": 0.0704, "step": 20780 }, { "epoch": 2.464247598719317, "grad_norm": 0.5074488480020862, "learning_rate": 1.696249806769767e-05, "loss": 0.0754, "step": 20781 }, { "epoch": 2.464366180481442, "grad_norm": 0.7673985569711886, "learning_rate": 1.6960225179861262e-05, "loss": 0.121, "step": 20782 }, { "epoch": 2.464484762243567, "grad_norm": 0.8646260904109409, "learning_rate": 1.695795236614172e-05, "loss": 0.105, "step": 20783 }, { "epoch": 2.464603344005692, "grad_norm": 0.6509168327452073, "learning_rate": 1.6955679626559985e-05, "loss": 0.1061, "step": 20784 }, { "epoch": 2.464721925767817, "grad_norm": 1.3666039418112053, "learning_rate": 1.6953406961137032e-05, "loss": 0.1688, "step": 20785 }, { "epoch": 2.4648405075299418, "grad_norm": 0.5179677156640112, "learning_rate": 1.69511343698938e-05, "loss": 0.0746, "step": 20786 }, { "epoch": 2.464959089292067, "grad_norm": 0.5558608098946262, "learning_rate": 1.694886185285124e-05, "loss": 0.076, "step": 20787 }, { "epoch": 2.4650776710541917, "grad_norm": 0.4255606961514756, "learning_rate": 1.6946589410030287e-05, "loss": 0.0597, "step": 20788 }, { "epoch": 2.465196252816317, "grad_norm": 0.7737026815826021, "learning_rate": 1.6944317041451915e-05, "loss": 0.1137, "step": 20789 }, { "epoch": 2.4653148345784417, "grad_norm": 0.5396480267058706, "learning_rate": 1.694204474713706e-05, "loss": 0.0665, "step": 20790 }, { "epoch": 2.465433416340567, "grad_norm": 0.6876677017619486, "learning_rate": 1.6939772527106665e-05, "loss": 0.108, "step": 20791 }, { "epoch": 2.4655519981026917, "grad_norm": 0.7176614857676281, "learning_rate": 1.6937500381381692e-05, "loss": 0.0985, "step": 20792 }, { "epoch": 2.465670579864817, "grad_norm": 0.733196446799048, "learning_rate": 1.693522830998307e-05, "loss": 0.1015, "step": 20793 }, { "epoch": 2.4657891616269416, "grad_norm": 0.9722975490572883, "learning_rate": 1.6932956312931764e-05, "loss": 0.114, "step": 20794 }, { "epoch": 2.465907743389067, "grad_norm": 0.72007172237148, "learning_rate": 1.6930684390248696e-05, "loss": 0.1273, "step": 20795 }, { "epoch": 2.4660263251511916, "grad_norm": 0.9439747019329725, "learning_rate": 1.692841254195483e-05, "loss": 0.1231, "step": 20796 }, { "epoch": 2.466144906913317, "grad_norm": 0.839343208694002, "learning_rate": 1.6926140768071106e-05, "loss": 0.1138, "step": 20797 }, { "epoch": 2.4662634886754415, "grad_norm": 0.6414732619751775, "learning_rate": 1.692386906861846e-05, "loss": 0.1076, "step": 20798 }, { "epoch": 2.4663820704375667, "grad_norm": 0.7197324642693047, "learning_rate": 1.6921597443617832e-05, "loss": 0.0836, "step": 20799 }, { "epoch": 2.4665006521996915, "grad_norm": 0.7101516697473397, "learning_rate": 1.6919325893090175e-05, "loss": 0.095, "step": 20800 }, { "epoch": 2.4666192339618167, "grad_norm": 0.8001347536685708, "learning_rate": 1.6917054417056423e-05, "loss": 0.1194, "step": 20801 }, { "epoch": 2.466737815723942, "grad_norm": 0.5281789561741611, "learning_rate": 1.6914783015537518e-05, "loss": 0.0807, "step": 20802 }, { "epoch": 2.4668563974860667, "grad_norm": 0.8253117695585921, "learning_rate": 1.691251168855439e-05, "loss": 0.1292, "step": 20803 }, { "epoch": 2.4669749792481914, "grad_norm": 0.6951253769962257, "learning_rate": 1.6910240436127997e-05, "loss": 0.1087, "step": 20804 }, { "epoch": 2.4670935610103166, "grad_norm": 0.8507626889453094, "learning_rate": 1.6907969258279265e-05, "loss": 0.1304, "step": 20805 }, { "epoch": 2.467212142772442, "grad_norm": 0.47753121639335266, "learning_rate": 1.6905698155029124e-05, "loss": 0.0684, "step": 20806 }, { "epoch": 2.4673307245345666, "grad_norm": 1.33201830589515, "learning_rate": 1.6903427126398534e-05, "loss": 0.1835, "step": 20807 }, { "epoch": 2.4674493062966913, "grad_norm": 0.5364184670646888, "learning_rate": 1.6901156172408416e-05, "loss": 0.0619, "step": 20808 }, { "epoch": 2.4675678880588165, "grad_norm": 0.5538779292713681, "learning_rate": 1.689888529307971e-05, "loss": 0.0839, "step": 20809 }, { "epoch": 2.4676864698209418, "grad_norm": 0.6816349757690612, "learning_rate": 1.6896614488433328e-05, "loss": 0.0847, "step": 20810 }, { "epoch": 2.4678050515830665, "grad_norm": 0.874855711035919, "learning_rate": 1.6894343758490238e-05, "loss": 0.1089, "step": 20811 }, { "epoch": 2.4679236333451913, "grad_norm": 0.8386167168719807, "learning_rate": 1.6892073103271355e-05, "loss": 0.1102, "step": 20812 }, { "epoch": 2.4680422151073165, "grad_norm": 0.6675902130738254, "learning_rate": 1.688980252279762e-05, "loss": 0.0845, "step": 20813 }, { "epoch": 2.4681607968694417, "grad_norm": 0.59146941613212, "learning_rate": 1.6887532017089948e-05, "loss": 0.0695, "step": 20814 }, { "epoch": 2.4682793786315664, "grad_norm": 0.742236030931941, "learning_rate": 1.6885261586169296e-05, "loss": 0.102, "step": 20815 }, { "epoch": 2.4683979603936916, "grad_norm": 0.5222529052743768, "learning_rate": 1.688299123005658e-05, "loss": 0.0633, "step": 20816 }, { "epoch": 2.4685165421558164, "grad_norm": 0.7314251504842825, "learning_rate": 1.688072094877273e-05, "loss": 0.1058, "step": 20817 }, { "epoch": 2.4686351239179416, "grad_norm": 0.5015893350329986, "learning_rate": 1.6878450742338666e-05, "loss": 0.072, "step": 20818 }, { "epoch": 2.4687537056800664, "grad_norm": 0.5642978848452547, "learning_rate": 1.6876180610775338e-05, "loss": 0.0842, "step": 20819 }, { "epoch": 2.4688722874421916, "grad_norm": 0.7908886411978082, "learning_rate": 1.6873910554103664e-05, "loss": 0.1044, "step": 20820 }, { "epoch": 2.4689908692043163, "grad_norm": 0.5625421057479723, "learning_rate": 1.687164057234456e-05, "loss": 0.0667, "step": 20821 }, { "epoch": 2.4691094509664415, "grad_norm": 0.6304259620510066, "learning_rate": 1.686937066551897e-05, "loss": 0.0876, "step": 20822 }, { "epoch": 2.4692280327285663, "grad_norm": 0.7795828747700618, "learning_rate": 1.6867100833647807e-05, "loss": 0.0939, "step": 20823 }, { "epoch": 2.4693466144906915, "grad_norm": 0.8188389853966487, "learning_rate": 1.6864831076752006e-05, "loss": 0.1132, "step": 20824 }, { "epoch": 2.4694651962528162, "grad_norm": 0.5247779674095279, "learning_rate": 1.686256139485248e-05, "loss": 0.0582, "step": 20825 }, { "epoch": 2.4695837780149414, "grad_norm": 0.7492646728498968, "learning_rate": 1.6860291787970164e-05, "loss": 0.1026, "step": 20826 }, { "epoch": 2.469702359777066, "grad_norm": 0.659325577657611, "learning_rate": 1.685802225612598e-05, "loss": 0.0914, "step": 20827 }, { "epoch": 2.4698209415391914, "grad_norm": 0.9625294419921993, "learning_rate": 1.6855752799340846e-05, "loss": 0.1369, "step": 20828 }, { "epoch": 2.469939523301316, "grad_norm": 0.6760899523224889, "learning_rate": 1.6853483417635674e-05, "loss": 0.0792, "step": 20829 }, { "epoch": 2.4700581050634414, "grad_norm": 0.4786214984169984, "learning_rate": 1.6851214111031403e-05, "loss": 0.054, "step": 20830 }, { "epoch": 2.470176686825566, "grad_norm": 0.8780740686007096, "learning_rate": 1.684894487954894e-05, "loss": 0.1465, "step": 20831 }, { "epoch": 2.4702952685876913, "grad_norm": 0.9258599112082196, "learning_rate": 1.6846675723209216e-05, "loss": 0.1309, "step": 20832 }, { "epoch": 2.470413850349816, "grad_norm": 0.418278182923433, "learning_rate": 1.6844406642033133e-05, "loss": 0.0692, "step": 20833 }, { "epoch": 2.4705324321119413, "grad_norm": 0.44993278633825823, "learning_rate": 1.684213763604163e-05, "loss": 0.0532, "step": 20834 }, { "epoch": 2.470651013874066, "grad_norm": 0.6645901154689963, "learning_rate": 1.6839868705255614e-05, "loss": 0.0891, "step": 20835 }, { "epoch": 2.4707695956361913, "grad_norm": 1.0365730572358296, "learning_rate": 1.6837599849695995e-05, "loss": 0.1032, "step": 20836 }, { "epoch": 2.470888177398316, "grad_norm": 0.587135642422548, "learning_rate": 1.6835331069383704e-05, "loss": 0.0736, "step": 20837 }, { "epoch": 2.471006759160441, "grad_norm": 0.73036808403641, "learning_rate": 1.6833062364339648e-05, "loss": 0.0855, "step": 20838 }, { "epoch": 2.471125340922566, "grad_norm": 0.8579687972394711, "learning_rate": 1.6830793734584742e-05, "loss": 0.1123, "step": 20839 }, { "epoch": 2.471243922684691, "grad_norm": 0.5133076166621489, "learning_rate": 1.6828525180139888e-05, "loss": 0.0568, "step": 20840 }, { "epoch": 2.471362504446816, "grad_norm": 0.7116661095306224, "learning_rate": 1.682625670102602e-05, "loss": 0.0965, "step": 20841 }, { "epoch": 2.471481086208941, "grad_norm": 0.4673459356829709, "learning_rate": 1.6823988297264042e-05, "loss": 0.0665, "step": 20842 }, { "epoch": 2.471599667971066, "grad_norm": 0.5896110873083569, "learning_rate": 1.6821719968874873e-05, "loss": 0.0813, "step": 20843 }, { "epoch": 2.471718249733191, "grad_norm": 0.5183638055753489, "learning_rate": 1.68194517158794e-05, "loss": 0.0759, "step": 20844 }, { "epoch": 2.471836831495316, "grad_norm": 0.9030521975474598, "learning_rate": 1.6817183538298565e-05, "loss": 0.1475, "step": 20845 }, { "epoch": 2.471955413257441, "grad_norm": 0.6263288392695359, "learning_rate": 1.681491543615326e-05, "loss": 0.0774, "step": 20846 }, { "epoch": 2.472073995019566, "grad_norm": 0.7477516937382337, "learning_rate": 1.6812647409464398e-05, "loss": 0.0965, "step": 20847 }, { "epoch": 2.472192576781691, "grad_norm": 0.5724601862420308, "learning_rate": 1.6810379458252878e-05, "loss": 0.0731, "step": 20848 }, { "epoch": 2.472311158543816, "grad_norm": 0.8359756643915937, "learning_rate": 1.6808111582539628e-05, "loss": 0.0873, "step": 20849 }, { "epoch": 2.472429740305941, "grad_norm": 0.5309654285608767, "learning_rate": 1.6805843782345544e-05, "loss": 0.0819, "step": 20850 }, { "epoch": 2.4725483220680657, "grad_norm": 0.5490688889326046, "learning_rate": 1.6803576057691516e-05, "loss": 0.0657, "step": 20851 }, { "epoch": 2.472666903830191, "grad_norm": 0.63263180598042, "learning_rate": 1.6801308408598482e-05, "loss": 0.0944, "step": 20852 }, { "epoch": 2.4727854855923157, "grad_norm": 0.9599170512523392, "learning_rate": 1.6799040835087325e-05, "loss": 0.1342, "step": 20853 }, { "epoch": 2.472904067354441, "grad_norm": 0.5064674152518135, "learning_rate": 1.6796773337178955e-05, "loss": 0.0607, "step": 20854 }, { "epoch": 2.473022649116566, "grad_norm": 1.1686459350117822, "learning_rate": 1.679450591489427e-05, "loss": 0.172, "step": 20855 }, { "epoch": 2.473141230878691, "grad_norm": 1.0091980741443507, "learning_rate": 1.6792238568254186e-05, "loss": 0.1215, "step": 20856 }, { "epoch": 2.4732598126408156, "grad_norm": 0.6320187378887063, "learning_rate": 1.67899712972796e-05, "loss": 0.0856, "step": 20857 }, { "epoch": 2.473378394402941, "grad_norm": 0.8091061130061451, "learning_rate": 1.678770410199141e-05, "loss": 0.0863, "step": 20858 }, { "epoch": 2.473496976165066, "grad_norm": 0.8853092726514739, "learning_rate": 1.678543698241051e-05, "loss": 0.1121, "step": 20859 }, { "epoch": 2.473615557927191, "grad_norm": 0.8236733445708486, "learning_rate": 1.6783169938557812e-05, "loss": 0.0953, "step": 20860 }, { "epoch": 2.4737341396893155, "grad_norm": 0.7030607368041392, "learning_rate": 1.678090297045421e-05, "loss": 0.0916, "step": 20861 }, { "epoch": 2.4738527214514408, "grad_norm": 0.6666951533446697, "learning_rate": 1.67786360781206e-05, "loss": 0.0987, "step": 20862 }, { "epoch": 2.473971303213566, "grad_norm": 0.8857836874015975, "learning_rate": 1.6776369261577885e-05, "loss": 0.1154, "step": 20863 }, { "epoch": 2.4740898849756907, "grad_norm": 0.7493463685740321, "learning_rate": 1.6774102520846968e-05, "loss": 0.1085, "step": 20864 }, { "epoch": 2.474208466737816, "grad_norm": 0.4527035276209304, "learning_rate": 1.6771835855948738e-05, "loss": 0.0535, "step": 20865 }, { "epoch": 2.4743270484999407, "grad_norm": 0.7629990952780102, "learning_rate": 1.6769569266904078e-05, "loss": 0.1346, "step": 20866 }, { "epoch": 2.474445630262066, "grad_norm": 1.0294989187828374, "learning_rate": 1.676730275373391e-05, "loss": 0.1171, "step": 20867 }, { "epoch": 2.4745642120241906, "grad_norm": 0.6702732317329356, "learning_rate": 1.6765036316459113e-05, "loss": 0.0665, "step": 20868 }, { "epoch": 2.474682793786316, "grad_norm": 0.8388952650128475, "learning_rate": 1.6762769955100585e-05, "loss": 0.0887, "step": 20869 }, { "epoch": 2.4748013755484406, "grad_norm": 0.5945641814413383, "learning_rate": 1.6760503669679205e-05, "loss": 0.0937, "step": 20870 }, { "epoch": 2.474919957310566, "grad_norm": 0.5439257491835755, "learning_rate": 1.675823746021589e-05, "loss": 0.0745, "step": 20871 }, { "epoch": 2.4750385390726906, "grad_norm": 0.8361707445319233, "learning_rate": 1.6755971326731512e-05, "loss": 0.0992, "step": 20872 }, { "epoch": 2.4751571208348158, "grad_norm": 0.5613437874430875, "learning_rate": 1.675370526924697e-05, "loss": 0.066, "step": 20873 }, { "epoch": 2.4752757025969405, "grad_norm": 0.5137499077786829, "learning_rate": 1.675143928778315e-05, "loss": 0.0681, "step": 20874 }, { "epoch": 2.4753942843590657, "grad_norm": 0.6730086576044848, "learning_rate": 1.674917338236095e-05, "loss": 0.0812, "step": 20875 }, { "epoch": 2.4755128661211905, "grad_norm": 1.0388758307379284, "learning_rate": 1.6746907553001258e-05, "loss": 0.1472, "step": 20876 }, { "epoch": 2.4756314478833157, "grad_norm": 0.5251882127768448, "learning_rate": 1.6744641799724942e-05, "loss": 0.0804, "step": 20877 }, { "epoch": 2.4757500296454404, "grad_norm": 0.5857378395106151, "learning_rate": 1.6742376122552923e-05, "loss": 0.0915, "step": 20878 }, { "epoch": 2.4758686114075656, "grad_norm": 0.6276790742194166, "learning_rate": 1.6740110521506063e-05, "loss": 0.0823, "step": 20879 }, { "epoch": 2.4759871931696904, "grad_norm": 0.5708576823823224, "learning_rate": 1.6737844996605252e-05, "loss": 0.0905, "step": 20880 }, { "epoch": 2.4761057749318156, "grad_norm": 0.5879974987925588, "learning_rate": 1.6735579547871378e-05, "loss": 0.0709, "step": 20881 }, { "epoch": 2.4762243566939404, "grad_norm": 0.8166135196007918, "learning_rate": 1.6733314175325327e-05, "loss": 0.1281, "step": 20882 }, { "epoch": 2.4763429384560656, "grad_norm": 1.01793250411701, "learning_rate": 1.6731048878987988e-05, "loss": 0.1437, "step": 20883 }, { "epoch": 2.4764615202181903, "grad_norm": 0.6000151118382209, "learning_rate": 1.6728783658880236e-05, "loss": 0.0777, "step": 20884 }, { "epoch": 2.4765801019803155, "grad_norm": 0.5661926944313093, "learning_rate": 1.6726518515022946e-05, "loss": 0.086, "step": 20885 }, { "epoch": 2.4766986837424403, "grad_norm": 0.9086423712169093, "learning_rate": 1.6724253447437023e-05, "loss": 0.1042, "step": 20886 }, { "epoch": 2.4768172655045655, "grad_norm": 0.7487607421652654, "learning_rate": 1.672198845614333e-05, "loss": 0.0993, "step": 20887 }, { "epoch": 2.4769358472666902, "grad_norm": 0.594224825883056, "learning_rate": 1.6719723541162755e-05, "loss": 0.0908, "step": 20888 }, { "epoch": 2.4770544290288155, "grad_norm": 0.6166587855565865, "learning_rate": 1.6717458702516166e-05, "loss": 0.0799, "step": 20889 }, { "epoch": 2.47717301079094, "grad_norm": 0.5861612401130948, "learning_rate": 1.6715193940224457e-05, "loss": 0.0969, "step": 20890 }, { "epoch": 2.4772915925530654, "grad_norm": 0.4830215559146203, "learning_rate": 1.67129292543085e-05, "loss": 0.0654, "step": 20891 }, { "epoch": 2.47741017431519, "grad_norm": 0.527961540532863, "learning_rate": 1.671066464478917e-05, "loss": 0.0667, "step": 20892 }, { "epoch": 2.4775287560773154, "grad_norm": 0.5203924182673586, "learning_rate": 1.6708400111687346e-05, "loss": 0.061, "step": 20893 }, { "epoch": 2.47764733783944, "grad_norm": 0.8834460911290144, "learning_rate": 1.6706135655023912e-05, "loss": 0.1159, "step": 20894 }, { "epoch": 2.4777659196015653, "grad_norm": 0.6784038947702673, "learning_rate": 1.6703871274819737e-05, "loss": 0.0849, "step": 20895 }, { "epoch": 2.47788450136369, "grad_norm": 0.6192179507101, "learning_rate": 1.6701606971095687e-05, "loss": 0.0977, "step": 20896 }, { "epoch": 2.4780030831258153, "grad_norm": 0.5783644581521827, "learning_rate": 1.6699342743872655e-05, "loss": 0.0714, "step": 20897 }, { "epoch": 2.47812166488794, "grad_norm": 0.7346089541084143, "learning_rate": 1.66970785931715e-05, "loss": 0.116, "step": 20898 }, { "epoch": 2.4782402466500653, "grad_norm": 0.6916119273759727, "learning_rate": 1.6694814519013102e-05, "loss": 0.0917, "step": 20899 }, { "epoch": 2.47835882841219, "grad_norm": 0.5770411178297564, "learning_rate": 1.6692550521418317e-05, "loss": 0.0721, "step": 20900 }, { "epoch": 2.478477410174315, "grad_norm": 0.632840244238328, "learning_rate": 1.669028660040804e-05, "loss": 0.0851, "step": 20901 }, { "epoch": 2.47859599193644, "grad_norm": 0.5075395247053768, "learning_rate": 1.6688022756003123e-05, "loss": 0.0742, "step": 20902 }, { "epoch": 2.478714573698565, "grad_norm": 0.7468849260735413, "learning_rate": 1.6685758988224453e-05, "loss": 0.1225, "step": 20903 }, { "epoch": 2.4788331554606904, "grad_norm": 0.7409211705218411, "learning_rate": 1.6683495297092876e-05, "loss": 0.1123, "step": 20904 }, { "epoch": 2.478951737222815, "grad_norm": 0.7113152381720811, "learning_rate": 1.6681231682629287e-05, "loss": 0.0827, "step": 20905 }, { "epoch": 2.47907031898494, "grad_norm": 0.6905949020336172, "learning_rate": 1.667896814485454e-05, "loss": 0.0797, "step": 20906 }, { "epoch": 2.479188900747065, "grad_norm": 0.5787937602614343, "learning_rate": 1.667670468378949e-05, "loss": 0.0794, "step": 20907 }, { "epoch": 2.4793074825091903, "grad_norm": 0.6054909455882728, "learning_rate": 1.667444129945503e-05, "loss": 0.0951, "step": 20908 }, { "epoch": 2.479426064271315, "grad_norm": 0.505244236828614, "learning_rate": 1.6672177991872007e-05, "loss": 0.0649, "step": 20909 }, { "epoch": 2.47954464603344, "grad_norm": 0.7433867246391286, "learning_rate": 1.6669914761061288e-05, "loss": 0.1021, "step": 20910 }, { "epoch": 2.479663227795565, "grad_norm": 0.6960733448430833, "learning_rate": 1.666765160704374e-05, "loss": 0.0837, "step": 20911 }, { "epoch": 2.4797818095576902, "grad_norm": 0.657769913239249, "learning_rate": 1.6665388529840225e-05, "loss": 0.0928, "step": 20912 }, { "epoch": 2.479900391319815, "grad_norm": 0.7013208499142256, "learning_rate": 1.666312552947162e-05, "loss": 0.0941, "step": 20913 }, { "epoch": 2.4800189730819397, "grad_norm": 0.7322826566240648, "learning_rate": 1.6660862605958766e-05, "loss": 0.0942, "step": 20914 }, { "epoch": 2.480137554844065, "grad_norm": 0.8629534244016511, "learning_rate": 1.6658599759322524e-05, "loss": 0.1141, "step": 20915 }, { "epoch": 2.48025613660619, "grad_norm": 0.7162965006787277, "learning_rate": 1.6656336989583775e-05, "loss": 0.0923, "step": 20916 }, { "epoch": 2.480374718368315, "grad_norm": 0.7931900673066015, "learning_rate": 1.6654074296763366e-05, "loss": 0.1217, "step": 20917 }, { "epoch": 2.48049330013044, "grad_norm": 0.8875287626150337, "learning_rate": 1.6651811680882158e-05, "loss": 0.0912, "step": 20918 }, { "epoch": 2.480611881892565, "grad_norm": 0.7192129266069622, "learning_rate": 1.6649549141960995e-05, "loss": 0.0895, "step": 20919 }, { "epoch": 2.48073046365469, "grad_norm": 0.5772411023556344, "learning_rate": 1.6647286680020765e-05, "loss": 0.069, "step": 20920 }, { "epoch": 2.480849045416815, "grad_norm": 1.2966894282815704, "learning_rate": 1.66450242950823e-05, "loss": 0.1865, "step": 20921 }, { "epoch": 2.48096762717894, "grad_norm": 0.6656895757611063, "learning_rate": 1.664276198716647e-05, "loss": 0.0819, "step": 20922 }, { "epoch": 2.481086208941065, "grad_norm": 0.7166193321589911, "learning_rate": 1.6640499756294124e-05, "loss": 0.1119, "step": 20923 }, { "epoch": 2.48120479070319, "grad_norm": 0.46124509979925676, "learning_rate": 1.6638237602486123e-05, "loss": 0.0621, "step": 20924 }, { "epoch": 2.4813233724653148, "grad_norm": 0.6874137745423312, "learning_rate": 1.663597552576332e-05, "loss": 0.0836, "step": 20925 }, { "epoch": 2.48144195422744, "grad_norm": 0.8654707313955415, "learning_rate": 1.6633713526146552e-05, "loss": 0.1146, "step": 20926 }, { "epoch": 2.4815605359895647, "grad_norm": 0.7531655452112359, "learning_rate": 1.6631451603656696e-05, "loss": 0.1217, "step": 20927 }, { "epoch": 2.48167911775169, "grad_norm": 0.9923874710229172, "learning_rate": 1.6629189758314598e-05, "loss": 0.1462, "step": 20928 }, { "epoch": 2.4817976995138147, "grad_norm": 0.8159198518300302, "learning_rate": 1.6626927990141107e-05, "loss": 0.104, "step": 20929 }, { "epoch": 2.48191628127594, "grad_norm": 0.39740758692740447, "learning_rate": 1.6624666299157055e-05, "loss": 0.0499, "step": 20930 }, { "epoch": 2.4820348630380646, "grad_norm": 0.5903897924439601, "learning_rate": 1.6622404685383326e-05, "loss": 0.0765, "step": 20931 }, { "epoch": 2.48215344480019, "grad_norm": 0.7859192777542752, "learning_rate": 1.6620143148840743e-05, "loss": 0.1177, "step": 20932 }, { "epoch": 2.4822720265623146, "grad_norm": 0.7632807089580872, "learning_rate": 1.6617881689550163e-05, "loss": 0.1091, "step": 20933 }, { "epoch": 2.48239060832444, "grad_norm": 0.7561981229300224, "learning_rate": 1.6615620307532437e-05, "loss": 0.0872, "step": 20934 }, { "epoch": 2.4825091900865646, "grad_norm": 0.7965787867353483, "learning_rate": 1.6613359002808416e-05, "loss": 0.0919, "step": 20935 }, { "epoch": 2.4826277718486898, "grad_norm": 0.6757902610536951, "learning_rate": 1.6611097775398943e-05, "loss": 0.0944, "step": 20936 }, { "epoch": 2.4827463536108145, "grad_norm": 0.5083696099669677, "learning_rate": 1.6608836625324846e-05, "loss": 0.0842, "step": 20937 }, { "epoch": 2.4828649353729397, "grad_norm": 0.9906304326052547, "learning_rate": 1.6606575552606998e-05, "loss": 0.1336, "step": 20938 }, { "epoch": 2.4829835171350645, "grad_norm": 0.5215708039581911, "learning_rate": 1.6604314557266233e-05, "loss": 0.071, "step": 20939 }, { "epoch": 2.4831020988971897, "grad_norm": 0.8223940491869038, "learning_rate": 1.6602053639323385e-05, "loss": 0.1286, "step": 20940 }, { "epoch": 2.4832206806593144, "grad_norm": 0.8003510933586482, "learning_rate": 1.65997927987993e-05, "loss": 0.0887, "step": 20941 }, { "epoch": 2.4833392624214397, "grad_norm": 0.7793940417115367, "learning_rate": 1.659753203571483e-05, "loss": 0.1091, "step": 20942 }, { "epoch": 2.4834578441835644, "grad_norm": 0.6130098144960732, "learning_rate": 1.6595271350090817e-05, "loss": 0.0815, "step": 20943 }, { "epoch": 2.4835764259456896, "grad_norm": 0.5664847305352829, "learning_rate": 1.659301074194809e-05, "loss": 0.0783, "step": 20944 }, { "epoch": 2.4836950077078144, "grad_norm": 0.7097231409368371, "learning_rate": 1.6590750211307487e-05, "loss": 0.0973, "step": 20945 }, { "epoch": 2.4838135894699396, "grad_norm": 0.9354876555123837, "learning_rate": 1.658848975818987e-05, "loss": 0.1341, "step": 20946 }, { "epoch": 2.4839321712320643, "grad_norm": 0.5934335198534626, "learning_rate": 1.6586229382616054e-05, "loss": 0.071, "step": 20947 }, { "epoch": 2.4840507529941895, "grad_norm": 0.88385131474436, "learning_rate": 1.6583969084606883e-05, "loss": 0.1079, "step": 20948 }, { "epoch": 2.4841693347563143, "grad_norm": 0.7613334979374042, "learning_rate": 1.6581708864183202e-05, "loss": 0.1218, "step": 20949 }, { "epoch": 2.4842879165184395, "grad_norm": 0.9421212087182589, "learning_rate": 1.6579448721365842e-05, "loss": 0.1313, "step": 20950 }, { "epoch": 2.4844064982805643, "grad_norm": 0.6488531968603838, "learning_rate": 1.6577188656175636e-05, "loss": 0.075, "step": 20951 }, { "epoch": 2.4845250800426895, "grad_norm": 0.5357455069988216, "learning_rate": 1.6574928668633422e-05, "loss": 0.0817, "step": 20952 }, { "epoch": 2.4846436618048147, "grad_norm": 0.6503647907513621, "learning_rate": 1.6572668758760033e-05, "loss": 0.0996, "step": 20953 }, { "epoch": 2.4847622435669394, "grad_norm": 0.6420818844364695, "learning_rate": 1.657040892657631e-05, "loss": 0.0855, "step": 20954 }, { "epoch": 2.484880825329064, "grad_norm": 0.7605492243152511, "learning_rate": 1.6568149172103077e-05, "loss": 0.0917, "step": 20955 }, { "epoch": 2.4849994070911894, "grad_norm": 0.47617936737169475, "learning_rate": 1.6565889495361158e-05, "loss": 0.0649, "step": 20956 }, { "epoch": 2.4851179888533146, "grad_norm": 0.37976325081509865, "learning_rate": 1.656362989637141e-05, "loss": 0.0565, "step": 20957 }, { "epoch": 2.4852365706154393, "grad_norm": 0.5427047367810001, "learning_rate": 1.6561370375154648e-05, "loss": 0.0633, "step": 20958 }, { "epoch": 2.485355152377564, "grad_norm": 0.6911774052469812, "learning_rate": 1.6559110931731698e-05, "loss": 0.1105, "step": 20959 }, { "epoch": 2.4854737341396893, "grad_norm": 0.6763546002861934, "learning_rate": 1.6556851566123388e-05, "loss": 0.1038, "step": 20960 }, { "epoch": 2.4855923159018145, "grad_norm": 0.7472788979962444, "learning_rate": 1.655459227835056e-05, "loss": 0.0944, "step": 20961 }, { "epoch": 2.4857108976639393, "grad_norm": 0.696215834188691, "learning_rate": 1.655233306843404e-05, "loss": 0.1207, "step": 20962 }, { "epoch": 2.485829479426064, "grad_norm": 0.9048574467671708, "learning_rate": 1.6550073936394634e-05, "loss": 0.1323, "step": 20963 }, { "epoch": 2.4859480611881892, "grad_norm": 0.5097156039690814, "learning_rate": 1.65478148822532e-05, "loss": 0.0778, "step": 20964 }, { "epoch": 2.4860666429503144, "grad_norm": 0.7942265007997724, "learning_rate": 1.6545555906030547e-05, "loss": 0.0939, "step": 20965 }, { "epoch": 2.486185224712439, "grad_norm": 0.8946468106744507, "learning_rate": 1.65432970077475e-05, "loss": 0.1422, "step": 20966 }, { "epoch": 2.4863038064745644, "grad_norm": 0.6750826983195275, "learning_rate": 1.6541038187424875e-05, "loss": 0.0794, "step": 20967 }, { "epoch": 2.486422388236689, "grad_norm": 0.5975614703106592, "learning_rate": 1.653877944508351e-05, "loss": 0.0661, "step": 20968 }, { "epoch": 2.4865409699988144, "grad_norm": 0.6815471556242623, "learning_rate": 1.6536520780744228e-05, "loss": 0.0964, "step": 20969 }, { "epoch": 2.486659551760939, "grad_norm": 0.8540552157673297, "learning_rate": 1.6534262194427842e-05, "loss": 0.0908, "step": 20970 }, { "epoch": 2.4867781335230643, "grad_norm": 0.42788080757395663, "learning_rate": 1.653200368615517e-05, "loss": 0.0632, "step": 20971 }, { "epoch": 2.486896715285189, "grad_norm": 0.7668171583071989, "learning_rate": 1.6529745255947046e-05, "loss": 0.1092, "step": 20972 }, { "epoch": 2.4870152970473143, "grad_norm": 0.6664006723519624, "learning_rate": 1.652748690382429e-05, "loss": 0.0874, "step": 20973 }, { "epoch": 2.487133878809439, "grad_norm": 0.4329012765958965, "learning_rate": 1.652522862980771e-05, "loss": 0.0775, "step": 20974 }, { "epoch": 2.4872524605715642, "grad_norm": 1.0088065068974856, "learning_rate": 1.6522970433918122e-05, "loss": 0.1187, "step": 20975 }, { "epoch": 2.487371042333689, "grad_norm": 0.8337394514819793, "learning_rate": 1.652071231617636e-05, "loss": 0.1302, "step": 20976 }, { "epoch": 2.487489624095814, "grad_norm": 0.6270559694773218, "learning_rate": 1.6518454276603236e-05, "loss": 0.0822, "step": 20977 }, { "epoch": 2.487608205857939, "grad_norm": 0.9116914622003164, "learning_rate": 1.6516196315219552e-05, "loss": 0.1203, "step": 20978 }, { "epoch": 2.487726787620064, "grad_norm": 0.5898512638481359, "learning_rate": 1.651393843204614e-05, "loss": 0.1004, "step": 20979 }, { "epoch": 2.487845369382189, "grad_norm": 0.8227093328341436, "learning_rate": 1.6511680627103816e-05, "loss": 0.1056, "step": 20980 }, { "epoch": 2.487963951144314, "grad_norm": 0.7380753205886903, "learning_rate": 1.6509422900413375e-05, "loss": 0.0932, "step": 20981 }, { "epoch": 2.488082532906439, "grad_norm": 0.6816959465422353, "learning_rate": 1.650716525199565e-05, "loss": 0.1157, "step": 20982 }, { "epoch": 2.488201114668564, "grad_norm": 0.5790035443115789, "learning_rate": 1.6504907681871444e-05, "loss": 0.0771, "step": 20983 }, { "epoch": 2.488319696430689, "grad_norm": 0.9090626175679626, "learning_rate": 1.6502650190061576e-05, "loss": 0.1337, "step": 20984 }, { "epoch": 2.488438278192814, "grad_norm": 0.8202928200551096, "learning_rate": 1.6500392776586854e-05, "loss": 0.0955, "step": 20985 }, { "epoch": 2.488556859954939, "grad_norm": 0.57657790025274, "learning_rate": 1.6498135441468076e-05, "loss": 0.0872, "step": 20986 }, { "epoch": 2.488675441717064, "grad_norm": 0.6535433848161041, "learning_rate": 1.6495878184726077e-05, "loss": 0.0934, "step": 20987 }, { "epoch": 2.4887940234791888, "grad_norm": 0.6682790318445135, "learning_rate": 1.649362100638165e-05, "loss": 0.0824, "step": 20988 }, { "epoch": 2.488912605241314, "grad_norm": 0.590494952523208, "learning_rate": 1.64913639064556e-05, "loss": 0.0648, "step": 20989 }, { "epoch": 2.4890311870034387, "grad_norm": 0.5219055670054221, "learning_rate": 1.6489106884968745e-05, "loss": 0.0778, "step": 20990 }, { "epoch": 2.489149768765564, "grad_norm": 0.9277046891009498, "learning_rate": 1.6486849941941885e-05, "loss": 0.136, "step": 20991 }, { "epoch": 2.4892683505276887, "grad_norm": 0.7934382714301292, "learning_rate": 1.6484593077395832e-05, "loss": 0.0843, "step": 20992 }, { "epoch": 2.489386932289814, "grad_norm": 1.1096030669620274, "learning_rate": 1.648233629135138e-05, "loss": 0.1613, "step": 20993 }, { "epoch": 2.4895055140519387, "grad_norm": 0.9015432757463602, "learning_rate": 1.6480079583829356e-05, "loss": 0.1054, "step": 20994 }, { "epoch": 2.489624095814064, "grad_norm": 0.8349292984166051, "learning_rate": 1.6477822954850548e-05, "loss": 0.071, "step": 20995 }, { "epoch": 2.4897426775761886, "grad_norm": 0.9883349893246602, "learning_rate": 1.647556640443576e-05, "loss": 0.1251, "step": 20996 }, { "epoch": 2.489861259338314, "grad_norm": 0.8285140231457055, "learning_rate": 1.647330993260579e-05, "loss": 0.1245, "step": 20997 }, { "epoch": 2.4899798411004386, "grad_norm": 0.752604353976522, "learning_rate": 1.6471053539381454e-05, "loss": 0.1213, "step": 20998 }, { "epoch": 2.4900984228625638, "grad_norm": 0.7210798247126474, "learning_rate": 1.6468797224783543e-05, "loss": 0.1058, "step": 20999 }, { "epoch": 2.4902170046246885, "grad_norm": 0.7166485219379403, "learning_rate": 1.646654098883286e-05, "loss": 0.1034, "step": 21000 }, { "epoch": 2.4903355863868137, "grad_norm": 0.6165038951169779, "learning_rate": 1.6464284831550202e-05, "loss": 0.078, "step": 21001 }, { "epoch": 2.490454168148939, "grad_norm": 1.1492604397342356, "learning_rate": 1.646202875295637e-05, "loss": 0.1568, "step": 21002 }, { "epoch": 2.4905727499110637, "grad_norm": 0.7022925631336464, "learning_rate": 1.645977275307217e-05, "loss": 0.0816, "step": 21003 }, { "epoch": 2.4906913316731885, "grad_norm": 0.5788172433099744, "learning_rate": 1.645751683191839e-05, "loss": 0.0842, "step": 21004 }, { "epoch": 2.4908099134353137, "grad_norm": 0.6369284621193753, "learning_rate": 1.645526098951582e-05, "loss": 0.0854, "step": 21005 }, { "epoch": 2.490928495197439, "grad_norm": 0.5218012636557446, "learning_rate": 1.6453005225885276e-05, "loss": 0.0722, "step": 21006 }, { "epoch": 2.4910470769595636, "grad_norm": 0.5099624490551737, "learning_rate": 1.6450749541047546e-05, "loss": 0.0897, "step": 21007 }, { "epoch": 2.4911656587216884, "grad_norm": 0.7414125441011692, "learning_rate": 1.644849393502341e-05, "loss": 0.1024, "step": 21008 }, { "epoch": 2.4912842404838136, "grad_norm": 0.6426714720391214, "learning_rate": 1.6446238407833678e-05, "loss": 0.0814, "step": 21009 }, { "epoch": 2.491402822245939, "grad_norm": 1.177101796614947, "learning_rate": 1.6443982959499137e-05, "loss": 0.1364, "step": 21010 }, { "epoch": 2.4915214040080635, "grad_norm": 0.8369995287616867, "learning_rate": 1.6441727590040586e-05, "loss": 0.0995, "step": 21011 }, { "epoch": 2.4916399857701883, "grad_norm": 0.6911979989012658, "learning_rate": 1.6439472299478803e-05, "loss": 0.0854, "step": 21012 }, { "epoch": 2.4917585675323135, "grad_norm": 0.7541301444725492, "learning_rate": 1.6437217087834598e-05, "loss": 0.0962, "step": 21013 }, { "epoch": 2.4918771492944387, "grad_norm": 0.6268166704635806, "learning_rate": 1.643496195512875e-05, "loss": 0.086, "step": 21014 }, { "epoch": 2.4919957310565635, "grad_norm": 0.5415278267112105, "learning_rate": 1.643270690138205e-05, "loss": 0.0847, "step": 21015 }, { "epoch": 2.4921143128186887, "grad_norm": 0.7128709100363025, "learning_rate": 1.6430451926615275e-05, "loss": 0.1086, "step": 21016 }, { "epoch": 2.4922328945808134, "grad_norm": 0.6830142959178864, "learning_rate": 1.6428197030849236e-05, "loss": 0.1033, "step": 21017 }, { "epoch": 2.4923514763429386, "grad_norm": 0.5625407940099288, "learning_rate": 1.6425942214104707e-05, "loss": 0.0985, "step": 21018 }, { "epoch": 2.4924700581050634, "grad_norm": 0.7122861556638524, "learning_rate": 1.6423687476402468e-05, "loss": 0.1037, "step": 21019 }, { "epoch": 2.4925886398671886, "grad_norm": 0.7527540324239157, "learning_rate": 1.6421432817763323e-05, "loss": 0.0967, "step": 21020 }, { "epoch": 2.4927072216293134, "grad_norm": 0.5801564456535857, "learning_rate": 1.6419178238208043e-05, "loss": 0.098, "step": 21021 }, { "epoch": 2.4928258033914386, "grad_norm": 0.7579988005053704, "learning_rate": 1.641692373775742e-05, "loss": 0.0993, "step": 21022 }, { "epoch": 2.4929443851535633, "grad_norm": 0.6612872194761049, "learning_rate": 1.641466931643223e-05, "loss": 0.093, "step": 21023 }, { "epoch": 2.4930629669156885, "grad_norm": 0.45438235603645605, "learning_rate": 1.641241497425327e-05, "loss": 0.0748, "step": 21024 }, { "epoch": 2.4931815486778133, "grad_norm": 0.6688133350667054, "learning_rate": 1.6410160711241312e-05, "loss": 0.0989, "step": 21025 }, { "epoch": 2.4933001304399385, "grad_norm": 0.6313075290433969, "learning_rate": 1.640790652741714e-05, "loss": 0.0835, "step": 21026 }, { "epoch": 2.4934187122020632, "grad_norm": 0.8674709343223904, "learning_rate": 1.640565242280152e-05, "loss": 0.1222, "step": 21027 }, { "epoch": 2.4935372939641884, "grad_norm": 0.6333832834710779, "learning_rate": 1.6403398397415256e-05, "loss": 0.0781, "step": 21028 }, { "epoch": 2.493655875726313, "grad_norm": 0.6297365831626656, "learning_rate": 1.640114445127912e-05, "loss": 0.084, "step": 21029 }, { "epoch": 2.4937744574884384, "grad_norm": 0.500447357258479, "learning_rate": 1.6398890584413878e-05, "loss": 0.0584, "step": 21030 }, { "epoch": 2.493893039250563, "grad_norm": 0.6678671549588854, "learning_rate": 1.639663679684032e-05, "loss": 0.0983, "step": 21031 }, { "epoch": 2.4940116210126884, "grad_norm": 0.5505614993383509, "learning_rate": 1.639438308857922e-05, "loss": 0.0676, "step": 21032 }, { "epoch": 2.494130202774813, "grad_norm": 0.8532831111387837, "learning_rate": 1.639212945965136e-05, "loss": 0.1202, "step": 21033 }, { "epoch": 2.4942487845369383, "grad_norm": 0.8146634902145333, "learning_rate": 1.63898759100775e-05, "loss": 0.1057, "step": 21034 }, { "epoch": 2.494367366299063, "grad_norm": 0.7831359742311974, "learning_rate": 1.6387622439878442e-05, "loss": 0.1011, "step": 21035 }, { "epoch": 2.4944859480611883, "grad_norm": 0.5707830189779725, "learning_rate": 1.638536904907494e-05, "loss": 0.0805, "step": 21036 }, { "epoch": 2.494604529823313, "grad_norm": 0.4423528038086672, "learning_rate": 1.6383115737687767e-05, "loss": 0.0697, "step": 21037 }, { "epoch": 2.4947231115854382, "grad_norm": 0.9445001071695924, "learning_rate": 1.6380862505737695e-05, "loss": 0.1245, "step": 21038 }, { "epoch": 2.494841693347563, "grad_norm": 0.841551249513039, "learning_rate": 1.637860935324551e-05, "loss": 0.1179, "step": 21039 }, { "epoch": 2.494960275109688, "grad_norm": 0.5425900343138955, "learning_rate": 1.6376356280231968e-05, "loss": 0.0817, "step": 21040 }, { "epoch": 2.495078856871813, "grad_norm": 0.5798785672795183, "learning_rate": 1.6374103286717852e-05, "loss": 0.0976, "step": 21041 }, { "epoch": 2.495197438633938, "grad_norm": 0.5878476819163321, "learning_rate": 1.6371850372723917e-05, "loss": 0.089, "step": 21042 }, { "epoch": 2.495316020396063, "grad_norm": 0.6358450239154725, "learning_rate": 1.636959753827095e-05, "loss": 0.1019, "step": 21043 }, { "epoch": 2.495434602158188, "grad_norm": 0.33361724864436526, "learning_rate": 1.636734478337971e-05, "loss": 0.0507, "step": 21044 }, { "epoch": 2.495553183920313, "grad_norm": 0.5190870336727907, "learning_rate": 1.6365092108070967e-05, "loss": 0.0787, "step": 21045 }, { "epoch": 2.495671765682438, "grad_norm": 1.0654419884861914, "learning_rate": 1.6362839512365475e-05, "loss": 0.1403, "step": 21046 }, { "epoch": 2.495790347444563, "grad_norm": 0.6703725500474584, "learning_rate": 1.636058699628402e-05, "loss": 0.0865, "step": 21047 }, { "epoch": 2.495908929206688, "grad_norm": 0.6616198330154293, "learning_rate": 1.635833455984736e-05, "loss": 0.0634, "step": 21048 }, { "epoch": 2.496027510968813, "grad_norm": 0.6748966282606551, "learning_rate": 1.6356082203076246e-05, "loss": 0.0947, "step": 21049 }, { "epoch": 2.496146092730938, "grad_norm": 0.7050391832256626, "learning_rate": 1.6353829925991467e-05, "loss": 0.1053, "step": 21050 }, { "epoch": 2.4962646744930628, "grad_norm": 0.6031446126341878, "learning_rate": 1.6351577728613765e-05, "loss": 0.0786, "step": 21051 }, { "epoch": 2.496383256255188, "grad_norm": 0.9357567648945577, "learning_rate": 1.634932561096392e-05, "loss": 0.14, "step": 21052 }, { "epoch": 2.4965018380173127, "grad_norm": 0.44262326959543896, "learning_rate": 1.6347073573062672e-05, "loss": 0.0677, "step": 21053 }, { "epoch": 2.496620419779438, "grad_norm": 0.8741395677259681, "learning_rate": 1.6344821614930806e-05, "loss": 0.1107, "step": 21054 }, { "epoch": 2.496739001541563, "grad_norm": 0.38073980503737137, "learning_rate": 1.634256973658907e-05, "loss": 0.0612, "step": 21055 }, { "epoch": 2.496857583303688, "grad_norm": 0.7601296368103896, "learning_rate": 1.6340317938058225e-05, "loss": 0.0946, "step": 21056 }, { "epoch": 2.4969761650658127, "grad_norm": 0.5080509930198253, "learning_rate": 1.633806621935902e-05, "loss": 0.068, "step": 21057 }, { "epoch": 2.497094746827938, "grad_norm": 0.4078579120013963, "learning_rate": 1.6335814580512233e-05, "loss": 0.0597, "step": 21058 }, { "epoch": 2.497213328590063, "grad_norm": 0.7001256215314647, "learning_rate": 1.6333563021538612e-05, "loss": 0.0908, "step": 21059 }, { "epoch": 2.497331910352188, "grad_norm": 0.6005160247782392, "learning_rate": 1.6331311542458904e-05, "loss": 0.0814, "step": 21060 }, { "epoch": 2.4974504921143126, "grad_norm": 0.8590810676546597, "learning_rate": 1.632906014329387e-05, "loss": 0.1146, "step": 21061 }, { "epoch": 2.497569073876438, "grad_norm": 0.598194631996934, "learning_rate": 1.6326808824064276e-05, "loss": 0.0994, "step": 21062 }, { "epoch": 2.497687655638563, "grad_norm": 0.5895626496027416, "learning_rate": 1.632455758479087e-05, "loss": 0.0584, "step": 21063 }, { "epoch": 2.4978062374006877, "grad_norm": 0.5571637429098188, "learning_rate": 1.6322306425494398e-05, "loss": 0.0696, "step": 21064 }, { "epoch": 2.497924819162813, "grad_norm": 1.194417885821483, "learning_rate": 1.632005534619563e-05, "loss": 0.1339, "step": 21065 }, { "epoch": 2.4980434009249377, "grad_norm": 0.7092644330854693, "learning_rate": 1.6317804346915302e-05, "loss": 0.0949, "step": 21066 }, { "epoch": 2.498161982687063, "grad_norm": 0.672649230103092, "learning_rate": 1.6315553427674174e-05, "loss": 0.0884, "step": 21067 }, { "epoch": 2.4982805644491877, "grad_norm": 0.6369340069997936, "learning_rate": 1.6313302588492983e-05, "loss": 0.0769, "step": 21068 }, { "epoch": 2.498399146211313, "grad_norm": 0.8208171643275337, "learning_rate": 1.63110518293925e-05, "loss": 0.1358, "step": 21069 }, { "epoch": 2.4985177279734376, "grad_norm": 0.5886825724371734, "learning_rate": 1.630880115039346e-05, "loss": 0.1013, "step": 21070 }, { "epoch": 2.498636309735563, "grad_norm": 0.5215580416763855, "learning_rate": 1.630655055151662e-05, "loss": 0.0529, "step": 21071 }, { "epoch": 2.4987548914976876, "grad_norm": 0.7391223397330811, "learning_rate": 1.6304300032782715e-05, "loss": 0.093, "step": 21072 }, { "epoch": 2.498873473259813, "grad_norm": 0.49089771203862625, "learning_rate": 1.630204959421251e-05, "loss": 0.0766, "step": 21073 }, { "epoch": 2.4989920550219376, "grad_norm": 0.8058096311132605, "learning_rate": 1.629979923582674e-05, "loss": 0.1332, "step": 21074 }, { "epoch": 2.4991106367840628, "grad_norm": 0.9004524895138818, "learning_rate": 1.6297548957646152e-05, "loss": 0.121, "step": 21075 }, { "epoch": 2.4992292185461875, "grad_norm": 0.9395667005202466, "learning_rate": 1.629529875969148e-05, "loss": 0.1451, "step": 21076 }, { "epoch": 2.4993478003083127, "grad_norm": 0.7594174723515573, "learning_rate": 1.6293048641983492e-05, "loss": 0.1021, "step": 21077 }, { "epoch": 2.4994663820704375, "grad_norm": 0.7551149106994824, "learning_rate": 1.6290798604542916e-05, "loss": 0.0959, "step": 21078 }, { "epoch": 2.4995849638325627, "grad_norm": 0.9602651456545053, "learning_rate": 1.628854864739049e-05, "loss": 0.1496, "step": 21079 }, { "epoch": 2.4997035455946874, "grad_norm": 0.6952490883077984, "learning_rate": 1.6286298770546968e-05, "loss": 0.0778, "step": 21080 }, { "epoch": 2.4998221273568126, "grad_norm": 0.7586346124935794, "learning_rate": 1.6284048974033083e-05, "loss": 0.0949, "step": 21081 }, { "epoch": 2.4999407091189374, "grad_norm": 0.4286714203589519, "learning_rate": 1.6281799257869583e-05, "loss": 0.0576, "step": 21082 }, { "epoch": 2.5000592908810626, "grad_norm": 0.6033000594068344, "learning_rate": 1.6279549622077195e-05, "loss": 0.0702, "step": 21083 }, { "epoch": 2.5001778726431874, "grad_norm": 0.7281039306164327, "learning_rate": 1.6277300066676676e-05, "loss": 0.111, "step": 21084 }, { "epoch": 2.5002964544053126, "grad_norm": 0.48487470244200453, "learning_rate": 1.6275050591688752e-05, "loss": 0.0687, "step": 21085 }, { "epoch": 2.5004150361674373, "grad_norm": 0.6558655928395012, "learning_rate": 1.6272801197134163e-05, "loss": 0.0981, "step": 21086 }, { "epoch": 2.5005336179295625, "grad_norm": 0.5486047494231322, "learning_rate": 1.6270551883033632e-05, "loss": 0.0594, "step": 21087 }, { "epoch": 2.5006521996916873, "grad_norm": 0.7297009101147324, "learning_rate": 1.626830264940792e-05, "loss": 0.0774, "step": 21088 }, { "epoch": 2.5007707814538125, "grad_norm": 0.7381310700437813, "learning_rate": 1.6266053496277744e-05, "loss": 0.1074, "step": 21089 }, { "epoch": 2.5008893632159372, "grad_norm": 0.7611058171077287, "learning_rate": 1.6263804423663853e-05, "loss": 0.0828, "step": 21090 }, { "epoch": 2.5010079449780624, "grad_norm": 0.6721245629414353, "learning_rate": 1.626155543158696e-05, "loss": 0.0866, "step": 21091 }, { "epoch": 2.501126526740187, "grad_norm": 0.6720205933846405, "learning_rate": 1.625930652006782e-05, "loss": 0.0786, "step": 21092 }, { "epoch": 2.5012451085023124, "grad_norm": 0.5942359043274482, "learning_rate": 1.6257057689127157e-05, "loss": 0.0845, "step": 21093 }, { "epoch": 2.501363690264437, "grad_norm": 0.7796736102857782, "learning_rate": 1.6254808938785688e-05, "loss": 0.1096, "step": 21094 }, { "epoch": 2.5014822720265624, "grad_norm": 0.6023745000357921, "learning_rate": 1.6252560269064168e-05, "loss": 0.0909, "step": 21095 }, { "epoch": 2.5016008537886876, "grad_norm": 0.5270114302487215, "learning_rate": 1.6250311679983314e-05, "loss": 0.0771, "step": 21096 }, { "epoch": 2.5017194355508123, "grad_norm": 0.6368728934491102, "learning_rate": 1.624806317156386e-05, "loss": 0.1035, "step": 21097 }, { "epoch": 2.501838017312937, "grad_norm": 0.7554421930450529, "learning_rate": 1.624581474382652e-05, "loss": 0.1031, "step": 21098 }, { "epoch": 2.5019565990750623, "grad_norm": 0.7338338767631697, "learning_rate": 1.6243566396792044e-05, "loss": 0.1392, "step": 21099 }, { "epoch": 2.5020751808371875, "grad_norm": 0.8630656316923839, "learning_rate": 1.624131813048114e-05, "loss": 0.1342, "step": 21100 }, { "epoch": 2.5021937625993123, "grad_norm": 0.4285471761069529, "learning_rate": 1.6239069944914547e-05, "loss": 0.0518, "step": 21101 }, { "epoch": 2.502312344361437, "grad_norm": 0.5858206773555166, "learning_rate": 1.6236821840112977e-05, "loss": 0.0768, "step": 21102 }, { "epoch": 2.502430926123562, "grad_norm": 1.0179343089741784, "learning_rate": 1.6234573816097174e-05, "loss": 0.1331, "step": 21103 }, { "epoch": 2.5025495078856874, "grad_norm": 0.7640512597837301, "learning_rate": 1.623232587288785e-05, "loss": 0.0897, "step": 21104 }, { "epoch": 2.502668089647812, "grad_norm": 0.587564883460058, "learning_rate": 1.623007801050572e-05, "loss": 0.0763, "step": 21105 }, { "epoch": 2.502786671409937, "grad_norm": 0.7818188416904707, "learning_rate": 1.622783022897153e-05, "loss": 0.1023, "step": 21106 }, { "epoch": 2.502905253172062, "grad_norm": 0.7199714788891135, "learning_rate": 1.622558252830599e-05, "loss": 0.0813, "step": 21107 }, { "epoch": 2.5030238349341873, "grad_norm": 0.6392257724187865, "learning_rate": 1.622333490852981e-05, "loss": 0.0831, "step": 21108 }, { "epoch": 2.503142416696312, "grad_norm": 0.5904942136453676, "learning_rate": 1.6221087369663715e-05, "loss": 0.1005, "step": 21109 }, { "epoch": 2.503260998458437, "grad_norm": 0.5831743352152686, "learning_rate": 1.621883991172844e-05, "loss": 0.0944, "step": 21110 }, { "epoch": 2.503379580220562, "grad_norm": 0.542619296881233, "learning_rate": 1.6216592534744684e-05, "loss": 0.0685, "step": 21111 }, { "epoch": 2.5034981619826873, "grad_norm": 0.6617774696689404, "learning_rate": 1.621434523873318e-05, "loss": 0.1018, "step": 21112 }, { "epoch": 2.503616743744812, "grad_norm": 1.0481507196537825, "learning_rate": 1.6212098023714634e-05, "loss": 0.1692, "step": 21113 }, { "epoch": 2.503735325506937, "grad_norm": 0.643732658667772, "learning_rate": 1.6209850889709773e-05, "loss": 0.0895, "step": 21114 }, { "epoch": 2.503853907269062, "grad_norm": 0.5982056636137397, "learning_rate": 1.620760383673931e-05, "loss": 0.1002, "step": 21115 }, { "epoch": 2.503972489031187, "grad_norm": 0.7658680497746043, "learning_rate": 1.6205356864823955e-05, "loss": 0.0792, "step": 21116 }, { "epoch": 2.504091070793312, "grad_norm": 0.8868926006883749, "learning_rate": 1.6203109973984414e-05, "loss": 0.1231, "step": 21117 }, { "epoch": 2.5042096525554367, "grad_norm": 0.842258850795142, "learning_rate": 1.6200863164241425e-05, "loss": 0.1021, "step": 21118 }, { "epoch": 2.504328234317562, "grad_norm": 0.9746483719451734, "learning_rate": 1.6198616435615683e-05, "loss": 0.1147, "step": 21119 }, { "epoch": 2.504446816079687, "grad_norm": 0.9152385740714258, "learning_rate": 1.6196369788127903e-05, "loss": 0.1246, "step": 21120 }, { "epoch": 2.504565397841812, "grad_norm": 0.5728330921181314, "learning_rate": 1.6194123221798797e-05, "loss": 0.0568, "step": 21121 }, { "epoch": 2.504683979603937, "grad_norm": 0.6517795301602478, "learning_rate": 1.619187673664908e-05, "loss": 0.0977, "step": 21122 }, { "epoch": 2.504802561366062, "grad_norm": 1.0361559447042468, "learning_rate": 1.618963033269946e-05, "loss": 0.1061, "step": 21123 }, { "epoch": 2.504921143128187, "grad_norm": 0.582555806287187, "learning_rate": 1.6187384009970638e-05, "loss": 0.0721, "step": 21124 }, { "epoch": 2.505039724890312, "grad_norm": 0.6181166870481027, "learning_rate": 1.6185137768483337e-05, "loss": 0.1168, "step": 21125 }, { "epoch": 2.505158306652437, "grad_norm": 0.6739163209054891, "learning_rate": 1.618289160825825e-05, "loss": 0.0764, "step": 21126 }, { "epoch": 2.5052768884145618, "grad_norm": 0.5839819608437394, "learning_rate": 1.6180645529316095e-05, "loss": 0.0787, "step": 21127 }, { "epoch": 2.505395470176687, "grad_norm": 0.7607065161068104, "learning_rate": 1.6178399531677565e-05, "loss": 0.1007, "step": 21128 }, { "epoch": 2.5055140519388117, "grad_norm": 0.8262306818744597, "learning_rate": 1.6176153615363382e-05, "loss": 0.1113, "step": 21129 }, { "epoch": 2.505632633700937, "grad_norm": 0.5636130975301148, "learning_rate": 1.6173907780394236e-05, "loss": 0.0671, "step": 21130 }, { "epoch": 2.5057512154630617, "grad_norm": 0.9521062750254975, "learning_rate": 1.617166202679084e-05, "loss": 0.1437, "step": 21131 }, { "epoch": 2.505869797225187, "grad_norm": 0.6627753308162271, "learning_rate": 1.6169416354573892e-05, "loss": 0.1045, "step": 21132 }, { "epoch": 2.5059883789873116, "grad_norm": 0.5764820735013353, "learning_rate": 1.61671707637641e-05, "loss": 0.0755, "step": 21133 }, { "epoch": 2.506106960749437, "grad_norm": 0.7572754219825468, "learning_rate": 1.616492525438216e-05, "loss": 0.1218, "step": 21134 }, { "epoch": 2.5062255425115616, "grad_norm": 0.8408150035696037, "learning_rate": 1.6162679826448772e-05, "loss": 0.1109, "step": 21135 }, { "epoch": 2.506344124273687, "grad_norm": 0.5757024579243644, "learning_rate": 1.616043447998464e-05, "loss": 0.0697, "step": 21136 }, { "epoch": 2.5064627060358116, "grad_norm": 0.7653203109199602, "learning_rate": 1.615818921501047e-05, "loss": 0.137, "step": 21137 }, { "epoch": 2.5065812877979368, "grad_norm": 0.6397537663865878, "learning_rate": 1.6155944031546945e-05, "loss": 0.0996, "step": 21138 }, { "epoch": 2.5066998695600615, "grad_norm": 1.1691253996130948, "learning_rate": 1.6153698929614776e-05, "loss": 0.1325, "step": 21139 }, { "epoch": 2.5068184513221867, "grad_norm": 0.4747747925562896, "learning_rate": 1.615145390923465e-05, "loss": 0.0668, "step": 21140 }, { "epoch": 2.5069370330843115, "grad_norm": 0.6915127267555807, "learning_rate": 1.614920897042727e-05, "loss": 0.1155, "step": 21141 }, { "epoch": 2.5070556148464367, "grad_norm": 0.5877855421865675, "learning_rate": 1.614696411321333e-05, "loss": 0.0766, "step": 21142 }, { "epoch": 2.5071741966085614, "grad_norm": 0.9368142808347628, "learning_rate": 1.614471933761352e-05, "loss": 0.1295, "step": 21143 }, { "epoch": 2.5072927783706866, "grad_norm": 0.5588919312477221, "learning_rate": 1.6142474643648548e-05, "loss": 0.095, "step": 21144 }, { "epoch": 2.507411360132812, "grad_norm": 0.7437785462582569, "learning_rate": 1.6140230031339095e-05, "loss": 0.1087, "step": 21145 }, { "epoch": 2.5075299418949366, "grad_norm": 1.1440651886030582, "learning_rate": 1.6137985500705854e-05, "loss": 0.1259, "step": 21146 }, { "epoch": 2.5076485236570614, "grad_norm": 0.5100359729476346, "learning_rate": 1.6135741051769516e-05, "loss": 0.061, "step": 21147 }, { "epoch": 2.5077671054191866, "grad_norm": 0.9561436716459476, "learning_rate": 1.613349668455078e-05, "loss": 0.1458, "step": 21148 }, { "epoch": 2.5078856871813118, "grad_norm": 0.8346914258509308, "learning_rate": 1.6131252399070328e-05, "loss": 0.1313, "step": 21149 }, { "epoch": 2.5080042689434365, "grad_norm": 1.0793388479332586, "learning_rate": 1.6129008195348853e-05, "loss": 0.0951, "step": 21150 }, { "epoch": 2.5081228507055613, "grad_norm": 0.6006742998901674, "learning_rate": 1.6126764073407048e-05, "loss": 0.0747, "step": 21151 }, { "epoch": 2.5082414324676865, "grad_norm": 0.6422206330645762, "learning_rate": 1.61245200332656e-05, "loss": 0.0862, "step": 21152 }, { "epoch": 2.5083600142298117, "grad_norm": 0.6937765464670296, "learning_rate": 1.612227607494519e-05, "loss": 0.0711, "step": 21153 }, { "epoch": 2.5084785959919365, "grad_norm": 0.8102706779057159, "learning_rate": 1.6120032198466507e-05, "loss": 0.1356, "step": 21154 }, { "epoch": 2.508597177754061, "grad_norm": 0.6922246944778802, "learning_rate": 1.611778840385024e-05, "loss": 0.0792, "step": 21155 }, { "epoch": 2.5087157595161864, "grad_norm": 0.7808227063432985, "learning_rate": 1.6115544691117074e-05, "loss": 0.1033, "step": 21156 }, { "epoch": 2.5088343412783116, "grad_norm": 0.5979220493151917, "learning_rate": 1.611330106028769e-05, "loss": 0.079, "step": 21157 }, { "epoch": 2.5089529230404364, "grad_norm": 0.8131332024416453, "learning_rate": 1.6111057511382764e-05, "loss": 0.1123, "step": 21158 }, { "epoch": 2.509071504802561, "grad_norm": 0.7257758163689607, "learning_rate": 1.6108814044422994e-05, "loss": 0.1009, "step": 21159 }, { "epoch": 2.5091900865646863, "grad_norm": 0.6929750379220999, "learning_rate": 1.6106570659429055e-05, "loss": 0.0975, "step": 21160 }, { "epoch": 2.5093086683268115, "grad_norm": 0.5700697341794875, "learning_rate": 1.6104327356421633e-05, "loss": 0.0699, "step": 21161 }, { "epoch": 2.5094272500889363, "grad_norm": 0.6928684983813714, "learning_rate": 1.610208413542139e-05, "loss": 0.0774, "step": 21162 }, { "epoch": 2.509545831851061, "grad_norm": 0.7731091151700584, "learning_rate": 1.6099840996449035e-05, "loss": 0.1206, "step": 21163 }, { "epoch": 2.5096644136131863, "grad_norm": 0.5599173950107457, "learning_rate": 1.609759793952523e-05, "loss": 0.067, "step": 21164 }, { "epoch": 2.5097829953753115, "grad_norm": 0.7412282651517007, "learning_rate": 1.6095354964670644e-05, "loss": 0.1114, "step": 21165 }, { "epoch": 2.5099015771374362, "grad_norm": 0.571506374677571, "learning_rate": 1.6093112071905982e-05, "loss": 0.0881, "step": 21166 }, { "epoch": 2.510020158899561, "grad_norm": 0.6368275125936826, "learning_rate": 1.6090869261251894e-05, "loss": 0.0897, "step": 21167 }, { "epoch": 2.510138740661686, "grad_norm": 1.0584719650990955, "learning_rate": 1.6088626532729068e-05, "loss": 0.1598, "step": 21168 }, { "epoch": 2.5102573224238114, "grad_norm": 0.5799771825190118, "learning_rate": 1.6086383886358176e-05, "loss": 0.0847, "step": 21169 }, { "epoch": 2.510375904185936, "grad_norm": 0.6510756646644285, "learning_rate": 1.6084141322159894e-05, "loss": 0.0648, "step": 21170 }, { "epoch": 2.5104944859480613, "grad_norm": 0.6339639709063764, "learning_rate": 1.60818988401549e-05, "loss": 0.0878, "step": 21171 }, { "epoch": 2.510613067710186, "grad_norm": 0.8567401309828432, "learning_rate": 1.6079656440363865e-05, "loss": 0.1366, "step": 21172 }, { "epoch": 2.5107316494723113, "grad_norm": 0.9866758779082196, "learning_rate": 1.6077414122807442e-05, "loss": 0.1294, "step": 21173 }, { "epoch": 2.510850231234436, "grad_norm": 1.007889576951197, "learning_rate": 1.6075171887506335e-05, "loss": 0.1185, "step": 21174 }, { "epoch": 2.5109688129965613, "grad_norm": 0.7373451638446665, "learning_rate": 1.60729297344812e-05, "loss": 0.0975, "step": 21175 }, { "epoch": 2.511087394758686, "grad_norm": 0.6009333135575639, "learning_rate": 1.6070687663752693e-05, "loss": 0.0857, "step": 21176 }, { "epoch": 2.5112059765208112, "grad_norm": 0.8012535845621486, "learning_rate": 1.606844567534151e-05, "loss": 0.0638, "step": 21177 }, { "epoch": 2.511324558282936, "grad_norm": 0.6619486379249423, "learning_rate": 1.60662037692683e-05, "loss": 0.0957, "step": 21178 }, { "epoch": 2.511443140045061, "grad_norm": 0.6822073365194059, "learning_rate": 1.6063961945553732e-05, "loss": 0.1087, "step": 21179 }, { "epoch": 2.511561721807186, "grad_norm": 0.7428110053192709, "learning_rate": 1.606172020421848e-05, "loss": 0.1075, "step": 21180 }, { "epoch": 2.511680303569311, "grad_norm": 0.5176230749773819, "learning_rate": 1.6059478545283203e-05, "loss": 0.0679, "step": 21181 }, { "epoch": 2.511798885331436, "grad_norm": 0.563625431628325, "learning_rate": 1.6057236968768574e-05, "loss": 0.063, "step": 21182 }, { "epoch": 2.511917467093561, "grad_norm": 0.8606204972811653, "learning_rate": 1.605499547469526e-05, "loss": 0.1132, "step": 21183 }, { "epoch": 2.512036048855686, "grad_norm": 0.7822027362067526, "learning_rate": 1.60527540630839e-05, "loss": 0.0991, "step": 21184 }, { "epoch": 2.512154630617811, "grad_norm": 0.5612508037681575, "learning_rate": 1.605051273395519e-05, "loss": 0.0756, "step": 21185 }, { "epoch": 2.512273212379936, "grad_norm": 0.7891987253655633, "learning_rate": 1.6048271487329782e-05, "loss": 0.0924, "step": 21186 }, { "epoch": 2.512391794142061, "grad_norm": 0.5989264886433577, "learning_rate": 1.6046030323228332e-05, "loss": 0.0712, "step": 21187 }, { "epoch": 2.512510375904186, "grad_norm": 0.7573045985997359, "learning_rate": 1.604378924167149e-05, "loss": 0.1066, "step": 21188 }, { "epoch": 2.512628957666311, "grad_norm": 1.0946505912221336, "learning_rate": 1.6041548242679937e-05, "loss": 0.1464, "step": 21189 }, { "epoch": 2.5127475394284358, "grad_norm": 0.6098427773513393, "learning_rate": 1.6039307326274323e-05, "loss": 0.0514, "step": 21190 }, { "epoch": 2.512866121190561, "grad_norm": 0.6510119936805168, "learning_rate": 1.6037066492475306e-05, "loss": 0.0671, "step": 21191 }, { "epoch": 2.5129847029526857, "grad_norm": 0.46791017399413354, "learning_rate": 1.603482574130354e-05, "loss": 0.0649, "step": 21192 }, { "epoch": 2.513103284714811, "grad_norm": 0.6618851519477186, "learning_rate": 1.60325850727797e-05, "loss": 0.1012, "step": 21193 }, { "epoch": 2.5132218664769357, "grad_norm": 0.7260795218145009, "learning_rate": 1.603034448692442e-05, "loss": 0.093, "step": 21194 }, { "epoch": 2.513340448239061, "grad_norm": 0.4446402415870884, "learning_rate": 1.602810398375836e-05, "loss": 0.0632, "step": 21195 }, { "epoch": 2.5134590300011856, "grad_norm": 0.5490139517931548, "learning_rate": 1.602586356330219e-05, "loss": 0.087, "step": 21196 }, { "epoch": 2.513577611763311, "grad_norm": 0.9974464193836134, "learning_rate": 1.602362322557655e-05, "loss": 0.1256, "step": 21197 }, { "epoch": 2.513696193525436, "grad_norm": 0.7183247002678562, "learning_rate": 1.6021382970602094e-05, "loss": 0.0888, "step": 21198 }, { "epoch": 2.513814775287561, "grad_norm": 0.6905717781448057, "learning_rate": 1.6019142798399472e-05, "loss": 0.0926, "step": 21199 }, { "epoch": 2.5139333570496856, "grad_norm": 0.6922766969248829, "learning_rate": 1.6016902708989346e-05, "loss": 0.1103, "step": 21200 }, { "epoch": 2.5140519388118108, "grad_norm": 0.532485902784654, "learning_rate": 1.6014662702392365e-05, "loss": 0.0627, "step": 21201 }, { "epoch": 2.514170520573936, "grad_norm": 0.7207496130375991, "learning_rate": 1.601242277862917e-05, "loss": 0.1012, "step": 21202 }, { "epoch": 2.5142891023360607, "grad_norm": 0.8036522880946295, "learning_rate": 1.6010182937720413e-05, "loss": 0.0833, "step": 21203 }, { "epoch": 2.5144076840981855, "grad_norm": 0.5365242018451132, "learning_rate": 1.600794317968675e-05, "loss": 0.0455, "step": 21204 }, { "epoch": 2.5145262658603107, "grad_norm": 0.8245399410927114, "learning_rate": 1.600570350454883e-05, "loss": 0.1202, "step": 21205 }, { "epoch": 2.514644847622436, "grad_norm": 0.9347200521136813, "learning_rate": 1.600346391232728e-05, "loss": 0.1076, "step": 21206 }, { "epoch": 2.5147634293845607, "grad_norm": 0.4728885134283516, "learning_rate": 1.600122440304277e-05, "loss": 0.075, "step": 21207 }, { "epoch": 2.5148820111466854, "grad_norm": 0.4142569114603341, "learning_rate": 1.5998984976715934e-05, "loss": 0.0664, "step": 21208 }, { "epoch": 2.5150005929088106, "grad_norm": 0.6603189676975086, "learning_rate": 1.599674563336742e-05, "loss": 0.1087, "step": 21209 }, { "epoch": 2.515119174670936, "grad_norm": 0.7157895741410386, "learning_rate": 1.5994506373017867e-05, "loss": 0.0883, "step": 21210 }, { "epoch": 2.5152377564330606, "grad_norm": 0.8958167539895541, "learning_rate": 1.599226719568792e-05, "loss": 0.1462, "step": 21211 }, { "epoch": 2.5153563381951853, "grad_norm": 0.6297402675771617, "learning_rate": 1.5990028101398234e-05, "loss": 0.1036, "step": 21212 }, { "epoch": 2.5154749199573105, "grad_norm": 0.6449638904939751, "learning_rate": 1.5987789090169435e-05, "loss": 0.0942, "step": 21213 }, { "epoch": 2.5155935017194357, "grad_norm": 0.6226921231345522, "learning_rate": 1.598555016202216e-05, "loss": 0.0963, "step": 21214 }, { "epoch": 2.5157120834815605, "grad_norm": 0.6936114071149169, "learning_rate": 1.5983311316977073e-05, "loss": 0.072, "step": 21215 }, { "epoch": 2.5158306652436853, "grad_norm": 0.7546230822107276, "learning_rate": 1.5981072555054794e-05, "loss": 0.1191, "step": 21216 }, { "epoch": 2.5159492470058105, "grad_norm": 0.6952587993939495, "learning_rate": 1.597883387627596e-05, "loss": 0.0884, "step": 21217 }, { "epoch": 2.5160678287679357, "grad_norm": 0.5856828432582977, "learning_rate": 1.5976595280661218e-05, "loss": 0.0621, "step": 21218 }, { "epoch": 2.5161864105300604, "grad_norm": 0.6747654401282737, "learning_rate": 1.5974356768231202e-05, "loss": 0.1107, "step": 21219 }, { "epoch": 2.5163049922921856, "grad_norm": 0.8071487310929476, "learning_rate": 1.5972118339006555e-05, "loss": 0.1145, "step": 21220 }, { "epoch": 2.5164235740543104, "grad_norm": 0.5376883883596805, "learning_rate": 1.5969879993007895e-05, "loss": 0.069, "step": 21221 }, { "epoch": 2.5165421558164356, "grad_norm": 0.8495537943063647, "learning_rate": 1.5967641730255877e-05, "loss": 0.1097, "step": 21222 }, { "epoch": 2.5166607375785603, "grad_norm": 0.6993414598176287, "learning_rate": 1.5965403550771125e-05, "loss": 0.0989, "step": 21223 }, { "epoch": 2.5167793193406856, "grad_norm": 0.979090746635705, "learning_rate": 1.5963165454574276e-05, "loss": 0.116, "step": 21224 }, { "epoch": 2.5168979011028103, "grad_norm": 0.6425007592096162, "learning_rate": 1.596092744168595e-05, "loss": 0.083, "step": 21225 }, { "epoch": 2.5170164828649355, "grad_norm": 1.4340654342075105, "learning_rate": 1.5958689512126796e-05, "loss": 0.1496, "step": 21226 }, { "epoch": 2.5171350646270603, "grad_norm": 0.8443594787466172, "learning_rate": 1.5956451665917437e-05, "loss": 0.1346, "step": 21227 }, { "epoch": 2.5172536463891855, "grad_norm": 0.6472856763117454, "learning_rate": 1.59542139030785e-05, "loss": 0.09, "step": 21228 }, { "epoch": 2.5173722281513102, "grad_norm": 0.6663024515116961, "learning_rate": 1.595197622363062e-05, "loss": 0.0877, "step": 21229 }, { "epoch": 2.5174908099134354, "grad_norm": 0.7530457177489921, "learning_rate": 1.5949738627594424e-05, "loss": 0.0844, "step": 21230 }, { "epoch": 2.51760939167556, "grad_norm": 0.6769323080684769, "learning_rate": 1.5947501114990542e-05, "loss": 0.103, "step": 21231 }, { "epoch": 2.5177279734376854, "grad_norm": 0.6126467305943148, "learning_rate": 1.59452636858396e-05, "loss": 0.0909, "step": 21232 }, { "epoch": 2.51784655519981, "grad_norm": 0.8163258232955648, "learning_rate": 1.5943026340162215e-05, "loss": 0.1119, "step": 21233 }, { "epoch": 2.5179651369619354, "grad_norm": 0.614448428275557, "learning_rate": 1.594078907797903e-05, "loss": 0.0803, "step": 21234 }, { "epoch": 2.51808371872406, "grad_norm": 0.511285322680352, "learning_rate": 1.5938551899310655e-05, "loss": 0.0541, "step": 21235 }, { "epoch": 2.5182023004861853, "grad_norm": 0.5824805455676073, "learning_rate": 1.5936314804177715e-05, "loss": 0.087, "step": 21236 }, { "epoch": 2.51832088224831, "grad_norm": 0.6113144790880425, "learning_rate": 1.593407779260085e-05, "loss": 0.0836, "step": 21237 }, { "epoch": 2.5184394640104353, "grad_norm": 0.6897680667976167, "learning_rate": 1.5931840864600666e-05, "loss": 0.0895, "step": 21238 }, { "epoch": 2.51855804577256, "grad_norm": 0.7698529461466672, "learning_rate": 1.5929604020197787e-05, "loss": 0.0846, "step": 21239 }, { "epoch": 2.5186766275346852, "grad_norm": 0.6485024022592171, "learning_rate": 1.592736725941283e-05, "loss": 0.0976, "step": 21240 }, { "epoch": 2.51879520929681, "grad_norm": 0.9602080900584585, "learning_rate": 1.592513058226643e-05, "loss": 0.1429, "step": 21241 }, { "epoch": 2.518913791058935, "grad_norm": 0.7968558645579773, "learning_rate": 1.59228939887792e-05, "loss": 0.1218, "step": 21242 }, { "epoch": 2.51903237282106, "grad_norm": 0.7842297243723687, "learning_rate": 1.5920657478971752e-05, "loss": 0.1117, "step": 21243 }, { "epoch": 2.519150954583185, "grad_norm": 0.7184039579375429, "learning_rate": 1.59184210528647e-05, "loss": 0.0876, "step": 21244 }, { "epoch": 2.51926953634531, "grad_norm": 0.432472229028866, "learning_rate": 1.5916184710478678e-05, "loss": 0.0554, "step": 21245 }, { "epoch": 2.519388118107435, "grad_norm": 0.687183045116382, "learning_rate": 1.5913948451834298e-05, "loss": 0.085, "step": 21246 }, { "epoch": 2.5195066998695603, "grad_norm": 0.5864287301129602, "learning_rate": 1.5911712276952162e-05, "loss": 0.0698, "step": 21247 }, { "epoch": 2.519625281631685, "grad_norm": 0.6846620742459917, "learning_rate": 1.5909476185852898e-05, "loss": 0.1071, "step": 21248 }, { "epoch": 2.51974386339381, "grad_norm": 0.6260072587031013, "learning_rate": 1.5907240178557115e-05, "loss": 0.0814, "step": 21249 }, { "epoch": 2.519862445155935, "grad_norm": 0.7705515979468751, "learning_rate": 1.590500425508543e-05, "loss": 0.1016, "step": 21250 }, { "epoch": 2.5199810269180603, "grad_norm": 0.6867039540103161, "learning_rate": 1.5902768415458448e-05, "loss": 0.0915, "step": 21251 }, { "epoch": 2.520099608680185, "grad_norm": 0.4893199393934805, "learning_rate": 1.5900532659696787e-05, "loss": 0.0665, "step": 21252 }, { "epoch": 2.5202181904423098, "grad_norm": 0.527711742334573, "learning_rate": 1.5898296987821064e-05, "loss": 0.0678, "step": 21253 }, { "epoch": 2.520336772204435, "grad_norm": 0.5925465391598634, "learning_rate": 1.589606139985188e-05, "loss": 0.0891, "step": 21254 }, { "epoch": 2.52045535396656, "grad_norm": 0.7313935304750119, "learning_rate": 1.5893825895809833e-05, "loss": 0.0978, "step": 21255 }, { "epoch": 2.520573935728685, "grad_norm": 0.5381858404958506, "learning_rate": 1.5891590475715558e-05, "loss": 0.0762, "step": 21256 }, { "epoch": 2.5206925174908097, "grad_norm": 1.0878183173839218, "learning_rate": 1.5889355139589645e-05, "loss": 0.0989, "step": 21257 }, { "epoch": 2.520811099252935, "grad_norm": 0.6887122326538243, "learning_rate": 1.58871198874527e-05, "loss": 0.0852, "step": 21258 }, { "epoch": 2.52092968101506, "grad_norm": 0.6866559063405675, "learning_rate": 1.588488471932534e-05, "loss": 0.0768, "step": 21259 }, { "epoch": 2.521048262777185, "grad_norm": 0.6488916116757518, "learning_rate": 1.5882649635228164e-05, "loss": 0.0948, "step": 21260 }, { "epoch": 2.5211668445393096, "grad_norm": 0.5172522362637477, "learning_rate": 1.5880414635181777e-05, "loss": 0.0823, "step": 21261 }, { "epoch": 2.521285426301435, "grad_norm": 0.6948933453001559, "learning_rate": 1.587817971920678e-05, "loss": 0.1039, "step": 21262 }, { "epoch": 2.52140400806356, "grad_norm": 0.8030353967306548, "learning_rate": 1.587594488732379e-05, "loss": 0.1027, "step": 21263 }, { "epoch": 2.521522589825685, "grad_norm": 0.5790485137881716, "learning_rate": 1.58737101395534e-05, "loss": 0.0767, "step": 21264 }, { "epoch": 2.5216411715878095, "grad_norm": 0.8458766697666417, "learning_rate": 1.587147547591621e-05, "loss": 0.1408, "step": 21265 }, { "epoch": 2.5217597533499347, "grad_norm": 0.4800373873828188, "learning_rate": 1.5869240896432806e-05, "loss": 0.0728, "step": 21266 }, { "epoch": 2.52187833511206, "grad_norm": 0.5450342010884944, "learning_rate": 1.586700640112382e-05, "loss": 0.0691, "step": 21267 }, { "epoch": 2.5219969168741847, "grad_norm": 0.795968038598675, "learning_rate": 1.586477199000983e-05, "loss": 0.089, "step": 21268 }, { "epoch": 2.5221154986363095, "grad_norm": 0.7026515821204286, "learning_rate": 1.5862537663111443e-05, "loss": 0.0966, "step": 21269 }, { "epoch": 2.5222340803984347, "grad_norm": 1.0283008232737523, "learning_rate": 1.5860303420449245e-05, "loss": 0.1082, "step": 21270 }, { "epoch": 2.52235266216056, "grad_norm": 0.7113579075457984, "learning_rate": 1.5858069262043854e-05, "loss": 0.101, "step": 21271 }, { "epoch": 2.5224712439226846, "grad_norm": 0.6153374128872715, "learning_rate": 1.5855835187915852e-05, "loss": 0.0778, "step": 21272 }, { "epoch": 2.52258982568481, "grad_norm": 0.7571337036008207, "learning_rate": 1.5853601198085834e-05, "loss": 0.0953, "step": 21273 }, { "epoch": 2.5227084074469346, "grad_norm": 0.8499453637619396, "learning_rate": 1.585136729257439e-05, "loss": 0.0763, "step": 21274 }, { "epoch": 2.52282698920906, "grad_norm": 0.48813810173494915, "learning_rate": 1.5849133471402127e-05, "loss": 0.0674, "step": 21275 }, { "epoch": 2.5229455709711845, "grad_norm": 0.798607820942903, "learning_rate": 1.5846899734589638e-05, "loss": 0.0914, "step": 21276 }, { "epoch": 2.5230641527333098, "grad_norm": 0.8220704567490645, "learning_rate": 1.5844666082157494e-05, "loss": 0.079, "step": 21277 }, { "epoch": 2.5231827344954345, "grad_norm": 1.2074088504968805, "learning_rate": 1.5842432514126316e-05, "loss": 0.1424, "step": 21278 }, { "epoch": 2.5233013162575597, "grad_norm": 0.8473358634742717, "learning_rate": 1.584019903051667e-05, "loss": 0.1022, "step": 21279 }, { "epoch": 2.5234198980196845, "grad_norm": 0.7860802042912769, "learning_rate": 1.5837965631349164e-05, "loss": 0.105, "step": 21280 }, { "epoch": 2.5235384797818097, "grad_norm": 0.8088072671402129, "learning_rate": 1.583573231664437e-05, "loss": 0.1275, "step": 21281 }, { "epoch": 2.5236570615439344, "grad_norm": 0.6148884886052473, "learning_rate": 1.5833499086422894e-05, "loss": 0.0904, "step": 21282 }, { "epoch": 2.5237756433060596, "grad_norm": 0.49898405079213337, "learning_rate": 1.583126594070532e-05, "loss": 0.0675, "step": 21283 }, { "epoch": 2.5238942250681844, "grad_norm": 0.8358561825814387, "learning_rate": 1.582903287951223e-05, "loss": 0.1006, "step": 21284 }, { "epoch": 2.5240128068303096, "grad_norm": 0.580590977791898, "learning_rate": 1.58267999028642e-05, "loss": 0.091, "step": 21285 }, { "epoch": 2.5241313885924344, "grad_norm": 0.7144230402814509, "learning_rate": 1.5824567010781832e-05, "loss": 0.0896, "step": 21286 }, { "epoch": 2.5242499703545596, "grad_norm": 0.6599775877907417, "learning_rate": 1.5822334203285704e-05, "loss": 0.093, "step": 21287 }, { "epoch": 2.5243685521166843, "grad_norm": 0.6735555424812282, "learning_rate": 1.58201014803964e-05, "loss": 0.0799, "step": 21288 }, { "epoch": 2.5244871338788095, "grad_norm": 0.6469368809132241, "learning_rate": 1.5817868842134504e-05, "loss": 0.1072, "step": 21289 }, { "epoch": 2.5246057156409343, "grad_norm": 0.655316105510702, "learning_rate": 1.5815636288520594e-05, "loss": 0.079, "step": 21290 }, { "epoch": 2.5247242974030595, "grad_norm": 0.6535783033483098, "learning_rate": 1.581340381957526e-05, "loss": 0.0854, "step": 21291 }, { "epoch": 2.5248428791651842, "grad_norm": 0.666211953830134, "learning_rate": 1.5811171435319067e-05, "loss": 0.0915, "step": 21292 }, { "epoch": 2.5249614609273094, "grad_norm": 0.781464983928972, "learning_rate": 1.580893913577262e-05, "loss": 0.108, "step": 21293 }, { "epoch": 2.525080042689434, "grad_norm": 0.6152789615941434, "learning_rate": 1.5806706920956476e-05, "loss": 0.0847, "step": 21294 }, { "epoch": 2.5251986244515594, "grad_norm": 0.5779655423595016, "learning_rate": 1.5804474790891227e-05, "loss": 0.0862, "step": 21295 }, { "epoch": 2.5253172062136846, "grad_norm": 0.5749657128377035, "learning_rate": 1.5802242745597432e-05, "loss": 0.0864, "step": 21296 }, { "epoch": 2.5254357879758094, "grad_norm": 0.9151627899529122, "learning_rate": 1.580001078509569e-05, "loss": 0.1068, "step": 21297 }, { "epoch": 2.525554369737934, "grad_norm": 0.740132708607028, "learning_rate": 1.579777890940656e-05, "loss": 0.1184, "step": 21298 }, { "epoch": 2.5256729515000593, "grad_norm": 0.6812029282939753, "learning_rate": 1.579554711855063e-05, "loss": 0.0957, "step": 21299 }, { "epoch": 2.5257915332621845, "grad_norm": 0.6280226773195359, "learning_rate": 1.579331541254846e-05, "loss": 0.0779, "step": 21300 }, { "epoch": 2.5259101150243093, "grad_norm": 0.819494004408467, "learning_rate": 1.5791083791420642e-05, "loss": 0.1158, "step": 21301 }, { "epoch": 2.526028696786434, "grad_norm": 0.789435546291929, "learning_rate": 1.5788852255187736e-05, "loss": 0.0826, "step": 21302 }, { "epoch": 2.5261472785485592, "grad_norm": 0.7825214458603302, "learning_rate": 1.5786620803870317e-05, "loss": 0.0809, "step": 21303 }, { "epoch": 2.5262658603106845, "grad_norm": 0.5654859634549121, "learning_rate": 1.5784389437488947e-05, "loss": 0.0725, "step": 21304 }, { "epoch": 2.526384442072809, "grad_norm": 0.61824833713517, "learning_rate": 1.5782158156064216e-05, "loss": 0.0892, "step": 21305 }, { "epoch": 2.526503023834934, "grad_norm": 0.5881089797525876, "learning_rate": 1.5779926959616683e-05, "loss": 0.0824, "step": 21306 }, { "epoch": 2.526621605597059, "grad_norm": 0.6388794775630645, "learning_rate": 1.5777695848166907e-05, "loss": 0.0771, "step": 21307 }, { "epoch": 2.5267401873591844, "grad_norm": 0.8025774161591848, "learning_rate": 1.5775464821735475e-05, "loss": 0.1343, "step": 21308 }, { "epoch": 2.526858769121309, "grad_norm": 0.5742635003862505, "learning_rate": 1.5773233880342942e-05, "loss": 0.0787, "step": 21309 }, { "epoch": 2.526977350883434, "grad_norm": 0.6401381837169694, "learning_rate": 1.5771003024009884e-05, "loss": 0.0746, "step": 21310 }, { "epoch": 2.527095932645559, "grad_norm": 0.5758265508949326, "learning_rate": 1.5768772252756848e-05, "loss": 0.0752, "step": 21311 }, { "epoch": 2.5272145144076843, "grad_norm": 0.4727605809320031, "learning_rate": 1.576654156660442e-05, "loss": 0.0614, "step": 21312 }, { "epoch": 2.527333096169809, "grad_norm": 1.135875076729461, "learning_rate": 1.576431096557316e-05, "loss": 0.1525, "step": 21313 }, { "epoch": 2.527451677931934, "grad_norm": 0.9188057472627202, "learning_rate": 1.5762080449683627e-05, "loss": 0.128, "step": 21314 }, { "epoch": 2.527570259694059, "grad_norm": 0.8312820094796594, "learning_rate": 1.5759850018956375e-05, "loss": 0.1296, "step": 21315 }, { "epoch": 2.527688841456184, "grad_norm": 1.1083637333029084, "learning_rate": 1.5757619673411984e-05, "loss": 0.136, "step": 21316 }, { "epoch": 2.527807423218309, "grad_norm": 0.8689547664020171, "learning_rate": 1.5755389413071004e-05, "loss": 0.131, "step": 21317 }, { "epoch": 2.5279260049804337, "grad_norm": 0.6218064871013912, "learning_rate": 1.575315923795399e-05, "loss": 0.0796, "step": 21318 }, { "epoch": 2.528044586742559, "grad_norm": 1.075657374751013, "learning_rate": 1.575092914808151e-05, "loss": 0.14, "step": 21319 }, { "epoch": 2.528163168504684, "grad_norm": 0.8143566622234711, "learning_rate": 1.5748699143474125e-05, "loss": 0.1162, "step": 21320 }, { "epoch": 2.528281750266809, "grad_norm": 0.6309696135830213, "learning_rate": 1.5746469224152393e-05, "loss": 0.0898, "step": 21321 }, { "epoch": 2.528400332028934, "grad_norm": 0.8991769490985351, "learning_rate": 1.5744239390136854e-05, "loss": 0.0948, "step": 21322 }, { "epoch": 2.528518913791059, "grad_norm": 0.675833658404797, "learning_rate": 1.574200964144809e-05, "loss": 0.0652, "step": 21323 }, { "epoch": 2.528637495553184, "grad_norm": 0.9999458834740371, "learning_rate": 1.5739779978106644e-05, "loss": 0.1419, "step": 21324 }, { "epoch": 2.528756077315309, "grad_norm": 0.6585057640300015, "learning_rate": 1.5737550400133072e-05, "loss": 0.0919, "step": 21325 }, { "epoch": 2.528874659077434, "grad_norm": 0.563245916762423, "learning_rate": 1.5735320907547918e-05, "loss": 0.0684, "step": 21326 }, { "epoch": 2.528993240839559, "grad_norm": 0.49375253271008424, "learning_rate": 1.573309150037175e-05, "loss": 0.067, "step": 21327 }, { "epoch": 2.529111822601684, "grad_norm": 0.5974908024149669, "learning_rate": 1.573086217862511e-05, "loss": 0.0772, "step": 21328 }, { "epoch": 2.5292304043638087, "grad_norm": 0.6794303625722746, "learning_rate": 1.5728632942328563e-05, "loss": 0.0891, "step": 21329 }, { "epoch": 2.529348986125934, "grad_norm": 0.5602299732057155, "learning_rate": 1.572640379150264e-05, "loss": 0.0649, "step": 21330 }, { "epoch": 2.5294675678880587, "grad_norm": 0.6162324998859603, "learning_rate": 1.5724174726167912e-05, "loss": 0.0971, "step": 21331 }, { "epoch": 2.529586149650184, "grad_norm": 0.6382535469280032, "learning_rate": 1.5721945746344917e-05, "loss": 0.0917, "step": 21332 }, { "epoch": 2.5297047314123087, "grad_norm": 0.5277946082473173, "learning_rate": 1.5719716852054204e-05, "loss": 0.0678, "step": 21333 }, { "epoch": 2.529823313174434, "grad_norm": 0.6672909301635114, "learning_rate": 1.571748804331631e-05, "loss": 0.0739, "step": 21334 }, { "epoch": 2.5299418949365586, "grad_norm": 0.7452947120658344, "learning_rate": 1.5715259320151806e-05, "loss": 0.097, "step": 21335 }, { "epoch": 2.530060476698684, "grad_norm": 0.5493272464846725, "learning_rate": 1.5713030682581225e-05, "loss": 0.0672, "step": 21336 }, { "epoch": 2.5301790584608086, "grad_norm": 0.6562297082760508, "learning_rate": 1.5710802130625102e-05, "loss": 0.0782, "step": 21337 }, { "epoch": 2.530297640222934, "grad_norm": 0.8142567015662449, "learning_rate": 1.5708573664304004e-05, "loss": 0.1055, "step": 21338 }, { "epoch": 2.5304162219850586, "grad_norm": 0.6832243852624851, "learning_rate": 1.5706345283638458e-05, "loss": 0.0966, "step": 21339 }, { "epoch": 2.5305348037471838, "grad_norm": 0.5776624940880218, "learning_rate": 1.5704116988649014e-05, "loss": 0.0855, "step": 21340 }, { "epoch": 2.5306533855093085, "grad_norm": 0.45085309349237535, "learning_rate": 1.5701888779356204e-05, "loss": 0.0722, "step": 21341 }, { "epoch": 2.5307719672714337, "grad_norm": 0.4581341049199007, "learning_rate": 1.5699660655780584e-05, "loss": 0.0712, "step": 21342 }, { "epoch": 2.5308905490335585, "grad_norm": 0.8217490533547537, "learning_rate": 1.5697432617942692e-05, "loss": 0.1139, "step": 21343 }, { "epoch": 2.5310091307956837, "grad_norm": 0.5671148754417874, "learning_rate": 1.5695204665863065e-05, "loss": 0.0738, "step": 21344 }, { "epoch": 2.531127712557809, "grad_norm": 0.6787102124359455, "learning_rate": 1.5692976799562227e-05, "loss": 0.0891, "step": 21345 }, { "epoch": 2.5312462943199336, "grad_norm": 0.5643183433788461, "learning_rate": 1.569074901906074e-05, "loss": 0.073, "step": 21346 }, { "epoch": 2.5313648760820584, "grad_norm": 0.9145787226535894, "learning_rate": 1.568852132437913e-05, "loss": 0.1468, "step": 21347 }, { "epoch": 2.5314834578441836, "grad_norm": 0.9637523565972025, "learning_rate": 1.568629371553793e-05, "loss": 0.1546, "step": 21348 }, { "epoch": 2.531602039606309, "grad_norm": 0.575715205523021, "learning_rate": 1.5684066192557685e-05, "loss": 0.0878, "step": 21349 }, { "epoch": 2.5317206213684336, "grad_norm": 0.6033988695582727, "learning_rate": 1.5681838755458928e-05, "loss": 0.0843, "step": 21350 }, { "epoch": 2.5318392031305583, "grad_norm": 0.5954625349158761, "learning_rate": 1.567961140426219e-05, "loss": 0.0783, "step": 21351 }, { "epoch": 2.5319577848926835, "grad_norm": 0.5964618890167728, "learning_rate": 1.5677384138988e-05, "loss": 0.0932, "step": 21352 }, { "epoch": 2.5320763666548087, "grad_norm": 0.671647332768306, "learning_rate": 1.56751569596569e-05, "loss": 0.1, "step": 21353 }, { "epoch": 2.5321949484169335, "grad_norm": 0.679007150663439, "learning_rate": 1.567292986628942e-05, "loss": 0.1071, "step": 21354 }, { "epoch": 2.5323135301790582, "grad_norm": 0.5295665330085858, "learning_rate": 1.5670702858906094e-05, "loss": 0.0747, "step": 21355 }, { "epoch": 2.5324321119411835, "grad_norm": 0.5475992943520677, "learning_rate": 1.5668475937527432e-05, "loss": 0.0741, "step": 21356 }, { "epoch": 2.5325506937033087, "grad_norm": 0.7221759018154783, "learning_rate": 1.566624910217399e-05, "loss": 0.0975, "step": 21357 }, { "epoch": 2.5326692754654334, "grad_norm": 0.7057870014221922, "learning_rate": 1.566402235286628e-05, "loss": 0.1027, "step": 21358 }, { "epoch": 2.532787857227558, "grad_norm": 0.6032345886881225, "learning_rate": 1.5661795689624843e-05, "loss": 0.0728, "step": 21359 }, { "epoch": 2.5329064389896834, "grad_norm": 0.7550559841281823, "learning_rate": 1.5659569112470183e-05, "loss": 0.1097, "step": 21360 }, { "epoch": 2.5330250207518086, "grad_norm": 1.0084981879530714, "learning_rate": 1.565734262142286e-05, "loss": 0.1146, "step": 21361 }, { "epoch": 2.5331436025139333, "grad_norm": 0.9713821551938056, "learning_rate": 1.5655116216503375e-05, "loss": 0.154, "step": 21362 }, { "epoch": 2.533262184276058, "grad_norm": 0.7035524880434478, "learning_rate": 1.565288989773225e-05, "loss": 0.0904, "step": 21363 }, { "epoch": 2.5333807660381833, "grad_norm": 0.5443701854309239, "learning_rate": 1.565066366513003e-05, "loss": 0.077, "step": 21364 }, { "epoch": 2.5334993478003085, "grad_norm": 0.5578310551512045, "learning_rate": 1.5648437518717223e-05, "loss": 0.0858, "step": 21365 }, { "epoch": 2.5336179295624333, "grad_norm": 1.0640638221568757, "learning_rate": 1.564621145851436e-05, "loss": 0.1337, "step": 21366 }, { "epoch": 2.533736511324558, "grad_norm": 0.8377480761565995, "learning_rate": 1.564398548454194e-05, "loss": 0.1426, "step": 21367 }, { "epoch": 2.533855093086683, "grad_norm": 0.5361796297558525, "learning_rate": 1.5641759596820517e-05, "loss": 0.0835, "step": 21368 }, { "epoch": 2.5339736748488084, "grad_norm": 0.9823883848553014, "learning_rate": 1.5639533795370584e-05, "loss": 0.1114, "step": 21369 }, { "epoch": 2.534092256610933, "grad_norm": 0.5313059740724674, "learning_rate": 1.5637308080212677e-05, "loss": 0.0795, "step": 21370 }, { "epoch": 2.5342108383730584, "grad_norm": 0.7764688797809753, "learning_rate": 1.56350824513673e-05, "loss": 0.1257, "step": 21371 }, { "epoch": 2.534329420135183, "grad_norm": 0.6919005308959864, "learning_rate": 1.563285690885499e-05, "loss": 0.0828, "step": 21372 }, { "epoch": 2.5344480018973083, "grad_norm": 0.544993903860499, "learning_rate": 1.563063145269625e-05, "loss": 0.0736, "step": 21373 }, { "epoch": 2.534566583659433, "grad_norm": 0.5405379773721206, "learning_rate": 1.56284060829116e-05, "loss": 0.0854, "step": 21374 }, { "epoch": 2.5346851654215583, "grad_norm": 0.6989543644469031, "learning_rate": 1.5626180799521546e-05, "loss": 0.0908, "step": 21375 }, { "epoch": 2.534803747183683, "grad_norm": 0.7815316312278002, "learning_rate": 1.5623955602546613e-05, "loss": 0.1219, "step": 21376 }, { "epoch": 2.5349223289458083, "grad_norm": 0.5144071957960994, "learning_rate": 1.5621730492007312e-05, "loss": 0.0814, "step": 21377 }, { "epoch": 2.535040910707933, "grad_norm": 0.6321964247914796, "learning_rate": 1.5619505467924157e-05, "loss": 0.0996, "step": 21378 }, { "epoch": 2.5351594924700582, "grad_norm": 0.6857003285264767, "learning_rate": 1.5617280530317654e-05, "loss": 0.0919, "step": 21379 }, { "epoch": 2.535278074232183, "grad_norm": 0.4153169763339736, "learning_rate": 1.5615055679208327e-05, "loss": 0.0521, "step": 21380 }, { "epoch": 2.535396655994308, "grad_norm": 0.8701194181280495, "learning_rate": 1.561283091461668e-05, "loss": 0.117, "step": 21381 }, { "epoch": 2.535515237756433, "grad_norm": 1.1011608010275402, "learning_rate": 1.5610606236563203e-05, "loss": 0.1225, "step": 21382 }, { "epoch": 2.535633819518558, "grad_norm": 0.62425900057351, "learning_rate": 1.5608381645068437e-05, "loss": 0.0721, "step": 21383 }, { "epoch": 2.535752401280683, "grad_norm": 0.5114191792089512, "learning_rate": 1.5606157140152876e-05, "loss": 0.0679, "step": 21384 }, { "epoch": 2.535870983042808, "grad_norm": 0.493276936517542, "learning_rate": 1.5603932721837025e-05, "loss": 0.0611, "step": 21385 }, { "epoch": 2.535989564804933, "grad_norm": 0.7496334077889462, "learning_rate": 1.5601708390141385e-05, "loss": 0.0957, "step": 21386 }, { "epoch": 2.536108146567058, "grad_norm": 0.8543792878882609, "learning_rate": 1.5599484145086473e-05, "loss": 0.1426, "step": 21387 }, { "epoch": 2.536226728329183, "grad_norm": 0.5702387280409568, "learning_rate": 1.559725998669279e-05, "loss": 0.0759, "step": 21388 }, { "epoch": 2.536345310091308, "grad_norm": 0.6277846728364588, "learning_rate": 1.5595035914980844e-05, "loss": 0.0996, "step": 21389 }, { "epoch": 2.536463891853433, "grad_norm": 0.5883480082640337, "learning_rate": 1.559281192997112e-05, "loss": 0.0775, "step": 21390 }, { "epoch": 2.536582473615558, "grad_norm": 0.6124578368288347, "learning_rate": 1.559058803168415e-05, "loss": 0.0988, "step": 21391 }, { "epoch": 2.5367010553776828, "grad_norm": 0.5528245083018609, "learning_rate": 1.5588364220140417e-05, "loss": 0.0956, "step": 21392 }, { "epoch": 2.536819637139808, "grad_norm": 0.5001961901196859, "learning_rate": 1.558614049536041e-05, "loss": 0.0686, "step": 21393 }, { "epoch": 2.5369382189019327, "grad_norm": 0.5972809955739095, "learning_rate": 1.558391685736466e-05, "loss": 0.0774, "step": 21394 }, { "epoch": 2.537056800664058, "grad_norm": 0.6669966395763532, "learning_rate": 1.5581693306173646e-05, "loss": 0.1055, "step": 21395 }, { "epoch": 2.5371753824261827, "grad_norm": 0.713907845185839, "learning_rate": 1.5579469841807866e-05, "loss": 0.0996, "step": 21396 }, { "epoch": 2.537293964188308, "grad_norm": 0.46105404314145265, "learning_rate": 1.5577246464287825e-05, "loss": 0.0557, "step": 21397 }, { "epoch": 2.537412545950433, "grad_norm": 0.5420500525651486, "learning_rate": 1.557502317363401e-05, "loss": 0.0748, "step": 21398 }, { "epoch": 2.537531127712558, "grad_norm": 0.9311110346310092, "learning_rate": 1.5572799969866936e-05, "loss": 0.1051, "step": 21399 }, { "epoch": 2.5376497094746826, "grad_norm": 0.4657393539884365, "learning_rate": 1.557057685300708e-05, "loss": 0.069, "step": 21400 }, { "epoch": 2.537768291236808, "grad_norm": 0.8134063879912088, "learning_rate": 1.556835382307494e-05, "loss": 0.1028, "step": 21401 }, { "epoch": 2.537886872998933, "grad_norm": 0.9631775927323866, "learning_rate": 1.5566130880091012e-05, "loss": 0.1201, "step": 21402 }, { "epoch": 2.5380054547610578, "grad_norm": 0.7152505155982368, "learning_rate": 1.5563908024075797e-05, "loss": 0.0933, "step": 21403 }, { "epoch": 2.5381240365231825, "grad_norm": 0.7994502238370568, "learning_rate": 1.5561685255049775e-05, "loss": 0.1227, "step": 21404 }, { "epoch": 2.5382426182853077, "grad_norm": 1.0339864605388562, "learning_rate": 1.555946257303343e-05, "loss": 0.1428, "step": 21405 }, { "epoch": 2.538361200047433, "grad_norm": 0.7414231040154973, "learning_rate": 1.5557239978047274e-05, "loss": 0.0751, "step": 21406 }, { "epoch": 2.5384797818095577, "grad_norm": 0.9259074107430612, "learning_rate": 1.5555017470111782e-05, "loss": 0.1588, "step": 21407 }, { "epoch": 2.5385983635716824, "grad_norm": 0.7932805087623734, "learning_rate": 1.555279504924745e-05, "loss": 0.0987, "step": 21408 }, { "epoch": 2.5387169453338077, "grad_norm": 0.8855268781604224, "learning_rate": 1.5550572715474754e-05, "loss": 0.1438, "step": 21409 }, { "epoch": 2.538835527095933, "grad_norm": 0.9026828476576046, "learning_rate": 1.5548350468814198e-05, "loss": 0.1254, "step": 21410 }, { "epoch": 2.5389541088580576, "grad_norm": 0.9209641152021051, "learning_rate": 1.5546128309286264e-05, "loss": 0.0957, "step": 21411 }, { "epoch": 2.5390726906201824, "grad_norm": 0.758288466936238, "learning_rate": 1.5543906236911424e-05, "loss": 0.1083, "step": 21412 }, { "epoch": 2.5391912723823076, "grad_norm": 0.48787556222178047, "learning_rate": 1.5541684251710182e-05, "loss": 0.062, "step": 21413 }, { "epoch": 2.5393098541444328, "grad_norm": 0.7094957488320865, "learning_rate": 1.553946235370301e-05, "loss": 0.1158, "step": 21414 }, { "epoch": 2.5394284359065575, "grad_norm": 0.7280227640118505, "learning_rate": 1.5537240542910396e-05, "loss": 0.0916, "step": 21415 }, { "epoch": 2.5395470176686823, "grad_norm": 0.6456939860591353, "learning_rate": 1.553501881935281e-05, "loss": 0.0911, "step": 21416 }, { "epoch": 2.5396655994308075, "grad_norm": 0.6817602383441491, "learning_rate": 1.5532797183050752e-05, "loss": 0.1093, "step": 21417 }, { "epoch": 2.5397841811929327, "grad_norm": 0.8174861987319114, "learning_rate": 1.553057563402469e-05, "loss": 0.1081, "step": 21418 }, { "epoch": 2.5399027629550575, "grad_norm": 0.5869234126782907, "learning_rate": 1.5528354172295114e-05, "loss": 0.0744, "step": 21419 }, { "epoch": 2.5400213447171827, "grad_norm": 0.6361858583171346, "learning_rate": 1.5526132797882486e-05, "loss": 0.0651, "step": 21420 }, { "epoch": 2.5401399264793074, "grad_norm": 0.5402051189483245, "learning_rate": 1.552391151080731e-05, "loss": 0.0761, "step": 21421 }, { "epoch": 2.5402585082414326, "grad_norm": 0.6763458230637077, "learning_rate": 1.5521690311090048e-05, "loss": 0.1024, "step": 21422 }, { "epoch": 2.5403770900035574, "grad_norm": 0.7096811962777245, "learning_rate": 1.5519469198751166e-05, "loss": 0.0933, "step": 21423 }, { "epoch": 2.5404956717656826, "grad_norm": 0.9749822315615653, "learning_rate": 1.5517248173811163e-05, "loss": 0.1364, "step": 21424 }, { "epoch": 2.5406142535278073, "grad_norm": 0.5835655812161914, "learning_rate": 1.5515027236290503e-05, "loss": 0.0977, "step": 21425 }, { "epoch": 2.5407328352899325, "grad_norm": 0.5919165998638725, "learning_rate": 1.5512806386209658e-05, "loss": 0.0911, "step": 21426 }, { "epoch": 2.5408514170520573, "grad_norm": 0.662203739211671, "learning_rate": 1.55105856235891e-05, "loss": 0.1059, "step": 21427 }, { "epoch": 2.5409699988141825, "grad_norm": 0.5889288540780524, "learning_rate": 1.550836494844931e-05, "loss": 0.0782, "step": 21428 }, { "epoch": 2.5410885805763073, "grad_norm": 0.5725810396661768, "learning_rate": 1.550614436081076e-05, "loss": 0.0763, "step": 21429 }, { "epoch": 2.5412071623384325, "grad_norm": 0.6387576091701502, "learning_rate": 1.5503923860693917e-05, "loss": 0.1007, "step": 21430 }, { "epoch": 2.5413257441005572, "grad_norm": 0.7429378169660595, "learning_rate": 1.550170344811924e-05, "loss": 0.1034, "step": 21431 }, { "epoch": 2.5414443258626824, "grad_norm": 0.6366780073179603, "learning_rate": 1.549948312310722e-05, "loss": 0.1024, "step": 21432 }, { "epoch": 2.541562907624807, "grad_norm": 0.9647917424638454, "learning_rate": 1.5497262885678315e-05, "loss": 0.1033, "step": 21433 }, { "epoch": 2.5416814893869324, "grad_norm": 0.558587641076497, "learning_rate": 1.5495042735852983e-05, "loss": 0.0784, "step": 21434 }, { "epoch": 2.541800071149057, "grad_norm": 0.5147839030478643, "learning_rate": 1.5492822673651712e-05, "loss": 0.0755, "step": 21435 }, { "epoch": 2.5419186529111824, "grad_norm": 0.5052937601392505, "learning_rate": 1.549060269909496e-05, "loss": 0.0734, "step": 21436 }, { "epoch": 2.542037234673307, "grad_norm": 0.6818045785775417, "learning_rate": 1.548838281220318e-05, "loss": 0.0879, "step": 21437 }, { "epoch": 2.5421558164354323, "grad_norm": 0.6562784640422374, "learning_rate": 1.548616301299685e-05, "loss": 0.0879, "step": 21438 }, { "epoch": 2.542274398197557, "grad_norm": 1.0696565845285737, "learning_rate": 1.5483943301496428e-05, "loss": 0.1381, "step": 21439 }, { "epoch": 2.5423929799596823, "grad_norm": 0.6283801722724008, "learning_rate": 1.548172367772239e-05, "loss": 0.0894, "step": 21440 }, { "epoch": 2.542511561721807, "grad_norm": 0.843453889689084, "learning_rate": 1.547950414169518e-05, "loss": 0.1094, "step": 21441 }, { "epoch": 2.5426301434839322, "grad_norm": 1.1521712014606824, "learning_rate": 1.5477284693435257e-05, "loss": 0.1656, "step": 21442 }, { "epoch": 2.542748725246057, "grad_norm": 0.45660154182440227, "learning_rate": 1.5475065332963106e-05, "loss": 0.0822, "step": 21443 }, { "epoch": 2.542867307008182, "grad_norm": 0.7739344402713508, "learning_rate": 1.5472846060299172e-05, "loss": 0.0998, "step": 21444 }, { "epoch": 2.542985888770307, "grad_norm": 0.6289935742838848, "learning_rate": 1.547062687546391e-05, "loss": 0.0859, "step": 21445 }, { "epoch": 2.543104470532432, "grad_norm": 0.644146346376722, "learning_rate": 1.5468407778477778e-05, "loss": 0.094, "step": 21446 }, { "epoch": 2.5432230522945574, "grad_norm": 0.9789036341628862, "learning_rate": 1.5466188769361244e-05, "loss": 0.1321, "step": 21447 }, { "epoch": 2.543341634056682, "grad_norm": 0.8966140991627146, "learning_rate": 1.5463969848134756e-05, "loss": 0.1209, "step": 21448 }, { "epoch": 2.543460215818807, "grad_norm": 0.6114925988228893, "learning_rate": 1.5461751014818775e-05, "loss": 0.0679, "step": 21449 }, { "epoch": 2.543578797580932, "grad_norm": 0.5033737544656481, "learning_rate": 1.5459532269433748e-05, "loss": 0.061, "step": 21450 }, { "epoch": 2.5436973793430573, "grad_norm": 0.6233589067309314, "learning_rate": 1.545731361200014e-05, "loss": 0.0851, "step": 21451 }, { "epoch": 2.543815961105182, "grad_norm": 0.524864858780048, "learning_rate": 1.5455095042538398e-05, "loss": 0.0695, "step": 21452 }, { "epoch": 2.543934542867307, "grad_norm": 0.6301121761405246, "learning_rate": 1.545287656106897e-05, "loss": 0.1054, "step": 21453 }, { "epoch": 2.544053124629432, "grad_norm": 0.7239488665118252, "learning_rate": 1.5450658167612316e-05, "loss": 0.1002, "step": 21454 }, { "epoch": 2.544171706391557, "grad_norm": 0.8667762040345961, "learning_rate": 1.5448439862188883e-05, "loss": 0.1026, "step": 21455 }, { "epoch": 2.544290288153682, "grad_norm": 0.7148979238998129, "learning_rate": 1.5446221644819124e-05, "loss": 0.093, "step": 21456 }, { "epoch": 2.5444088699158067, "grad_norm": 0.5987976989961494, "learning_rate": 1.544400351552348e-05, "loss": 0.0864, "step": 21457 }, { "epoch": 2.544527451677932, "grad_norm": 0.8563346723131567, "learning_rate": 1.5441785474322405e-05, "loss": 0.1164, "step": 21458 }, { "epoch": 2.544646033440057, "grad_norm": 0.6076988941050389, "learning_rate": 1.5439567521236358e-05, "loss": 0.0709, "step": 21459 }, { "epoch": 2.544764615202182, "grad_norm": 0.48474161477327726, "learning_rate": 1.5437349656285772e-05, "loss": 0.0779, "step": 21460 }, { "epoch": 2.5448831969643066, "grad_norm": 0.6433545354337432, "learning_rate": 1.5435131879491083e-05, "loss": 0.0803, "step": 21461 }, { "epoch": 2.545001778726432, "grad_norm": 0.38199027031267835, "learning_rate": 1.5432914190872757e-05, "loss": 0.0453, "step": 21462 }, { "epoch": 2.545120360488557, "grad_norm": 0.7789196129436371, "learning_rate": 1.5430696590451235e-05, "loss": 0.0753, "step": 21463 }, { "epoch": 2.545238942250682, "grad_norm": 0.8602037024729732, "learning_rate": 1.5428479078246944e-05, "loss": 0.0913, "step": 21464 }, { "epoch": 2.5453575240128066, "grad_norm": 0.6951208287350019, "learning_rate": 1.542626165428035e-05, "loss": 0.1, "step": 21465 }, { "epoch": 2.5454761057749318, "grad_norm": 0.7912608544861333, "learning_rate": 1.5424044318571877e-05, "loss": 0.1153, "step": 21466 }, { "epoch": 2.545594687537057, "grad_norm": 1.3073424004298193, "learning_rate": 1.5421827071141975e-05, "loss": 0.1364, "step": 21467 }, { "epoch": 2.5457132692991817, "grad_norm": 0.5463785718416431, "learning_rate": 1.541960991201108e-05, "loss": 0.0915, "step": 21468 }, { "epoch": 2.5458318510613065, "grad_norm": 0.5173562358048313, "learning_rate": 1.5417392841199632e-05, "loss": 0.0584, "step": 21469 }, { "epoch": 2.5459504328234317, "grad_norm": 1.1440123620017242, "learning_rate": 1.5415175858728076e-05, "loss": 0.1247, "step": 21470 }, { "epoch": 2.546069014585557, "grad_norm": 0.5773117610614737, "learning_rate": 1.5412958964616846e-05, "loss": 0.0834, "step": 21471 }, { "epoch": 2.5461875963476817, "grad_norm": 0.5004584238614178, "learning_rate": 1.541074215888637e-05, "loss": 0.0656, "step": 21472 }, { "epoch": 2.546306178109807, "grad_norm": 0.4580511828382621, "learning_rate": 1.5408525441557098e-05, "loss": 0.0666, "step": 21473 }, { "epoch": 2.5464247598719316, "grad_norm": 0.6678188521107324, "learning_rate": 1.540630881264946e-05, "loss": 0.0827, "step": 21474 }, { "epoch": 2.546543341634057, "grad_norm": 0.9138976732676265, "learning_rate": 1.540409227218389e-05, "loss": 0.1249, "step": 21475 }, { "epoch": 2.5466619233961816, "grad_norm": 0.499510770625477, "learning_rate": 1.5401875820180818e-05, "loss": 0.0755, "step": 21476 }, { "epoch": 2.546780505158307, "grad_norm": 0.6844799929605587, "learning_rate": 1.5399659456660682e-05, "loss": 0.0847, "step": 21477 }, { "epoch": 2.5468990869204315, "grad_norm": 0.7893369095330867, "learning_rate": 1.5397443181643915e-05, "loss": 0.1134, "step": 21478 }, { "epoch": 2.5470176686825567, "grad_norm": 0.6559199531640769, "learning_rate": 1.539522699515094e-05, "loss": 0.0902, "step": 21479 }, { "epoch": 2.5471362504446815, "grad_norm": 0.5614910780279838, "learning_rate": 1.5393010897202204e-05, "loss": 0.0683, "step": 21480 }, { "epoch": 2.5472548322068067, "grad_norm": 0.7058706454508963, "learning_rate": 1.5390794887818124e-05, "loss": 0.0839, "step": 21481 }, { "epoch": 2.5473734139689315, "grad_norm": 0.6620980111975824, "learning_rate": 1.5388578967019135e-05, "loss": 0.1026, "step": 21482 }, { "epoch": 2.5474919957310567, "grad_norm": 0.7335341191367418, "learning_rate": 1.5386363134825647e-05, "loss": 0.0899, "step": 21483 }, { "epoch": 2.5476105774931814, "grad_norm": 0.5748404841842454, "learning_rate": 1.5384147391258117e-05, "loss": 0.0666, "step": 21484 }, { "epoch": 2.5477291592553066, "grad_norm": 0.956147980821703, "learning_rate": 1.5381931736336953e-05, "loss": 0.1263, "step": 21485 }, { "epoch": 2.5478477410174314, "grad_norm": 0.6493586498898836, "learning_rate": 1.537971617008258e-05, "loss": 0.0819, "step": 21486 }, { "epoch": 2.5479663227795566, "grad_norm": 0.5636166612439507, "learning_rate": 1.5377500692515428e-05, "loss": 0.0787, "step": 21487 }, { "epoch": 2.5480849045416814, "grad_norm": 0.6431838170284558, "learning_rate": 1.537528530365592e-05, "loss": 0.0763, "step": 21488 }, { "epoch": 2.5482034863038066, "grad_norm": 0.7746290430475276, "learning_rate": 1.5373070003524482e-05, "loss": 0.0946, "step": 21489 }, { "epoch": 2.5483220680659313, "grad_norm": 0.7160616558953101, "learning_rate": 1.5370854792141532e-05, "loss": 0.0871, "step": 21490 }, { "epoch": 2.5484406498280565, "grad_norm": 0.5184570047247066, "learning_rate": 1.5368639669527483e-05, "loss": 0.0783, "step": 21491 }, { "epoch": 2.5485592315901813, "grad_norm": 0.8687831063111374, "learning_rate": 1.5366424635702775e-05, "loss": 0.103, "step": 21492 }, { "epoch": 2.5486778133523065, "grad_norm": 0.524412475977883, "learning_rate": 1.536420969068782e-05, "loss": 0.0667, "step": 21493 }, { "epoch": 2.5487963951144312, "grad_norm": 1.1075373021684356, "learning_rate": 1.536199483450303e-05, "loss": 0.1765, "step": 21494 }, { "epoch": 2.5489149768765564, "grad_norm": 0.5227129131950715, "learning_rate": 1.5359780067168832e-05, "loss": 0.0766, "step": 21495 }, { "epoch": 2.5490335586386816, "grad_norm": 0.46669929946426936, "learning_rate": 1.535756538870564e-05, "loss": 0.063, "step": 21496 }, { "epoch": 2.5491521404008064, "grad_norm": 0.9826810859167289, "learning_rate": 1.5355350799133873e-05, "loss": 0.1073, "step": 21497 }, { "epoch": 2.549270722162931, "grad_norm": 0.549185901708805, "learning_rate": 1.535313629847394e-05, "loss": 0.0775, "step": 21498 }, { "epoch": 2.5493893039250564, "grad_norm": 0.5628427233800181, "learning_rate": 1.535092188674626e-05, "loss": 0.066, "step": 21499 }, { "epoch": 2.5495078856871816, "grad_norm": 0.5671079796931414, "learning_rate": 1.534870756397125e-05, "loss": 0.083, "step": 21500 }, { "epoch": 2.5496264674493063, "grad_norm": 0.600349483300442, "learning_rate": 1.5346493330169327e-05, "loss": 0.0864, "step": 21501 }, { "epoch": 2.549745049211431, "grad_norm": 0.9406665216622622, "learning_rate": 1.5344279185360883e-05, "loss": 0.1381, "step": 21502 }, { "epoch": 2.5498636309735563, "grad_norm": 0.7734731884328327, "learning_rate": 1.534206512956636e-05, "loss": 0.0941, "step": 21503 }, { "epoch": 2.5499822127356815, "grad_norm": 0.4973297547910033, "learning_rate": 1.5339851162806147e-05, "loss": 0.0677, "step": 21504 }, { "epoch": 2.5501007944978062, "grad_norm": 0.5435677813430415, "learning_rate": 1.5337637285100653e-05, "loss": 0.0783, "step": 21505 }, { "epoch": 2.550219376259931, "grad_norm": 0.8005652512421672, "learning_rate": 1.5335423496470302e-05, "loss": 0.096, "step": 21506 }, { "epoch": 2.550337958022056, "grad_norm": 0.6899938943839169, "learning_rate": 1.5333209796935494e-05, "loss": 0.0875, "step": 21507 }, { "epoch": 2.5504565397841814, "grad_norm": 0.6756843285357619, "learning_rate": 1.533099618651664e-05, "loss": 0.0827, "step": 21508 }, { "epoch": 2.550575121546306, "grad_norm": 0.5848270113345713, "learning_rate": 1.5328782665234132e-05, "loss": 0.0771, "step": 21509 }, { "epoch": 2.550693703308431, "grad_norm": 0.7539631275597056, "learning_rate": 1.5326569233108402e-05, "loss": 0.0928, "step": 21510 }, { "epoch": 2.550812285070556, "grad_norm": 0.7484594525889698, "learning_rate": 1.5324355890159842e-05, "loss": 0.0958, "step": 21511 }, { "epoch": 2.5509308668326813, "grad_norm": 1.4495184427474699, "learning_rate": 1.5322142636408855e-05, "loss": 0.1372, "step": 21512 }, { "epoch": 2.551049448594806, "grad_norm": 0.4575057816101567, "learning_rate": 1.5319929471875832e-05, "loss": 0.0721, "step": 21513 }, { "epoch": 2.551168030356931, "grad_norm": 0.6731440760318099, "learning_rate": 1.53177163965812e-05, "loss": 0.0861, "step": 21514 }, { "epoch": 2.551286612119056, "grad_norm": 0.9305714541728859, "learning_rate": 1.531550341054535e-05, "loss": 0.1016, "step": 21515 }, { "epoch": 2.5514051938811813, "grad_norm": 0.6599011524908213, "learning_rate": 1.531329051378868e-05, "loss": 0.0834, "step": 21516 }, { "epoch": 2.551523775643306, "grad_norm": 0.6787023871294922, "learning_rate": 1.531107770633159e-05, "loss": 0.1017, "step": 21517 }, { "epoch": 2.5516423574054308, "grad_norm": 0.49962345968484945, "learning_rate": 1.530886498819448e-05, "loss": 0.0678, "step": 21518 }, { "epoch": 2.551760939167556, "grad_norm": 0.5638828639428904, "learning_rate": 1.530665235939776e-05, "loss": 0.0683, "step": 21519 }, { "epoch": 2.551879520929681, "grad_norm": 0.6929171020453118, "learning_rate": 1.5304439819961807e-05, "loss": 0.1081, "step": 21520 }, { "epoch": 2.551998102691806, "grad_norm": 0.5746706264906336, "learning_rate": 1.530222736990704e-05, "loss": 0.0686, "step": 21521 }, { "epoch": 2.552116684453931, "grad_norm": 0.6234282627737747, "learning_rate": 1.5300015009253842e-05, "loss": 0.0825, "step": 21522 }, { "epoch": 2.552235266216056, "grad_norm": 0.615533535388, "learning_rate": 1.5297802738022616e-05, "loss": 0.0889, "step": 21523 }, { "epoch": 2.552353847978181, "grad_norm": 0.4376768892716821, "learning_rate": 1.5295590556233735e-05, "loss": 0.0586, "step": 21524 }, { "epoch": 2.552472429740306, "grad_norm": 0.856561450005793, "learning_rate": 1.5293378463907624e-05, "loss": 0.1424, "step": 21525 }, { "epoch": 2.552591011502431, "grad_norm": 0.6097963834290009, "learning_rate": 1.529116646106466e-05, "loss": 0.0993, "step": 21526 }, { "epoch": 2.552709593264556, "grad_norm": 0.529370514111545, "learning_rate": 1.5288954547725225e-05, "loss": 0.0626, "step": 21527 }, { "epoch": 2.552828175026681, "grad_norm": 0.5202038813083469, "learning_rate": 1.5286742723909724e-05, "loss": 0.0791, "step": 21528 }, { "epoch": 2.552946756788806, "grad_norm": 0.4763423986612556, "learning_rate": 1.5284530989638545e-05, "loss": 0.0634, "step": 21529 }, { "epoch": 2.553065338550931, "grad_norm": 1.1495245062671344, "learning_rate": 1.528231934493208e-05, "loss": 0.1227, "step": 21530 }, { "epoch": 2.5531839203130557, "grad_norm": 0.7378088365454362, "learning_rate": 1.5280107789810717e-05, "loss": 0.1023, "step": 21531 }, { "epoch": 2.553302502075181, "grad_norm": 0.7000303954421144, "learning_rate": 1.5277896324294828e-05, "loss": 0.0864, "step": 21532 }, { "epoch": 2.5534210838373057, "grad_norm": 0.6317022533748192, "learning_rate": 1.5275684948404823e-05, "loss": 0.0903, "step": 21533 }, { "epoch": 2.553539665599431, "grad_norm": 0.6750427146372631, "learning_rate": 1.5273473662161076e-05, "loss": 0.0905, "step": 21534 }, { "epoch": 2.5536582473615557, "grad_norm": 0.801380869294865, "learning_rate": 1.527126246558397e-05, "loss": 0.0872, "step": 21535 }, { "epoch": 2.553776829123681, "grad_norm": 0.4344270657209867, "learning_rate": 1.5269051358693898e-05, "loss": 0.0639, "step": 21536 }, { "epoch": 2.5538954108858056, "grad_norm": 0.8117982343733693, "learning_rate": 1.5266840341511233e-05, "loss": 0.1041, "step": 21537 }, { "epoch": 2.554013992647931, "grad_norm": 0.747367452759796, "learning_rate": 1.5264629414056375e-05, "loss": 0.1041, "step": 21538 }, { "epoch": 2.5541325744100556, "grad_norm": 0.6489958032485993, "learning_rate": 1.5262418576349685e-05, "loss": 0.0632, "step": 21539 }, { "epoch": 2.554251156172181, "grad_norm": 0.5033593392146642, "learning_rate": 1.5260207828411564e-05, "loss": 0.086, "step": 21540 }, { "epoch": 2.5543697379343056, "grad_norm": 0.5547911340090377, "learning_rate": 1.5257997170262383e-05, "loss": 0.0791, "step": 21541 }, { "epoch": 2.5544883196964308, "grad_norm": 0.817274504252948, "learning_rate": 1.5255786601922523e-05, "loss": 0.1085, "step": 21542 }, { "epoch": 2.5546069014585555, "grad_norm": 0.48362533508957767, "learning_rate": 1.525357612341235e-05, "loss": 0.0756, "step": 21543 }, { "epoch": 2.5547254832206807, "grad_norm": 0.4849448539412303, "learning_rate": 1.5251365734752262e-05, "loss": 0.073, "step": 21544 }, { "epoch": 2.554844064982806, "grad_norm": 0.6699229826309621, "learning_rate": 1.5249155435962632e-05, "loss": 0.0793, "step": 21545 }, { "epoch": 2.5549626467449307, "grad_norm": 0.9567447350571209, "learning_rate": 1.5246945227063818e-05, "loss": 0.1037, "step": 21546 }, { "epoch": 2.5550812285070554, "grad_norm": 0.7191741995384846, "learning_rate": 1.5244735108076216e-05, "loss": 0.0727, "step": 21547 }, { "epoch": 2.5551998102691806, "grad_norm": 0.7371644389664014, "learning_rate": 1.5242525079020192e-05, "loss": 0.0979, "step": 21548 }, { "epoch": 2.555318392031306, "grad_norm": 0.45144490793855985, "learning_rate": 1.5240315139916128e-05, "loss": 0.0508, "step": 21549 }, { "epoch": 2.5554369737934306, "grad_norm": 0.5353253455466163, "learning_rate": 1.5238105290784377e-05, "loss": 0.0663, "step": 21550 }, { "epoch": 2.5555555555555554, "grad_norm": 0.6751711314775343, "learning_rate": 1.5235895531645336e-05, "loss": 0.1007, "step": 21551 }, { "epoch": 2.5556741373176806, "grad_norm": 0.6436186272622699, "learning_rate": 1.5233685862519364e-05, "loss": 0.0837, "step": 21552 }, { "epoch": 2.5557927190798058, "grad_norm": 0.5608886882365024, "learning_rate": 1.5231476283426832e-05, "loss": 0.0879, "step": 21553 }, { "epoch": 2.5559113008419305, "grad_norm": 0.71393162187896, "learning_rate": 1.5229266794388103e-05, "loss": 0.1135, "step": 21554 }, { "epoch": 2.5560298826040553, "grad_norm": 0.5518071095610267, "learning_rate": 1.5227057395423558e-05, "loss": 0.0845, "step": 21555 }, { "epoch": 2.5561484643661805, "grad_norm": 0.8540694201828075, "learning_rate": 1.5224848086553555e-05, "loss": 0.1272, "step": 21556 }, { "epoch": 2.5562670461283057, "grad_norm": 0.7786594827888234, "learning_rate": 1.5222638867798473e-05, "loss": 0.0891, "step": 21557 }, { "epoch": 2.5563856278904304, "grad_norm": 0.8172627050172733, "learning_rate": 1.522042973917866e-05, "loss": 0.1143, "step": 21558 }, { "epoch": 2.556504209652555, "grad_norm": 1.0522725207218766, "learning_rate": 1.5218220700714503e-05, "loss": 0.1301, "step": 21559 }, { "epoch": 2.5566227914146804, "grad_norm": 0.6218741935041552, "learning_rate": 1.5216011752426355e-05, "loss": 0.0869, "step": 21560 }, { "epoch": 2.5567413731768056, "grad_norm": 0.6670783372953689, "learning_rate": 1.5213802894334578e-05, "loss": 0.0885, "step": 21561 }, { "epoch": 2.5568599549389304, "grad_norm": 0.8533189327687157, "learning_rate": 1.5211594126459533e-05, "loss": 0.0964, "step": 21562 }, { "epoch": 2.556978536701055, "grad_norm": 0.47046287925409436, "learning_rate": 1.5209385448821592e-05, "loss": 0.061, "step": 21563 }, { "epoch": 2.5570971184631803, "grad_norm": 0.7877904224778446, "learning_rate": 1.5207176861441113e-05, "loss": 0.1373, "step": 21564 }, { "epoch": 2.5572157002253055, "grad_norm": 0.5656624633846677, "learning_rate": 1.5204968364338446e-05, "loss": 0.082, "step": 21565 }, { "epoch": 2.5573342819874303, "grad_norm": 0.5718776034419393, "learning_rate": 1.5202759957533968e-05, "loss": 0.0789, "step": 21566 }, { "epoch": 2.557452863749555, "grad_norm": 0.802213385299035, "learning_rate": 1.5200551641048022e-05, "loss": 0.1035, "step": 21567 }, { "epoch": 2.5575714455116803, "grad_norm": 0.5474683267121736, "learning_rate": 1.519834341490098e-05, "loss": 0.0757, "step": 21568 }, { "epoch": 2.5576900272738055, "grad_norm": 0.6456820387436741, "learning_rate": 1.5196135279113183e-05, "loss": 0.0914, "step": 21569 }, { "epoch": 2.55780860903593, "grad_norm": 0.49256358254202215, "learning_rate": 1.5193927233705007e-05, "loss": 0.0609, "step": 21570 }, { "epoch": 2.5579271907980554, "grad_norm": 0.5554933670644486, "learning_rate": 1.5191719278696795e-05, "loss": 0.0877, "step": 21571 }, { "epoch": 2.55804577256018, "grad_norm": 0.4525755948239112, "learning_rate": 1.5189511414108903e-05, "loss": 0.0681, "step": 21572 }, { "epoch": 2.5581643543223054, "grad_norm": 0.9855266267066317, "learning_rate": 1.5187303639961678e-05, "loss": 0.1278, "step": 21573 }, { "epoch": 2.55828293608443, "grad_norm": 0.6287277273350552, "learning_rate": 1.5185095956275489e-05, "loss": 0.0874, "step": 21574 }, { "epoch": 2.5584015178465553, "grad_norm": 0.8867984031848025, "learning_rate": 1.518288836307068e-05, "loss": 0.138, "step": 21575 }, { "epoch": 2.55852009960868, "grad_norm": 0.7649660844028009, "learning_rate": 1.5180680860367594e-05, "loss": 0.1038, "step": 21576 }, { "epoch": 2.5586386813708053, "grad_norm": 0.5521774311611686, "learning_rate": 1.5178473448186593e-05, "loss": 0.0716, "step": 21577 }, { "epoch": 2.55875726313293, "grad_norm": 0.7502675002995498, "learning_rate": 1.517626612654802e-05, "loss": 0.1062, "step": 21578 }, { "epoch": 2.5588758448950553, "grad_norm": 0.7428038468458193, "learning_rate": 1.5174058895472232e-05, "loss": 0.0983, "step": 21579 }, { "epoch": 2.55899442665718, "grad_norm": 0.47134660894857666, "learning_rate": 1.5171851754979564e-05, "loss": 0.0556, "step": 21580 }, { "epoch": 2.5591130084193052, "grad_norm": 0.4452400363851594, "learning_rate": 1.5169644705090378e-05, "loss": 0.0595, "step": 21581 }, { "epoch": 2.55923159018143, "grad_norm": 0.8087707627688798, "learning_rate": 1.5167437745825013e-05, "loss": 0.0928, "step": 21582 }, { "epoch": 2.559350171943555, "grad_norm": 0.9393552633573798, "learning_rate": 1.5165230877203817e-05, "loss": 0.1254, "step": 21583 }, { "epoch": 2.55946875370568, "grad_norm": 0.75587454669452, "learning_rate": 1.5163024099247119e-05, "loss": 0.1183, "step": 21584 }, { "epoch": 2.559587335467805, "grad_norm": 0.8666801441090518, "learning_rate": 1.5160817411975286e-05, "loss": 0.0925, "step": 21585 }, { "epoch": 2.55970591722993, "grad_norm": 0.508309015728604, "learning_rate": 1.515861081540865e-05, "loss": 0.0602, "step": 21586 }, { "epoch": 2.559824498992055, "grad_norm": 0.5378086757683902, "learning_rate": 1.5156404309567558e-05, "loss": 0.0886, "step": 21587 }, { "epoch": 2.55994308075418, "grad_norm": 0.8886868812189218, "learning_rate": 1.5154197894472333e-05, "loss": 0.1111, "step": 21588 }, { "epoch": 2.560061662516305, "grad_norm": 0.6058173741040422, "learning_rate": 1.5151991570143342e-05, "loss": 0.0842, "step": 21589 }, { "epoch": 2.56018024427843, "grad_norm": 0.5930147362267632, "learning_rate": 1.5149785336600913e-05, "loss": 0.0719, "step": 21590 }, { "epoch": 2.560298826040555, "grad_norm": 0.7174754996131719, "learning_rate": 1.5147579193865374e-05, "loss": 0.0915, "step": 21591 }, { "epoch": 2.56041740780268, "grad_norm": 0.7454602225265462, "learning_rate": 1.5145373141957084e-05, "loss": 0.1141, "step": 21592 }, { "epoch": 2.560535989564805, "grad_norm": 0.7526547527083474, "learning_rate": 1.5143167180896367e-05, "loss": 0.1239, "step": 21593 }, { "epoch": 2.56065457132693, "grad_norm": 0.60874423813648, "learning_rate": 1.5140961310703562e-05, "loss": 0.0827, "step": 21594 }, { "epoch": 2.560773153089055, "grad_norm": 0.7418962254164355, "learning_rate": 1.5138755531398996e-05, "loss": 0.09, "step": 21595 }, { "epoch": 2.5608917348511797, "grad_norm": 0.5761304807459101, "learning_rate": 1.5136549843003017e-05, "loss": 0.072, "step": 21596 }, { "epoch": 2.561010316613305, "grad_norm": 0.5189049548131666, "learning_rate": 1.5134344245535953e-05, "loss": 0.0703, "step": 21597 }, { "epoch": 2.56112889837543, "grad_norm": 0.7133618923613796, "learning_rate": 1.5132138739018136e-05, "loss": 0.0897, "step": 21598 }, { "epoch": 2.561247480137555, "grad_norm": 0.5076156837825103, "learning_rate": 1.5129933323469895e-05, "loss": 0.0627, "step": 21599 }, { "epoch": 2.5613660618996796, "grad_norm": 0.4840020260479851, "learning_rate": 1.5127727998911572e-05, "loss": 0.0668, "step": 21600 }, { "epoch": 2.561484643661805, "grad_norm": 0.583807374840249, "learning_rate": 1.5125522765363492e-05, "loss": 0.078, "step": 21601 }, { "epoch": 2.56160322542393, "grad_norm": 0.7137320991535598, "learning_rate": 1.512331762284598e-05, "loss": 0.0922, "step": 21602 }, { "epoch": 2.561721807186055, "grad_norm": 0.7249578771701454, "learning_rate": 1.5121112571379362e-05, "loss": 0.1105, "step": 21603 }, { "epoch": 2.5618403889481796, "grad_norm": 0.4693733682727568, "learning_rate": 1.5118907610983978e-05, "loss": 0.0649, "step": 21604 }, { "epoch": 2.5619589707103048, "grad_norm": 0.8654648798780028, "learning_rate": 1.511670274168015e-05, "loss": 0.0888, "step": 21605 }, { "epoch": 2.56207755247243, "grad_norm": 0.41193765234083096, "learning_rate": 1.5114497963488198e-05, "loss": 0.0667, "step": 21606 }, { "epoch": 2.5621961342345547, "grad_norm": 0.7793662448730261, "learning_rate": 1.5112293276428452e-05, "loss": 0.1249, "step": 21607 }, { "epoch": 2.5623147159966795, "grad_norm": 0.741628239750056, "learning_rate": 1.5110088680521245e-05, "loss": 0.1322, "step": 21608 }, { "epoch": 2.5624332977588047, "grad_norm": 0.694177962820042, "learning_rate": 1.510788417578689e-05, "loss": 0.0814, "step": 21609 }, { "epoch": 2.56255187952093, "grad_norm": 0.5854668993509184, "learning_rate": 1.5105679762245706e-05, "loss": 0.0923, "step": 21610 }, { "epoch": 2.5626704612830546, "grad_norm": 0.5948354501133889, "learning_rate": 1.5103475439918027e-05, "loss": 0.0742, "step": 21611 }, { "epoch": 2.5627890430451794, "grad_norm": 1.0591606325544272, "learning_rate": 1.5101271208824168e-05, "loss": 0.1376, "step": 21612 }, { "epoch": 2.5629076248073046, "grad_norm": 0.6427837584769859, "learning_rate": 1.5099067068984454e-05, "loss": 0.087, "step": 21613 }, { "epoch": 2.56302620656943, "grad_norm": 0.9129566004494236, "learning_rate": 1.509686302041919e-05, "loss": 0.0492, "step": 21614 }, { "epoch": 2.5631447883315546, "grad_norm": 0.7529971953263135, "learning_rate": 1.5094659063148714e-05, "loss": 0.0957, "step": 21615 }, { "epoch": 2.5632633700936793, "grad_norm": 0.8433332771390282, "learning_rate": 1.5092455197193328e-05, "loss": 0.1018, "step": 21616 }, { "epoch": 2.5633819518558045, "grad_norm": 0.8250739196398834, "learning_rate": 1.509025142257336e-05, "loss": 0.1262, "step": 21617 }, { "epoch": 2.5635005336179297, "grad_norm": 0.6312412098319896, "learning_rate": 1.5088047739309114e-05, "loss": 0.0713, "step": 21618 }, { "epoch": 2.5636191153800545, "grad_norm": 0.8555915671576594, "learning_rate": 1.5085844147420925e-05, "loss": 0.1051, "step": 21619 }, { "epoch": 2.5637376971421797, "grad_norm": 0.5956735273817276, "learning_rate": 1.508364064692909e-05, "loss": 0.0891, "step": 21620 }, { "epoch": 2.5638562789043045, "grad_norm": 0.6142613232801595, "learning_rate": 1.5081437237853921e-05, "loss": 0.0614, "step": 21621 }, { "epoch": 2.5639748606664297, "grad_norm": 0.5079918294752009, "learning_rate": 1.5079233920215747e-05, "loss": 0.0665, "step": 21622 }, { "epoch": 2.5640934424285544, "grad_norm": 0.5813681051246512, "learning_rate": 1.5077030694034872e-05, "loss": 0.0669, "step": 21623 }, { "epoch": 2.5642120241906796, "grad_norm": 0.574592859422881, "learning_rate": 1.5074827559331606e-05, "loss": 0.0832, "step": 21624 }, { "epoch": 2.5643306059528044, "grad_norm": 0.6781371765067258, "learning_rate": 1.5072624516126246e-05, "loss": 0.0771, "step": 21625 }, { "epoch": 2.5644491877149296, "grad_norm": 0.7410987414276756, "learning_rate": 1.5070421564439125e-05, "loss": 0.1174, "step": 21626 }, { "epoch": 2.5645677694770543, "grad_norm": 0.5340045796101812, "learning_rate": 1.5068218704290538e-05, "loss": 0.0921, "step": 21627 }, { "epoch": 2.5646863512391795, "grad_norm": 0.7193624165644557, "learning_rate": 1.50660159357008e-05, "loss": 0.0999, "step": 21628 }, { "epoch": 2.5648049330013043, "grad_norm": 0.7135751848431026, "learning_rate": 1.5063813258690202e-05, "loss": 0.0892, "step": 21629 }, { "epoch": 2.5649235147634295, "grad_norm": 0.7245288276012181, "learning_rate": 1.506161067327907e-05, "loss": 0.101, "step": 21630 }, { "epoch": 2.5650420965255543, "grad_norm": 0.6345234794903442, "learning_rate": 1.5059408179487702e-05, "loss": 0.1099, "step": 21631 }, { "epoch": 2.5651606782876795, "grad_norm": 0.5118128633552554, "learning_rate": 1.50572057773364e-05, "loss": 0.0704, "step": 21632 }, { "epoch": 2.565279260049804, "grad_norm": 0.6808180570674003, "learning_rate": 1.5055003466845457e-05, "loss": 0.1105, "step": 21633 }, { "epoch": 2.5653978418119294, "grad_norm": 0.5972296059501417, "learning_rate": 1.50528012480352e-05, "loss": 0.0837, "step": 21634 }, { "epoch": 2.565516423574054, "grad_norm": 0.6392264241060398, "learning_rate": 1.5050599120925912e-05, "loss": 0.0889, "step": 21635 }, { "epoch": 2.5656350053361794, "grad_norm": 0.8560960296856371, "learning_rate": 1.5048397085537897e-05, "loss": 0.139, "step": 21636 }, { "epoch": 2.565753587098304, "grad_norm": 0.8589151127290491, "learning_rate": 1.5046195141891456e-05, "loss": 0.1189, "step": 21637 }, { "epoch": 2.5658721688604293, "grad_norm": 0.8223494462556574, "learning_rate": 1.5043993290006898e-05, "loss": 0.1181, "step": 21638 }, { "epoch": 2.565990750622554, "grad_norm": 0.7833019578219851, "learning_rate": 1.5041791529904514e-05, "loss": 0.0938, "step": 21639 }, { "epoch": 2.5661093323846793, "grad_norm": 0.3442889724275852, "learning_rate": 1.5039589861604586e-05, "loss": 0.0444, "step": 21640 }, { "epoch": 2.566227914146804, "grad_norm": 0.5260364287013648, "learning_rate": 1.5037388285127441e-05, "loss": 0.0576, "step": 21641 }, { "epoch": 2.5663464959089293, "grad_norm": 0.8145095618803387, "learning_rate": 1.5035186800493354e-05, "loss": 0.1141, "step": 21642 }, { "epoch": 2.566465077671054, "grad_norm": 0.7382935330868678, "learning_rate": 1.5032985407722627e-05, "loss": 0.0963, "step": 21643 }, { "epoch": 2.5665836594331792, "grad_norm": 0.8710180035016022, "learning_rate": 1.503078410683554e-05, "loss": 0.094, "step": 21644 }, { "epoch": 2.566702241195304, "grad_norm": 0.6271282012444919, "learning_rate": 1.5028582897852411e-05, "loss": 0.0901, "step": 21645 }, { "epoch": 2.566820822957429, "grad_norm": 0.5889286618631938, "learning_rate": 1.5026381780793514e-05, "loss": 0.0642, "step": 21646 }, { "epoch": 2.5669394047195544, "grad_norm": 0.5161417142322506, "learning_rate": 1.5024180755679151e-05, "loss": 0.0667, "step": 21647 }, { "epoch": 2.567057986481679, "grad_norm": 0.8150782924770895, "learning_rate": 1.5021979822529602e-05, "loss": 0.1117, "step": 21648 }, { "epoch": 2.567176568243804, "grad_norm": 0.586412658057895, "learning_rate": 1.5019778981365167e-05, "loss": 0.077, "step": 21649 }, { "epoch": 2.567295150005929, "grad_norm": 0.5585231956063005, "learning_rate": 1.5017578232206137e-05, "loss": 0.0749, "step": 21650 }, { "epoch": 2.5674137317680543, "grad_norm": 0.534343747127927, "learning_rate": 1.5015377575072781e-05, "loss": 0.0703, "step": 21651 }, { "epoch": 2.567532313530179, "grad_norm": 0.700382611785473, "learning_rate": 1.5013177009985413e-05, "loss": 0.1035, "step": 21652 }, { "epoch": 2.567650895292304, "grad_norm": 0.7812694210297213, "learning_rate": 1.5010976536964302e-05, "loss": 0.1252, "step": 21653 }, { "epoch": 2.567769477054429, "grad_norm": 1.0388591769338569, "learning_rate": 1.500877615602974e-05, "loss": 0.1151, "step": 21654 }, { "epoch": 2.5678880588165542, "grad_norm": 1.112199254626833, "learning_rate": 1.5006575867201999e-05, "loss": 0.1386, "step": 21655 }, { "epoch": 2.568006640578679, "grad_norm": 0.5689346978709972, "learning_rate": 1.5004375670501386e-05, "loss": 0.0766, "step": 21656 }, { "epoch": 2.5681252223408038, "grad_norm": 0.6953000980744428, "learning_rate": 1.5002175565948167e-05, "loss": 0.0962, "step": 21657 }, { "epoch": 2.568243804102929, "grad_norm": 0.8379539004057394, "learning_rate": 1.4999975553562632e-05, "loss": 0.1195, "step": 21658 }, { "epoch": 2.568362385865054, "grad_norm": 0.9785013225130647, "learning_rate": 1.499777563336505e-05, "loss": 0.1804, "step": 21659 }, { "epoch": 2.568480967627179, "grad_norm": 0.6822317431587638, "learning_rate": 1.4995575805375722e-05, "loss": 0.0795, "step": 21660 }, { "epoch": 2.5685995493893037, "grad_norm": 0.6715105470330339, "learning_rate": 1.4993376069614918e-05, "loss": 0.0907, "step": 21661 }, { "epoch": 2.568718131151429, "grad_norm": 0.6433745939860942, "learning_rate": 1.499117642610291e-05, "loss": 0.0761, "step": 21662 }, { "epoch": 2.568836712913554, "grad_norm": 0.5570140828278738, "learning_rate": 1.4988976874859978e-05, "loss": 0.0788, "step": 21663 }, { "epoch": 2.568955294675679, "grad_norm": 0.4424933858333607, "learning_rate": 1.4986777415906412e-05, "loss": 0.065, "step": 21664 }, { "epoch": 2.5690738764378036, "grad_norm": 0.6706752807625477, "learning_rate": 1.4984578049262471e-05, "loss": 0.0967, "step": 21665 }, { "epoch": 2.569192458199929, "grad_norm": 0.715232374386694, "learning_rate": 1.4982378774948442e-05, "loss": 0.089, "step": 21666 }, { "epoch": 2.569311039962054, "grad_norm": 0.571970307782617, "learning_rate": 1.4980179592984592e-05, "loss": 0.0876, "step": 21667 }, { "epoch": 2.5694296217241788, "grad_norm": 0.8050458993280805, "learning_rate": 1.4977980503391206e-05, "loss": 0.133, "step": 21668 }, { "epoch": 2.569548203486304, "grad_norm": 0.603219641744991, "learning_rate": 1.4975781506188552e-05, "loss": 0.0847, "step": 21669 }, { "epoch": 2.5696667852484287, "grad_norm": 0.634634800643695, "learning_rate": 1.4973582601396888e-05, "loss": 0.104, "step": 21670 }, { "epoch": 2.569785367010554, "grad_norm": 0.6254110365704031, "learning_rate": 1.4971383789036509e-05, "loss": 0.0611, "step": 21671 }, { "epoch": 2.5699039487726787, "grad_norm": 0.7127538325373887, "learning_rate": 1.4969185069127673e-05, "loss": 0.1078, "step": 21672 }, { "epoch": 2.570022530534804, "grad_norm": 0.718140475145398, "learning_rate": 1.4966986441690647e-05, "loss": 0.1202, "step": 21673 }, { "epoch": 2.5701411122969287, "grad_norm": 0.6017448558045624, "learning_rate": 1.4964787906745698e-05, "loss": 0.0603, "step": 21674 }, { "epoch": 2.570259694059054, "grad_norm": 0.6273593483489913, "learning_rate": 1.4962589464313104e-05, "loss": 0.101, "step": 21675 }, { "epoch": 2.5703782758211786, "grad_norm": 0.5915141731842348, "learning_rate": 1.4960391114413125e-05, "loss": 0.0935, "step": 21676 }, { "epoch": 2.570496857583304, "grad_norm": 0.5232904734667644, "learning_rate": 1.4958192857066025e-05, "loss": 0.074, "step": 21677 }, { "epoch": 2.5706154393454286, "grad_norm": 0.5897842086567708, "learning_rate": 1.4955994692292075e-05, "loss": 0.0794, "step": 21678 }, { "epoch": 2.570734021107554, "grad_norm": 0.762754589899015, "learning_rate": 1.495379662011154e-05, "loss": 0.0966, "step": 21679 }, { "epoch": 2.5708526028696785, "grad_norm": 0.7491599304886862, "learning_rate": 1.4951598640544684e-05, "loss": 0.1103, "step": 21680 }, { "epoch": 2.5709711846318037, "grad_norm": 0.4826249215308687, "learning_rate": 1.4949400753611754e-05, "loss": 0.0727, "step": 21681 }, { "epoch": 2.5710897663939285, "grad_norm": 0.7302745939731571, "learning_rate": 1.4947202959333035e-05, "loss": 0.0881, "step": 21682 }, { "epoch": 2.5712083481560537, "grad_norm": 0.6602552869319273, "learning_rate": 1.4945005257728779e-05, "loss": 0.0939, "step": 21683 }, { "epoch": 2.5713269299181785, "grad_norm": 0.9686713077241917, "learning_rate": 1.4942807648819234e-05, "loss": 0.1315, "step": 21684 }, { "epoch": 2.5714455116803037, "grad_norm": 0.8359340336348573, "learning_rate": 1.4940610132624675e-05, "loss": 0.0926, "step": 21685 }, { "epoch": 2.5715640934424284, "grad_norm": 0.9400227972403319, "learning_rate": 1.493841270916535e-05, "loss": 0.1263, "step": 21686 }, { "epoch": 2.5716826752045536, "grad_norm": 0.6051028704995187, "learning_rate": 1.4936215378461533e-05, "loss": 0.0982, "step": 21687 }, { "epoch": 2.5718012569666784, "grad_norm": 0.6434527448922712, "learning_rate": 1.4934018140533463e-05, "loss": 0.0768, "step": 21688 }, { "epoch": 2.5719198387288036, "grad_norm": 0.9486424879845745, "learning_rate": 1.4931820995401396e-05, "loss": 0.1384, "step": 21689 }, { "epoch": 2.5720384204909283, "grad_norm": 0.4808848246263055, "learning_rate": 1.4929623943085602e-05, "loss": 0.0482, "step": 21690 }, { "epoch": 2.5721570022530535, "grad_norm": 0.626028306888111, "learning_rate": 1.4927426983606324e-05, "loss": 0.0791, "step": 21691 }, { "epoch": 2.5722755840151783, "grad_norm": 0.8022772004509159, "learning_rate": 1.4925230116983808e-05, "loss": 0.1161, "step": 21692 }, { "epoch": 2.5723941657773035, "grad_norm": 0.6211677418502322, "learning_rate": 1.492303334323833e-05, "loss": 0.0819, "step": 21693 }, { "epoch": 2.5725127475394283, "grad_norm": 0.832106339813787, "learning_rate": 1.4920836662390122e-05, "loss": 0.1432, "step": 21694 }, { "epoch": 2.5726313293015535, "grad_norm": 0.8088366121898353, "learning_rate": 1.4918640074459438e-05, "loss": 0.0974, "step": 21695 }, { "epoch": 2.5727499110636787, "grad_norm": 0.6281647132201728, "learning_rate": 1.4916443579466526e-05, "loss": 0.0648, "step": 21696 }, { "epoch": 2.5728684928258034, "grad_norm": 0.7048129593271925, "learning_rate": 1.4914247177431644e-05, "loss": 0.0796, "step": 21697 }, { "epoch": 2.572987074587928, "grad_norm": 0.8221617397142938, "learning_rate": 1.4912050868375037e-05, "loss": 0.1266, "step": 21698 }, { "epoch": 2.5731056563500534, "grad_norm": 0.6775433624893877, "learning_rate": 1.4909854652316954e-05, "loss": 0.0911, "step": 21699 }, { "epoch": 2.5732242381121786, "grad_norm": 0.49907579507674843, "learning_rate": 1.4907658529277624e-05, "loss": 0.0691, "step": 21700 }, { "epoch": 2.5733428198743034, "grad_norm": 0.6474134553311868, "learning_rate": 1.4905462499277317e-05, "loss": 0.0968, "step": 21701 }, { "epoch": 2.573461401636428, "grad_norm": 0.9669609733775465, "learning_rate": 1.4903266562336272e-05, "loss": 0.1267, "step": 21702 }, { "epoch": 2.5735799833985533, "grad_norm": 0.4397791900064576, "learning_rate": 1.4901070718474718e-05, "loss": 0.0643, "step": 21703 }, { "epoch": 2.5736985651606785, "grad_norm": 0.5288441900915787, "learning_rate": 1.4898874967712905e-05, "loss": 0.0826, "step": 21704 }, { "epoch": 2.5738171469228033, "grad_norm": 0.7284008350540951, "learning_rate": 1.4896679310071088e-05, "loss": 0.1005, "step": 21705 }, { "epoch": 2.573935728684928, "grad_norm": 0.592819740511121, "learning_rate": 1.4894483745569493e-05, "loss": 0.0953, "step": 21706 }, { "epoch": 2.5740543104470532, "grad_norm": 0.6556629916516582, "learning_rate": 1.489228827422836e-05, "loss": 0.0878, "step": 21707 }, { "epoch": 2.5741728922091784, "grad_norm": 0.5240090667409226, "learning_rate": 1.4890092896067942e-05, "loss": 0.0613, "step": 21708 }, { "epoch": 2.574291473971303, "grad_norm": 0.6005623963731114, "learning_rate": 1.488789761110847e-05, "loss": 0.0886, "step": 21709 }, { "epoch": 2.574410055733428, "grad_norm": 0.7668724731485362, "learning_rate": 1.4885702419370184e-05, "loss": 0.1032, "step": 21710 }, { "epoch": 2.574528637495553, "grad_norm": 0.45812941161944915, "learning_rate": 1.4883507320873307e-05, "loss": 0.0667, "step": 21711 }, { "epoch": 2.5746472192576784, "grad_norm": 0.627933372827679, "learning_rate": 1.4881312315638097e-05, "loss": 0.0893, "step": 21712 }, { "epoch": 2.574765801019803, "grad_norm": 0.7326715772764358, "learning_rate": 1.487911740368478e-05, "loss": 0.1003, "step": 21713 }, { "epoch": 2.574884382781928, "grad_norm": 0.8933160365353854, "learning_rate": 1.4876922585033586e-05, "loss": 0.1265, "step": 21714 }, { "epoch": 2.575002964544053, "grad_norm": 0.637647193138884, "learning_rate": 1.487472785970475e-05, "loss": 0.0909, "step": 21715 }, { "epoch": 2.5751215463061783, "grad_norm": 0.6437862655288752, "learning_rate": 1.4872533227718505e-05, "loss": 0.0819, "step": 21716 }, { "epoch": 2.575240128068303, "grad_norm": 0.8071747944800559, "learning_rate": 1.4870338689095092e-05, "loss": 0.117, "step": 21717 }, { "epoch": 2.575358709830428, "grad_norm": 0.7382188023295648, "learning_rate": 1.4868144243854737e-05, "loss": 0.0949, "step": 21718 }, { "epoch": 2.575477291592553, "grad_norm": 0.5366161517309066, "learning_rate": 1.4865949892017653e-05, "loss": 0.0696, "step": 21719 }, { "epoch": 2.575595873354678, "grad_norm": 1.0310473634157802, "learning_rate": 1.4863755633604092e-05, "loss": 0.1164, "step": 21720 }, { "epoch": 2.575714455116803, "grad_norm": 0.6363163874472662, "learning_rate": 1.486156146863428e-05, "loss": 0.0781, "step": 21721 }, { "epoch": 2.575833036878928, "grad_norm": 0.6138661206552454, "learning_rate": 1.4859367397128426e-05, "loss": 0.0598, "step": 21722 }, { "epoch": 2.575951618641053, "grad_norm": 0.756478703299259, "learning_rate": 1.485717341910678e-05, "loss": 0.1134, "step": 21723 }, { "epoch": 2.576070200403178, "grad_norm": 1.0028471528134524, "learning_rate": 1.4854979534589558e-05, "loss": 0.1066, "step": 21724 }, { "epoch": 2.576188782165303, "grad_norm": 0.5717410821576738, "learning_rate": 1.4852785743596975e-05, "loss": 0.089, "step": 21725 }, { "epoch": 2.576307363927428, "grad_norm": 0.9690371048057043, "learning_rate": 1.485059204614927e-05, "loss": 0.1084, "step": 21726 }, { "epoch": 2.576425945689553, "grad_norm": 0.6103300996159365, "learning_rate": 1.4848398442266655e-05, "loss": 0.083, "step": 21727 }, { "epoch": 2.576544527451678, "grad_norm": 0.6205994997492682, "learning_rate": 1.4846204931969365e-05, "loss": 0.0789, "step": 21728 }, { "epoch": 2.576663109213803, "grad_norm": 0.4673166897481148, "learning_rate": 1.4844011515277615e-05, "loss": 0.0761, "step": 21729 }, { "epoch": 2.576781690975928, "grad_norm": 0.6963422986929692, "learning_rate": 1.4841818192211615e-05, "loss": 0.1045, "step": 21730 }, { "epoch": 2.5769002727380528, "grad_norm": 0.7129043592672168, "learning_rate": 1.48396249627916e-05, "loss": 0.0974, "step": 21731 }, { "epoch": 2.577018854500178, "grad_norm": 0.7254610499177668, "learning_rate": 1.4837431827037787e-05, "loss": 0.1094, "step": 21732 }, { "epoch": 2.5771374362623027, "grad_norm": 1.0300367884628725, "learning_rate": 1.4835238784970385e-05, "loss": 0.1289, "step": 21733 }, { "epoch": 2.577256018024428, "grad_norm": 0.7956197751969057, "learning_rate": 1.4833045836609615e-05, "loss": 0.1152, "step": 21734 }, { "epoch": 2.5773745997865527, "grad_norm": 0.7486623024657607, "learning_rate": 1.4830852981975698e-05, "loss": 0.1124, "step": 21735 }, { "epoch": 2.577493181548678, "grad_norm": 0.8153017552791842, "learning_rate": 1.482866022108885e-05, "loss": 0.1124, "step": 21736 }, { "epoch": 2.5776117633108027, "grad_norm": 0.803876540296278, "learning_rate": 1.482646755396927e-05, "loss": 0.1129, "step": 21737 }, { "epoch": 2.577730345072928, "grad_norm": 0.7702943442520692, "learning_rate": 1.4824274980637196e-05, "loss": 0.0856, "step": 21738 }, { "epoch": 2.5778489268350526, "grad_norm": 0.632991507822899, "learning_rate": 1.4822082501112827e-05, "loss": 0.087, "step": 21739 }, { "epoch": 2.577967508597178, "grad_norm": 0.5636498931522826, "learning_rate": 1.481989011541638e-05, "loss": 0.0487, "step": 21740 }, { "epoch": 2.5780860903593026, "grad_norm": 0.7629009147613559, "learning_rate": 1.4817697823568046e-05, "loss": 0.0906, "step": 21741 }, { "epoch": 2.578204672121428, "grad_norm": 0.49488157014563944, "learning_rate": 1.4815505625588066e-05, "loss": 0.0635, "step": 21742 }, { "epoch": 2.5783232538835525, "grad_norm": 0.7100886911264174, "learning_rate": 1.4813313521496634e-05, "loss": 0.0802, "step": 21743 }, { "epoch": 2.5784418356456777, "grad_norm": 0.7590768286038645, "learning_rate": 1.4811121511313953e-05, "loss": 0.0911, "step": 21744 }, { "epoch": 2.578560417407803, "grad_norm": 0.7382663624817576, "learning_rate": 1.480892959506024e-05, "loss": 0.102, "step": 21745 }, { "epoch": 2.5786789991699277, "grad_norm": 0.6040406091153081, "learning_rate": 1.4806737772755696e-05, "loss": 0.0731, "step": 21746 }, { "epoch": 2.5787975809320525, "grad_norm": 0.8258329369519675, "learning_rate": 1.4804546044420537e-05, "loss": 0.0801, "step": 21747 }, { "epoch": 2.5789161626941777, "grad_norm": 0.7248536502234016, "learning_rate": 1.4802354410074962e-05, "loss": 0.1146, "step": 21748 }, { "epoch": 2.579034744456303, "grad_norm": 0.6576359662583103, "learning_rate": 1.4800162869739162e-05, "loss": 0.0749, "step": 21749 }, { "epoch": 2.5791533262184276, "grad_norm": 0.7558702461824902, "learning_rate": 1.4797971423433365e-05, "loss": 0.1161, "step": 21750 }, { "epoch": 2.5792719079805524, "grad_norm": 0.6610476409448179, "learning_rate": 1.4795780071177756e-05, "loss": 0.0873, "step": 21751 }, { "epoch": 2.5793904897426776, "grad_norm": 0.7472351946302279, "learning_rate": 1.4793588812992537e-05, "loss": 0.1094, "step": 21752 }, { "epoch": 2.579509071504803, "grad_norm": 0.5528221623751591, "learning_rate": 1.479139764889792e-05, "loss": 0.0826, "step": 21753 }, { "epoch": 2.5796276532669276, "grad_norm": 0.844263479575088, "learning_rate": 1.4789206578914099e-05, "loss": 0.0923, "step": 21754 }, { "epoch": 2.5797462350290523, "grad_norm": 0.711774993356527, "learning_rate": 1.4787015603061267e-05, "loss": 0.0861, "step": 21755 }, { "epoch": 2.5798648167911775, "grad_norm": 1.0121975611636214, "learning_rate": 1.4784824721359628e-05, "loss": 0.1379, "step": 21756 }, { "epoch": 2.5799833985533027, "grad_norm": 0.7712917282481041, "learning_rate": 1.4782633933829376e-05, "loss": 0.1088, "step": 21757 }, { "epoch": 2.5801019803154275, "grad_norm": 0.6238586333041432, "learning_rate": 1.4780443240490719e-05, "loss": 0.069, "step": 21758 }, { "epoch": 2.5802205620775522, "grad_norm": 0.9102398468088011, "learning_rate": 1.4778252641363838e-05, "loss": 0.1331, "step": 21759 }, { "epoch": 2.5803391438396774, "grad_norm": 0.6586803994014576, "learning_rate": 1.4776062136468927e-05, "loss": 0.0944, "step": 21760 }, { "epoch": 2.5804577256018026, "grad_norm": 0.5530106649977531, "learning_rate": 1.4773871725826193e-05, "loss": 0.0805, "step": 21761 }, { "epoch": 2.5805763073639274, "grad_norm": 0.6930010774635833, "learning_rate": 1.4771681409455822e-05, "loss": 0.09, "step": 21762 }, { "epoch": 2.580694889126052, "grad_norm": 0.7679093656184596, "learning_rate": 1.4769491187377998e-05, "loss": 0.1136, "step": 21763 }, { "epoch": 2.5808134708881774, "grad_norm": 0.8251742900100529, "learning_rate": 1.4767301059612928e-05, "loss": 0.1047, "step": 21764 }, { "epoch": 2.5809320526503026, "grad_norm": 0.5934908917119558, "learning_rate": 1.476511102618079e-05, "loss": 0.0817, "step": 21765 }, { "epoch": 2.5810506344124273, "grad_norm": 0.6269676968937224, "learning_rate": 1.4762921087101783e-05, "loss": 0.0583, "step": 21766 }, { "epoch": 2.581169216174552, "grad_norm": 0.7640254956383136, "learning_rate": 1.4760731242396078e-05, "loss": 0.1321, "step": 21767 }, { "epoch": 2.5812877979366773, "grad_norm": 0.520672830137325, "learning_rate": 1.4758541492083885e-05, "loss": 0.0699, "step": 21768 }, { "epoch": 2.5814063796988025, "grad_norm": 0.734013270822722, "learning_rate": 1.4756351836185382e-05, "loss": 0.0998, "step": 21769 }, { "epoch": 2.5815249614609272, "grad_norm": 1.1158770542942729, "learning_rate": 1.4754162274720752e-05, "loss": 0.1385, "step": 21770 }, { "epoch": 2.5816435432230525, "grad_norm": 0.5093019277391224, "learning_rate": 1.4751972807710173e-05, "loss": 0.0785, "step": 21771 }, { "epoch": 2.581762124985177, "grad_norm": 0.43308484435515665, "learning_rate": 1.4749783435173847e-05, "loss": 0.056, "step": 21772 }, { "epoch": 2.5818807067473024, "grad_norm": 0.46946366163170616, "learning_rate": 1.4747594157131946e-05, "loss": 0.0758, "step": 21773 }, { "epoch": 2.581999288509427, "grad_norm": 0.5771162567949781, "learning_rate": 1.4745404973604654e-05, "loss": 0.0923, "step": 21774 }, { "epoch": 2.5821178702715524, "grad_norm": 0.7133420577650004, "learning_rate": 1.474321588461215e-05, "loss": 0.1246, "step": 21775 }, { "epoch": 2.582236452033677, "grad_norm": 0.5933405029968767, "learning_rate": 1.4741026890174617e-05, "loss": 0.0761, "step": 21776 }, { "epoch": 2.5823550337958023, "grad_norm": 0.6123507631179006, "learning_rate": 1.4738837990312243e-05, "loss": 0.0697, "step": 21777 }, { "epoch": 2.582473615557927, "grad_norm": 0.6265104381339655, "learning_rate": 1.4736649185045187e-05, "loss": 0.0849, "step": 21778 }, { "epoch": 2.5825921973200523, "grad_norm": 0.37618166268537195, "learning_rate": 1.4734460474393651e-05, "loss": 0.0569, "step": 21779 }, { "epoch": 2.582710779082177, "grad_norm": 0.6856120405857276, "learning_rate": 1.4732271858377802e-05, "loss": 0.0947, "step": 21780 }, { "epoch": 2.5828293608443023, "grad_norm": 0.8209717751762402, "learning_rate": 1.4730083337017816e-05, "loss": 0.1033, "step": 21781 }, { "epoch": 2.582947942606427, "grad_norm": 0.9848514228787356, "learning_rate": 1.4727894910333856e-05, "loss": 0.1373, "step": 21782 }, { "epoch": 2.583066524368552, "grad_norm": 0.43828576542117875, "learning_rate": 1.472570657834612e-05, "loss": 0.0496, "step": 21783 }, { "epoch": 2.583185106130677, "grad_norm": 0.5504615960044469, "learning_rate": 1.4723518341074772e-05, "loss": 0.0782, "step": 21784 }, { "epoch": 2.583303687892802, "grad_norm": 0.6105902236571252, "learning_rate": 1.4721330198539974e-05, "loss": 0.0899, "step": 21785 }, { "epoch": 2.583422269654927, "grad_norm": 0.40449620787696483, "learning_rate": 1.4719142150761906e-05, "loss": 0.0482, "step": 21786 }, { "epoch": 2.583540851417052, "grad_norm": 0.7286299777791738, "learning_rate": 1.4716954197760744e-05, "loss": 0.0998, "step": 21787 }, { "epoch": 2.583659433179177, "grad_norm": 0.7210767039189184, "learning_rate": 1.4714766339556657e-05, "loss": 0.1189, "step": 21788 }, { "epoch": 2.583778014941302, "grad_norm": 0.608788752144289, "learning_rate": 1.4712578576169814e-05, "loss": 0.0656, "step": 21789 }, { "epoch": 2.583896596703427, "grad_norm": 0.738700770320291, "learning_rate": 1.4710390907620369e-05, "loss": 0.1046, "step": 21790 }, { "epoch": 2.584015178465552, "grad_norm": 0.6679379122298252, "learning_rate": 1.4708203333928513e-05, "loss": 0.0996, "step": 21791 }, { "epoch": 2.584133760227677, "grad_norm": 0.7505490537157985, "learning_rate": 1.4706015855114403e-05, "loss": 0.0978, "step": 21792 }, { "epoch": 2.584252341989802, "grad_norm": 0.7773780075034072, "learning_rate": 1.4703828471198189e-05, "loss": 0.0992, "step": 21793 }, { "epoch": 2.5843709237519272, "grad_norm": 0.7646436838756954, "learning_rate": 1.4701641182200065e-05, "loss": 0.0987, "step": 21794 }, { "epoch": 2.584489505514052, "grad_norm": 0.7512534631943374, "learning_rate": 1.4699453988140175e-05, "loss": 0.1076, "step": 21795 }, { "epoch": 2.5846080872761767, "grad_norm": 0.8153579790707216, "learning_rate": 1.4697266889038691e-05, "loss": 0.1065, "step": 21796 }, { "epoch": 2.584726669038302, "grad_norm": 0.5307333347330933, "learning_rate": 1.4695079884915764e-05, "loss": 0.0864, "step": 21797 }, { "epoch": 2.584845250800427, "grad_norm": 0.7130223298217921, "learning_rate": 1.4692892975791572e-05, "loss": 0.0939, "step": 21798 }, { "epoch": 2.584963832562552, "grad_norm": 0.5583135524854778, "learning_rate": 1.4690706161686268e-05, "loss": 0.0871, "step": 21799 }, { "epoch": 2.5850824143246767, "grad_norm": 0.5411204290376191, "learning_rate": 1.4688519442620013e-05, "loss": 0.0754, "step": 21800 }, { "epoch": 2.585200996086802, "grad_norm": 0.6494975197136568, "learning_rate": 1.4686332818612952e-05, "loss": 0.0782, "step": 21801 }, { "epoch": 2.585319577848927, "grad_norm": 0.9038696210095611, "learning_rate": 1.4684146289685263e-05, "loss": 0.1249, "step": 21802 }, { "epoch": 2.585438159611052, "grad_norm": 1.1416510069112726, "learning_rate": 1.4681959855857096e-05, "loss": 0.128, "step": 21803 }, { "epoch": 2.5855567413731766, "grad_norm": 0.9404993983329561, "learning_rate": 1.4679773517148604e-05, "loss": 0.1348, "step": 21804 }, { "epoch": 2.585675323135302, "grad_norm": 0.9993269070376036, "learning_rate": 1.4677587273579945e-05, "loss": 0.1207, "step": 21805 }, { "epoch": 2.585793904897427, "grad_norm": 0.6192944844229286, "learning_rate": 1.4675401125171272e-05, "loss": 0.0861, "step": 21806 }, { "epoch": 2.5859124866595518, "grad_norm": 0.5660927503825468, "learning_rate": 1.4673215071942741e-05, "loss": 0.0758, "step": 21807 }, { "epoch": 2.5860310684216765, "grad_norm": 1.2649721246894226, "learning_rate": 1.4671029113914498e-05, "loss": 0.1343, "step": 21808 }, { "epoch": 2.5861496501838017, "grad_norm": 0.95789899710996, "learning_rate": 1.4668843251106712e-05, "loss": 0.1455, "step": 21809 }, { "epoch": 2.586268231945927, "grad_norm": 0.957963158790284, "learning_rate": 1.4666657483539517e-05, "loss": 0.1194, "step": 21810 }, { "epoch": 2.5863868137080517, "grad_norm": 0.4739566167416007, "learning_rate": 1.4664471811233072e-05, "loss": 0.0555, "step": 21811 }, { "epoch": 2.5865053954701764, "grad_norm": 0.6124419189555691, "learning_rate": 1.4662286234207512e-05, "loss": 0.0802, "step": 21812 }, { "epoch": 2.5866239772323016, "grad_norm": 0.6426601552212815, "learning_rate": 1.4660100752483005e-05, "loss": 0.0998, "step": 21813 }, { "epoch": 2.586742558994427, "grad_norm": 0.6171044235575566, "learning_rate": 1.4657915366079683e-05, "loss": 0.0734, "step": 21814 }, { "epoch": 2.5868611407565516, "grad_norm": 0.5918732571316935, "learning_rate": 1.465573007501771e-05, "loss": 0.0892, "step": 21815 }, { "epoch": 2.5869797225186764, "grad_norm": 0.764862203424092, "learning_rate": 1.4653544879317205e-05, "loss": 0.1097, "step": 21816 }, { "epoch": 2.5870983042808016, "grad_norm": 0.9086965655406026, "learning_rate": 1.4651359778998339e-05, "loss": 0.1113, "step": 21817 }, { "epoch": 2.5872168860429268, "grad_norm": 0.6261062487085839, "learning_rate": 1.4649174774081248e-05, "loss": 0.0826, "step": 21818 }, { "epoch": 2.5873354678050515, "grad_norm": 0.6867252640893505, "learning_rate": 1.4646989864586069e-05, "loss": 0.1049, "step": 21819 }, { "epoch": 2.5874540495671767, "grad_norm": 0.7009093891775063, "learning_rate": 1.4644805050532942e-05, "loss": 0.0919, "step": 21820 }, { "epoch": 2.5875726313293015, "grad_norm": 0.5692399928648344, "learning_rate": 1.464262033194202e-05, "loss": 0.0825, "step": 21821 }, { "epoch": 2.5876912130914267, "grad_norm": 0.5275658929207139, "learning_rate": 1.4640435708833442e-05, "loss": 0.0716, "step": 21822 }, { "epoch": 2.5878097948535514, "grad_norm": 0.5343377701628462, "learning_rate": 1.463825118122733e-05, "loss": 0.0585, "step": 21823 }, { "epoch": 2.5879283766156767, "grad_norm": 1.1802523494584924, "learning_rate": 1.4636066749143846e-05, "loss": 0.1263, "step": 21824 }, { "epoch": 2.5880469583778014, "grad_norm": 0.5975004820123287, "learning_rate": 1.4633882412603111e-05, "loss": 0.0748, "step": 21825 }, { "epoch": 2.5881655401399266, "grad_norm": 0.6344168775799386, "learning_rate": 1.4631698171625277e-05, "loss": 0.0947, "step": 21826 }, { "epoch": 2.5882841219020514, "grad_norm": 1.0378387312758124, "learning_rate": 1.462951402623046e-05, "loss": 0.1354, "step": 21827 }, { "epoch": 2.5884027036641766, "grad_norm": 0.9206497938666299, "learning_rate": 1.4627329976438813e-05, "loss": 0.1093, "step": 21828 }, { "epoch": 2.5885212854263013, "grad_norm": 0.7282452157289558, "learning_rate": 1.4625146022270469e-05, "loss": 0.0845, "step": 21829 }, { "epoch": 2.5886398671884265, "grad_norm": 0.573627220875112, "learning_rate": 1.4622962163745555e-05, "loss": 0.08, "step": 21830 }, { "epoch": 2.5887584489505513, "grad_norm": 0.56032907593689, "learning_rate": 1.4620778400884194e-05, "loss": 0.0753, "step": 21831 }, { "epoch": 2.5888770307126765, "grad_norm": 0.6626105855416736, "learning_rate": 1.4618594733706537e-05, "loss": 0.1026, "step": 21832 }, { "epoch": 2.5889956124748013, "grad_norm": 0.5769811184071237, "learning_rate": 1.4616411162232708e-05, "loss": 0.0692, "step": 21833 }, { "epoch": 2.5891141942369265, "grad_norm": 0.682182347965938, "learning_rate": 1.4614227686482823e-05, "loss": 0.1032, "step": 21834 }, { "epoch": 2.589232775999051, "grad_norm": 0.6434721186352659, "learning_rate": 1.4612044306477036e-05, "loss": 0.0879, "step": 21835 }, { "epoch": 2.5893513577611764, "grad_norm": 0.42842258121234456, "learning_rate": 1.4609861022235456e-05, "loss": 0.061, "step": 21836 }, { "epoch": 2.589469939523301, "grad_norm": 0.700828992025767, "learning_rate": 1.4607677833778216e-05, "loss": 0.0632, "step": 21837 }, { "epoch": 2.5895885212854264, "grad_norm": 0.6219394189778532, "learning_rate": 1.4605494741125436e-05, "loss": 0.0782, "step": 21838 }, { "epoch": 2.589707103047551, "grad_norm": 0.5452188839732415, "learning_rate": 1.460331174429726e-05, "loss": 0.0722, "step": 21839 }, { "epoch": 2.5898256848096763, "grad_norm": 0.6314283683952631, "learning_rate": 1.46011288433138e-05, "loss": 0.0883, "step": 21840 }, { "epoch": 2.589944266571801, "grad_norm": 0.6479156039933638, "learning_rate": 1.4598946038195176e-05, "loss": 0.0902, "step": 21841 }, { "epoch": 2.5900628483339263, "grad_norm": 0.9375475878399752, "learning_rate": 1.459676332896151e-05, "loss": 0.0879, "step": 21842 }, { "epoch": 2.590181430096051, "grad_norm": 0.814710060310782, "learning_rate": 1.4594580715632932e-05, "loss": 0.1133, "step": 21843 }, { "epoch": 2.5903000118581763, "grad_norm": 0.490232984578286, "learning_rate": 1.4592398198229568e-05, "loss": 0.0629, "step": 21844 }, { "epoch": 2.590418593620301, "grad_norm": 0.9649545853254412, "learning_rate": 1.4590215776771521e-05, "loss": 0.1204, "step": 21845 }, { "epoch": 2.5905371753824262, "grad_norm": 0.7925282690876486, "learning_rate": 1.458803345127892e-05, "loss": 0.1012, "step": 21846 }, { "epoch": 2.5906557571445514, "grad_norm": 0.6233523372015507, "learning_rate": 1.4585851221771885e-05, "loss": 0.075, "step": 21847 }, { "epoch": 2.590774338906676, "grad_norm": 1.196658129667339, "learning_rate": 1.4583669088270536e-05, "loss": 0.1883, "step": 21848 }, { "epoch": 2.590892920668801, "grad_norm": 0.6656796660015559, "learning_rate": 1.4581487050794967e-05, "loss": 0.0649, "step": 21849 }, { "epoch": 2.591011502430926, "grad_norm": 0.6513694688295485, "learning_rate": 1.4579305109365331e-05, "loss": 0.0853, "step": 21850 }, { "epoch": 2.5911300841930514, "grad_norm": 0.8462903823806432, "learning_rate": 1.4577123264001714e-05, "loss": 0.128, "step": 21851 }, { "epoch": 2.591248665955176, "grad_norm": 0.5190570143188311, "learning_rate": 1.4574941514724238e-05, "loss": 0.0723, "step": 21852 }, { "epoch": 2.591367247717301, "grad_norm": 0.8465063643099384, "learning_rate": 1.457275986155302e-05, "loss": 0.1374, "step": 21853 }, { "epoch": 2.591485829479426, "grad_norm": 0.6180715958759728, "learning_rate": 1.4570578304508164e-05, "loss": 0.0788, "step": 21854 }, { "epoch": 2.5916044112415513, "grad_norm": 0.6756996986007032, "learning_rate": 1.45683968436098e-05, "loss": 0.1118, "step": 21855 }, { "epoch": 2.591722993003676, "grad_norm": 1.0225187041059296, "learning_rate": 1.4566215478878014e-05, "loss": 0.1284, "step": 21856 }, { "epoch": 2.591841574765801, "grad_norm": 0.6338084487485439, "learning_rate": 1.4564034210332927e-05, "loss": 0.085, "step": 21857 }, { "epoch": 2.591960156527926, "grad_norm": 0.5701002716434888, "learning_rate": 1.4561853037994649e-05, "loss": 0.081, "step": 21858 }, { "epoch": 2.592078738290051, "grad_norm": 0.6656742626160084, "learning_rate": 1.4559671961883286e-05, "loss": 0.1166, "step": 21859 }, { "epoch": 2.592197320052176, "grad_norm": 0.6514052021529805, "learning_rate": 1.455749098201895e-05, "loss": 0.0824, "step": 21860 }, { "epoch": 2.5923159018143007, "grad_norm": 0.7769790292038351, "learning_rate": 1.4555310098421727e-05, "loss": 0.0983, "step": 21861 }, { "epoch": 2.592434483576426, "grad_norm": 0.8690896641840874, "learning_rate": 1.4553129311111755e-05, "loss": 0.1322, "step": 21862 }, { "epoch": 2.592553065338551, "grad_norm": 1.0845065946585886, "learning_rate": 1.4550948620109109e-05, "loss": 0.1411, "step": 21863 }, { "epoch": 2.592671647100676, "grad_norm": 0.4678598888864147, "learning_rate": 1.4548768025433904e-05, "loss": 0.0776, "step": 21864 }, { "epoch": 2.5927902288628006, "grad_norm": 0.629996477674819, "learning_rate": 1.4546587527106242e-05, "loss": 0.0842, "step": 21865 }, { "epoch": 2.592908810624926, "grad_norm": 0.7044284102880053, "learning_rate": 1.4544407125146235e-05, "loss": 0.0969, "step": 21866 }, { "epoch": 2.593027392387051, "grad_norm": 0.6649477169412196, "learning_rate": 1.4542226819573963e-05, "loss": 0.0949, "step": 21867 }, { "epoch": 2.593145974149176, "grad_norm": 0.6932447908965876, "learning_rate": 1.454004661040953e-05, "loss": 0.084, "step": 21868 }, { "epoch": 2.593264555911301, "grad_norm": 0.7597309862884968, "learning_rate": 1.4537866497673047e-05, "loss": 0.0785, "step": 21869 }, { "epoch": 2.5933831376734258, "grad_norm": 0.6749460699391026, "learning_rate": 1.4535686481384603e-05, "loss": 0.0869, "step": 21870 }, { "epoch": 2.593501719435551, "grad_norm": 0.9001985482542, "learning_rate": 1.4533506561564306e-05, "loss": 0.1514, "step": 21871 }, { "epoch": 2.5936203011976757, "grad_norm": 0.9418552040751123, "learning_rate": 1.4531326738232226e-05, "loss": 0.1279, "step": 21872 }, { "epoch": 2.593738882959801, "grad_norm": 1.0785766334677316, "learning_rate": 1.4529147011408495e-05, "loss": 0.128, "step": 21873 }, { "epoch": 2.5938574647219257, "grad_norm": 0.6825180286492357, "learning_rate": 1.4526967381113177e-05, "loss": 0.0955, "step": 21874 }, { "epoch": 2.593976046484051, "grad_norm": 0.5887334917047614, "learning_rate": 1.4524787847366388e-05, "loss": 0.0671, "step": 21875 }, { "epoch": 2.5940946282461756, "grad_norm": 0.5228853760508094, "learning_rate": 1.4522608410188187e-05, "loss": 0.0713, "step": 21876 }, { "epoch": 2.594213210008301, "grad_norm": 0.49636602861551615, "learning_rate": 1.4520429069598712e-05, "loss": 0.0709, "step": 21877 }, { "epoch": 2.5943317917704256, "grad_norm": 0.798987557848694, "learning_rate": 1.4518249825618018e-05, "loss": 0.0952, "step": 21878 }, { "epoch": 2.594450373532551, "grad_norm": 0.521073936744459, "learning_rate": 1.4516070678266208e-05, "loss": 0.0668, "step": 21879 }, { "epoch": 2.5945689552946756, "grad_norm": 0.5462403865759258, "learning_rate": 1.451389162756337e-05, "loss": 0.0666, "step": 21880 }, { "epoch": 2.5946875370568008, "grad_norm": 0.47564381643212805, "learning_rate": 1.4511712673529593e-05, "loss": 0.0693, "step": 21881 }, { "epoch": 2.5948061188189255, "grad_norm": 0.6613749938766706, "learning_rate": 1.4509533816184972e-05, "loss": 0.0791, "step": 21882 }, { "epoch": 2.5949247005810507, "grad_norm": 0.7535750999404823, "learning_rate": 1.4507355055549565e-05, "loss": 0.1036, "step": 21883 }, { "epoch": 2.5950432823431755, "grad_norm": 0.7030252475088511, "learning_rate": 1.4505176391643499e-05, "loss": 0.115, "step": 21884 }, { "epoch": 2.5951618641053007, "grad_norm": 0.7226411234683512, "learning_rate": 1.4502997824486828e-05, "loss": 0.108, "step": 21885 }, { "epoch": 2.5952804458674255, "grad_norm": 0.630112700636151, "learning_rate": 1.4500819354099654e-05, "loss": 0.0764, "step": 21886 }, { "epoch": 2.5953990276295507, "grad_norm": 0.5329018974252739, "learning_rate": 1.4498640980502026e-05, "loss": 0.066, "step": 21887 }, { "epoch": 2.5955176093916754, "grad_norm": 0.751671932464693, "learning_rate": 1.4496462703714075e-05, "loss": 0.126, "step": 21888 }, { "epoch": 2.5956361911538006, "grad_norm": 0.7610935510092147, "learning_rate": 1.4494284523755847e-05, "loss": 0.0896, "step": 21889 }, { "epoch": 2.5957547729159254, "grad_norm": 0.5189908719553425, "learning_rate": 1.4492106440647435e-05, "loss": 0.078, "step": 21890 }, { "epoch": 2.5958733546780506, "grad_norm": 1.0857623308431144, "learning_rate": 1.4489928454408914e-05, "loss": 0.1327, "step": 21891 }, { "epoch": 2.5959919364401753, "grad_norm": 0.8368192280853721, "learning_rate": 1.4487750565060362e-05, "loss": 0.0766, "step": 21892 }, { "epoch": 2.5961105182023005, "grad_norm": 0.7211322831577193, "learning_rate": 1.448557277262187e-05, "loss": 0.0693, "step": 21893 }, { "epoch": 2.5962290999644253, "grad_norm": 0.5444049031318132, "learning_rate": 1.4483395077113485e-05, "loss": 0.0759, "step": 21894 }, { "epoch": 2.5963476817265505, "grad_norm": 1.0041425237287758, "learning_rate": 1.448121747855532e-05, "loss": 0.1321, "step": 21895 }, { "epoch": 2.5964662634886757, "grad_norm": 0.817323441106573, "learning_rate": 1.447903997696742e-05, "loss": 0.1067, "step": 21896 }, { "epoch": 2.5965848452508005, "grad_norm": 0.9637536957499933, "learning_rate": 1.4476862572369881e-05, "loss": 0.1474, "step": 21897 }, { "epoch": 2.5967034270129252, "grad_norm": 0.6044587505601111, "learning_rate": 1.4474685264782744e-05, "loss": 0.0946, "step": 21898 }, { "epoch": 2.5968220087750504, "grad_norm": 0.44860507084239276, "learning_rate": 1.4472508054226125e-05, "loss": 0.061, "step": 21899 }, { "epoch": 2.5969405905371756, "grad_norm": 0.5950359425072482, "learning_rate": 1.447033094072006e-05, "loss": 0.0815, "step": 21900 }, { "epoch": 2.5970591722993004, "grad_norm": 0.6608943024351142, "learning_rate": 1.4468153924284627e-05, "loss": 0.1082, "step": 21901 }, { "epoch": 2.597177754061425, "grad_norm": 0.5513874937950206, "learning_rate": 1.4465977004939907e-05, "loss": 0.0452, "step": 21902 }, { "epoch": 2.5972963358235504, "grad_norm": 0.4959952385383308, "learning_rate": 1.4463800182705956e-05, "loss": 0.0686, "step": 21903 }, { "epoch": 2.5974149175856756, "grad_norm": 0.7476107540597091, "learning_rate": 1.446162345760286e-05, "loss": 0.0864, "step": 21904 }, { "epoch": 2.5975334993478003, "grad_norm": 0.81857276337261, "learning_rate": 1.4459446829650664e-05, "loss": 0.1284, "step": 21905 }, { "epoch": 2.597652081109925, "grad_norm": 0.6722180335653745, "learning_rate": 1.4457270298869439e-05, "loss": 0.094, "step": 21906 }, { "epoch": 2.5977706628720503, "grad_norm": 0.7211016340671558, "learning_rate": 1.4455093865279254e-05, "loss": 0.1055, "step": 21907 }, { "epoch": 2.5978892446341755, "grad_norm": 0.8658192503839995, "learning_rate": 1.4452917528900173e-05, "loss": 0.1221, "step": 21908 }, { "epoch": 2.5980078263963002, "grad_norm": 0.5935646706945945, "learning_rate": 1.4450741289752259e-05, "loss": 0.0887, "step": 21909 }, { "epoch": 2.598126408158425, "grad_norm": 0.6251300078424941, "learning_rate": 1.4448565147855574e-05, "loss": 0.0661, "step": 21910 }, { "epoch": 2.59824498992055, "grad_norm": 0.7540830159370536, "learning_rate": 1.444638910323019e-05, "loss": 0.101, "step": 21911 }, { "epoch": 2.5983635716826754, "grad_norm": 0.6999104356863861, "learning_rate": 1.4444213155896147e-05, "loss": 0.0946, "step": 21912 }, { "epoch": 2.5984821534448, "grad_norm": 0.4537776944361022, "learning_rate": 1.4442037305873512e-05, "loss": 0.0581, "step": 21913 }, { "epoch": 2.598600735206925, "grad_norm": 0.47561548433033324, "learning_rate": 1.443986155318235e-05, "loss": 0.0566, "step": 21914 }, { "epoch": 2.59871931696905, "grad_norm": 0.7002137181979591, "learning_rate": 1.4437685897842718e-05, "loss": 0.1095, "step": 21915 }, { "epoch": 2.5988378987311753, "grad_norm": 0.7600646277041309, "learning_rate": 1.4435510339874664e-05, "loss": 0.107, "step": 21916 }, { "epoch": 2.5989564804933, "grad_norm": 0.8393710554470967, "learning_rate": 1.4433334879298249e-05, "loss": 0.1169, "step": 21917 }, { "epoch": 2.599075062255425, "grad_norm": 0.8423453564560103, "learning_rate": 1.443115951613353e-05, "loss": 0.0952, "step": 21918 }, { "epoch": 2.59919364401755, "grad_norm": 0.7607358007857821, "learning_rate": 1.4428984250400559e-05, "loss": 0.0922, "step": 21919 }, { "epoch": 2.5993122257796752, "grad_norm": 0.7766283969782733, "learning_rate": 1.4426809082119392e-05, "loss": 0.1161, "step": 21920 }, { "epoch": 2.5994308075418, "grad_norm": 0.6408704656399453, "learning_rate": 1.4424634011310079e-05, "loss": 0.0984, "step": 21921 }, { "epoch": 2.599549389303925, "grad_norm": 0.644643684277401, "learning_rate": 1.4422459037992681e-05, "loss": 0.0889, "step": 21922 }, { "epoch": 2.59966797106605, "grad_norm": 0.7646363635114254, "learning_rate": 1.4420284162187229e-05, "loss": 0.1104, "step": 21923 }, { "epoch": 2.599786552828175, "grad_norm": 0.46875542066733955, "learning_rate": 1.441810938391378e-05, "loss": 0.0625, "step": 21924 }, { "epoch": 2.5999051345903, "grad_norm": 0.8271353749623191, "learning_rate": 1.441593470319239e-05, "loss": 0.0931, "step": 21925 }, { "epoch": 2.600023716352425, "grad_norm": 0.6143227556634457, "learning_rate": 1.4413760120043115e-05, "loss": 0.0681, "step": 21926 }, { "epoch": 2.60014229811455, "grad_norm": 0.5652176239646199, "learning_rate": 1.441158563448598e-05, "loss": 0.0598, "step": 21927 }, { "epoch": 2.600260879876675, "grad_norm": 0.7572079283942202, "learning_rate": 1.4409411246541038e-05, "loss": 0.0815, "step": 21928 }, { "epoch": 2.6003794616388, "grad_norm": 0.5840458806668437, "learning_rate": 1.4407236956228337e-05, "loss": 0.0672, "step": 21929 }, { "epoch": 2.600498043400925, "grad_norm": 0.5487847173046987, "learning_rate": 1.4405062763567923e-05, "loss": 0.078, "step": 21930 }, { "epoch": 2.60061662516305, "grad_norm": 0.7255678808337844, "learning_rate": 1.4402888668579847e-05, "loss": 0.0787, "step": 21931 }, { "epoch": 2.600735206925175, "grad_norm": 0.5757746528575257, "learning_rate": 1.4400714671284123e-05, "loss": 0.0563, "step": 21932 }, { "epoch": 2.6008537886872998, "grad_norm": 0.6369798685970102, "learning_rate": 1.4398540771700833e-05, "loss": 0.0876, "step": 21933 }, { "epoch": 2.600972370449425, "grad_norm": 1.1328039052713466, "learning_rate": 1.4396366969849987e-05, "loss": 0.1532, "step": 21934 }, { "epoch": 2.6010909522115497, "grad_norm": 0.5510183329627586, "learning_rate": 1.4394193265751631e-05, "loss": 0.0809, "step": 21935 }, { "epoch": 2.601209533973675, "grad_norm": 0.6855714177168133, "learning_rate": 1.4392019659425813e-05, "loss": 0.0882, "step": 21936 }, { "epoch": 2.6013281157357997, "grad_norm": 0.4818063336760342, "learning_rate": 1.4389846150892572e-05, "loss": 0.067, "step": 21937 }, { "epoch": 2.601446697497925, "grad_norm": 0.4500511656433213, "learning_rate": 1.4387672740171926e-05, "loss": 0.0506, "step": 21938 }, { "epoch": 2.6015652792600497, "grad_norm": 0.565833215314141, "learning_rate": 1.4385499427283927e-05, "loss": 0.0745, "step": 21939 }, { "epoch": 2.601683861022175, "grad_norm": 0.7891899105527502, "learning_rate": 1.4383326212248607e-05, "loss": 0.1173, "step": 21940 }, { "epoch": 2.6018024427842996, "grad_norm": 0.4390213237851913, "learning_rate": 1.4381153095086e-05, "loss": 0.0516, "step": 21941 }, { "epoch": 2.601921024546425, "grad_norm": 0.832882930714744, "learning_rate": 1.4378980075816152e-05, "loss": 0.1084, "step": 21942 }, { "epoch": 2.6020396063085496, "grad_norm": 0.6157465995086525, "learning_rate": 1.4376807154459057e-05, "loss": 0.0561, "step": 21943 }, { "epoch": 2.602158188070675, "grad_norm": 1.0238998496398375, "learning_rate": 1.4374634331034798e-05, "loss": 0.1251, "step": 21944 }, { "epoch": 2.6022767698328, "grad_norm": 0.43801504308296424, "learning_rate": 1.437246160556337e-05, "loss": 0.056, "step": 21945 }, { "epoch": 2.6023953515949247, "grad_norm": 0.6480094903932844, "learning_rate": 1.4370288978064822e-05, "loss": 0.098, "step": 21946 }, { "epoch": 2.6025139333570495, "grad_norm": 0.8342263616727594, "learning_rate": 1.4368116448559154e-05, "loss": 0.127, "step": 21947 }, { "epoch": 2.6026325151191747, "grad_norm": 0.7280111786339267, "learning_rate": 1.4365944017066438e-05, "loss": 0.096, "step": 21948 }, { "epoch": 2.6027510968813, "grad_norm": 0.7756930917084097, "learning_rate": 1.4363771683606666e-05, "loss": 0.0889, "step": 21949 }, { "epoch": 2.6028696786434247, "grad_norm": 0.4282781177434303, "learning_rate": 1.4361599448199875e-05, "loss": 0.063, "step": 21950 }, { "epoch": 2.6029882604055494, "grad_norm": 1.00980511020672, "learning_rate": 1.4359427310866095e-05, "loss": 0.085, "step": 21951 }, { "epoch": 2.6031068421676746, "grad_norm": 0.7939143629600948, "learning_rate": 1.4357255271625344e-05, "loss": 0.1146, "step": 21952 }, { "epoch": 2.6032254239298, "grad_norm": 0.6210729608485229, "learning_rate": 1.435508333049766e-05, "loss": 0.0652, "step": 21953 }, { "epoch": 2.6033440056919246, "grad_norm": 0.45851687091808857, "learning_rate": 1.4352911487503033e-05, "loss": 0.0539, "step": 21954 }, { "epoch": 2.6034625874540493, "grad_norm": 0.8487725312404782, "learning_rate": 1.4350739742661523e-05, "loss": 0.108, "step": 21955 }, { "epoch": 2.6035811692161746, "grad_norm": 0.571299010340367, "learning_rate": 1.4348568095993128e-05, "loss": 0.0728, "step": 21956 }, { "epoch": 2.6036997509782998, "grad_norm": 0.6429748982808718, "learning_rate": 1.4346396547517872e-05, "loss": 0.0901, "step": 21957 }, { "epoch": 2.6038183327404245, "grad_norm": 0.6871948818640818, "learning_rate": 1.4344225097255775e-05, "loss": 0.0884, "step": 21958 }, { "epoch": 2.6039369145025493, "grad_norm": 0.5465864760181628, "learning_rate": 1.4342053745226857e-05, "loss": 0.0668, "step": 21959 }, { "epoch": 2.6040554962646745, "grad_norm": 0.7065958871326251, "learning_rate": 1.433988249145114e-05, "loss": 0.1228, "step": 21960 }, { "epoch": 2.6041740780267997, "grad_norm": 0.7608516475750442, "learning_rate": 1.4337711335948629e-05, "loss": 0.1064, "step": 21961 }, { "epoch": 2.6042926597889244, "grad_norm": 0.6477986701724795, "learning_rate": 1.4335540278739342e-05, "loss": 0.069, "step": 21962 }, { "epoch": 2.604411241551049, "grad_norm": 0.7973911132692165, "learning_rate": 1.4333369319843298e-05, "loss": 0.1099, "step": 21963 }, { "epoch": 2.6045298233131744, "grad_norm": 0.43892974169749177, "learning_rate": 1.4331198459280514e-05, "loss": 0.0731, "step": 21964 }, { "epoch": 2.6046484050752996, "grad_norm": 0.47568498593935515, "learning_rate": 1.4329027697070982e-05, "loss": 0.0636, "step": 21965 }, { "epoch": 2.6047669868374244, "grad_norm": 0.6652970359918451, "learning_rate": 1.4326857033234748e-05, "loss": 0.0782, "step": 21966 }, { "epoch": 2.604885568599549, "grad_norm": 0.6876341742636708, "learning_rate": 1.4324686467791792e-05, "loss": 0.1046, "step": 21967 }, { "epoch": 2.6050041503616743, "grad_norm": 0.7700344042803501, "learning_rate": 1.4322516000762137e-05, "loss": 0.1185, "step": 21968 }, { "epoch": 2.6051227321237995, "grad_norm": 0.4763278630152302, "learning_rate": 1.4320345632165793e-05, "loss": 0.0683, "step": 21969 }, { "epoch": 2.6052413138859243, "grad_norm": 0.8046687184194397, "learning_rate": 1.4318175362022762e-05, "loss": 0.1074, "step": 21970 }, { "epoch": 2.6053598956480495, "grad_norm": 0.6572968857727476, "learning_rate": 1.4316005190353069e-05, "loss": 0.0802, "step": 21971 }, { "epoch": 2.6054784774101742, "grad_norm": 0.6869110047228827, "learning_rate": 1.4313835117176694e-05, "loss": 0.0885, "step": 21972 }, { "epoch": 2.6055970591722994, "grad_norm": 0.7370054033158944, "learning_rate": 1.4311665142513656e-05, "loss": 0.0914, "step": 21973 }, { "epoch": 2.605715640934424, "grad_norm": 0.7933736332902336, "learning_rate": 1.4309495266383958e-05, "loss": 0.114, "step": 21974 }, { "epoch": 2.6058342226965494, "grad_norm": 0.7626327331658775, "learning_rate": 1.4307325488807616e-05, "loss": 0.091, "step": 21975 }, { "epoch": 2.605952804458674, "grad_norm": 0.6335595301907293, "learning_rate": 1.4305155809804608e-05, "loss": 0.076, "step": 21976 }, { "epoch": 2.6060713862207994, "grad_norm": 0.6520711288707952, "learning_rate": 1.4302986229394949e-05, "loss": 0.1014, "step": 21977 }, { "epoch": 2.606189967982924, "grad_norm": 0.6074123041009495, "learning_rate": 1.4300816747598639e-05, "loss": 0.0824, "step": 21978 }, { "epoch": 2.6063085497450493, "grad_norm": 1.0066834251479257, "learning_rate": 1.429864736443568e-05, "loss": 0.1294, "step": 21979 }, { "epoch": 2.606427131507174, "grad_norm": 0.6652064684411911, "learning_rate": 1.4296478079926067e-05, "loss": 0.107, "step": 21980 }, { "epoch": 2.6065457132692993, "grad_norm": 0.7353842707089959, "learning_rate": 1.4294308894089803e-05, "loss": 0.1129, "step": 21981 }, { "epoch": 2.606664295031424, "grad_norm": 0.6634663998500046, "learning_rate": 1.4292139806946894e-05, "loss": 0.0752, "step": 21982 }, { "epoch": 2.6067828767935493, "grad_norm": 0.6868012436688146, "learning_rate": 1.4289970818517312e-05, "loss": 0.1075, "step": 21983 }, { "epoch": 2.606901458555674, "grad_norm": 0.863441761624392, "learning_rate": 1.4287801928821068e-05, "loss": 0.1009, "step": 21984 }, { "epoch": 2.607020040317799, "grad_norm": 0.7747307251873706, "learning_rate": 1.4285633137878152e-05, "loss": 0.1081, "step": 21985 }, { "epoch": 2.607138622079924, "grad_norm": 0.7454826861950284, "learning_rate": 1.4283464445708572e-05, "loss": 0.088, "step": 21986 }, { "epoch": 2.607257203842049, "grad_norm": 0.6552469762355371, "learning_rate": 1.4281295852332297e-05, "loss": 0.081, "step": 21987 }, { "epoch": 2.607375785604174, "grad_norm": 0.9688757896919075, "learning_rate": 1.4279127357769328e-05, "loss": 0.1298, "step": 21988 }, { "epoch": 2.607494367366299, "grad_norm": 0.6318960274674293, "learning_rate": 1.427695896203966e-05, "loss": 0.1037, "step": 21989 }, { "epoch": 2.607612949128424, "grad_norm": 0.7072657134780546, "learning_rate": 1.4274790665163281e-05, "loss": 0.0971, "step": 21990 }, { "epoch": 2.607731530890549, "grad_norm": 0.4987905580284949, "learning_rate": 1.427262246716019e-05, "loss": 0.0686, "step": 21991 }, { "epoch": 2.607850112652674, "grad_norm": 0.6960246929704086, "learning_rate": 1.4270454368050345e-05, "loss": 0.0993, "step": 21992 }, { "epoch": 2.607968694414799, "grad_norm": 0.858453407148519, "learning_rate": 1.4268286367853773e-05, "loss": 0.1089, "step": 21993 }, { "epoch": 2.6080872761769243, "grad_norm": 0.6508930480242765, "learning_rate": 1.4266118466590433e-05, "loss": 0.1034, "step": 21994 }, { "epoch": 2.608205857939049, "grad_norm": 1.0798617016953551, "learning_rate": 1.4263950664280314e-05, "loss": 0.1197, "step": 21995 }, { "epoch": 2.608324439701174, "grad_norm": 0.6551486599253281, "learning_rate": 1.4261782960943407e-05, "loss": 0.0878, "step": 21996 }, { "epoch": 2.608443021463299, "grad_norm": 0.3718019515094267, "learning_rate": 1.4259615356599704e-05, "loss": 0.0534, "step": 21997 }, { "epoch": 2.608561603225424, "grad_norm": 0.8029631478372832, "learning_rate": 1.4257447851269163e-05, "loss": 0.1027, "step": 21998 }, { "epoch": 2.608680184987549, "grad_norm": 0.608005073678563, "learning_rate": 1.425528044497178e-05, "loss": 0.0692, "step": 21999 }, { "epoch": 2.6087987667496737, "grad_norm": 0.5077167697640207, "learning_rate": 1.4253113137727538e-05, "loss": 0.0853, "step": 22000 }, { "epoch": 2.608917348511799, "grad_norm": 0.6519510863812491, "learning_rate": 1.4250945929556414e-05, "loss": 0.0933, "step": 22001 }, { "epoch": 2.609035930273924, "grad_norm": 0.7530685576257208, "learning_rate": 1.4248778820478395e-05, "loss": 0.1075, "step": 22002 }, { "epoch": 2.609154512036049, "grad_norm": 0.6632582899756148, "learning_rate": 1.4246611810513432e-05, "loss": 0.1006, "step": 22003 }, { "epoch": 2.6092730937981736, "grad_norm": 0.8692552724377568, "learning_rate": 1.4244444899681541e-05, "loss": 0.1206, "step": 22004 }, { "epoch": 2.609391675560299, "grad_norm": 0.37423051511025546, "learning_rate": 1.424227808800267e-05, "loss": 0.0512, "step": 22005 }, { "epoch": 2.609510257322424, "grad_norm": 0.5827624088149662, "learning_rate": 1.4240111375496803e-05, "loss": 0.0781, "step": 22006 }, { "epoch": 2.609628839084549, "grad_norm": 0.8349939495767131, "learning_rate": 1.4237944762183914e-05, "loss": 0.0949, "step": 22007 }, { "epoch": 2.6097474208466735, "grad_norm": 0.5017979559421278, "learning_rate": 1.4235778248083975e-05, "loss": 0.0825, "step": 22008 }, { "epoch": 2.6098660026087988, "grad_norm": 0.6201845822195293, "learning_rate": 1.4233611833216975e-05, "loss": 0.0868, "step": 22009 }, { "epoch": 2.609984584370924, "grad_norm": 0.6589119723773808, "learning_rate": 1.4231445517602849e-05, "loss": 0.1032, "step": 22010 }, { "epoch": 2.6101031661330487, "grad_norm": 0.8378868432393067, "learning_rate": 1.4229279301261608e-05, "loss": 0.1373, "step": 22011 }, { "epoch": 2.6102217478951735, "grad_norm": 0.6032792385627211, "learning_rate": 1.4227113184213198e-05, "loss": 0.1029, "step": 22012 }, { "epoch": 2.6103403296572987, "grad_norm": 1.206624222905882, "learning_rate": 1.4224947166477597e-05, "loss": 0.178, "step": 22013 }, { "epoch": 2.610458911419424, "grad_norm": 0.6933874492398453, "learning_rate": 1.4222781248074758e-05, "loss": 0.1075, "step": 22014 }, { "epoch": 2.6105774931815486, "grad_norm": 0.9234596145340672, "learning_rate": 1.4220615429024675e-05, "loss": 0.127, "step": 22015 }, { "epoch": 2.6106960749436734, "grad_norm": 0.3973387888002447, "learning_rate": 1.4218449709347293e-05, "loss": 0.0613, "step": 22016 }, { "epoch": 2.6108146567057986, "grad_norm": 0.696097475169207, "learning_rate": 1.4216284089062581e-05, "loss": 0.0808, "step": 22017 }, { "epoch": 2.610933238467924, "grad_norm": 0.5859736349945868, "learning_rate": 1.4214118568190505e-05, "loss": 0.0906, "step": 22018 }, { "epoch": 2.6110518202300486, "grad_norm": 0.659072906313902, "learning_rate": 1.4211953146751028e-05, "loss": 0.0685, "step": 22019 }, { "epoch": 2.6111704019921738, "grad_norm": 0.7545274015083119, "learning_rate": 1.4209787824764126e-05, "loss": 0.1192, "step": 22020 }, { "epoch": 2.6112889837542985, "grad_norm": 0.8126813519890249, "learning_rate": 1.4207622602249732e-05, "loss": 0.135, "step": 22021 }, { "epoch": 2.6114075655164237, "grad_norm": 0.7879401780304492, "learning_rate": 1.4205457479227839e-05, "loss": 0.1172, "step": 22022 }, { "epoch": 2.6115261472785485, "grad_norm": 0.8057281858446868, "learning_rate": 1.420329245571838e-05, "loss": 0.0952, "step": 22023 }, { "epoch": 2.6116447290406737, "grad_norm": 0.5951317139414, "learning_rate": 1.4201127531741337e-05, "loss": 0.0715, "step": 22024 }, { "epoch": 2.6117633108027984, "grad_norm": 0.636747818244709, "learning_rate": 1.4198962707316635e-05, "loss": 0.0661, "step": 22025 }, { "epoch": 2.6118818925649236, "grad_norm": 0.6460736343122466, "learning_rate": 1.4196797982464272e-05, "loss": 0.097, "step": 22026 }, { "epoch": 2.6120004743270484, "grad_norm": 0.6414859164814994, "learning_rate": 1.4194633357204173e-05, "loss": 0.0926, "step": 22027 }, { "epoch": 2.6121190560891736, "grad_norm": 0.5986514588944696, "learning_rate": 1.4192468831556304e-05, "loss": 0.0885, "step": 22028 }, { "epoch": 2.6122376378512984, "grad_norm": 0.4950773419880529, "learning_rate": 1.419030440554062e-05, "loss": 0.0834, "step": 22029 }, { "epoch": 2.6123562196134236, "grad_norm": 0.45262106066950664, "learning_rate": 1.4188140079177072e-05, "loss": 0.0666, "step": 22030 }, { "epoch": 2.6124748013755483, "grad_norm": 0.5863136558830888, "learning_rate": 1.4185975852485628e-05, "loss": 0.0759, "step": 22031 }, { "epoch": 2.6125933831376735, "grad_norm": 0.5438509320253987, "learning_rate": 1.4183811725486213e-05, "loss": 0.0798, "step": 22032 }, { "epoch": 2.6127119648997983, "grad_norm": 0.8348074449100498, "learning_rate": 1.4181647698198791e-05, "loss": 0.1278, "step": 22033 }, { "epoch": 2.6128305466619235, "grad_norm": 0.6310382253071126, "learning_rate": 1.4179483770643309e-05, "loss": 0.0892, "step": 22034 }, { "epoch": 2.6129491284240483, "grad_norm": 0.5378481003319695, "learning_rate": 1.4177319942839728e-05, "loss": 0.084, "step": 22035 }, { "epoch": 2.6130677101861735, "grad_norm": 0.6085625805121494, "learning_rate": 1.4175156214807966e-05, "loss": 0.094, "step": 22036 }, { "epoch": 2.613186291948298, "grad_norm": 0.586867649946835, "learning_rate": 1.4172992586568013e-05, "loss": 0.0762, "step": 22037 }, { "epoch": 2.6133048737104234, "grad_norm": 0.8887690859236409, "learning_rate": 1.4170829058139775e-05, "loss": 0.1368, "step": 22038 }, { "epoch": 2.613423455472548, "grad_norm": 0.45566621924585266, "learning_rate": 1.4168665629543218e-05, "loss": 0.0696, "step": 22039 }, { "epoch": 2.6135420372346734, "grad_norm": 0.7466442645651531, "learning_rate": 1.4166502300798279e-05, "loss": 0.0941, "step": 22040 }, { "epoch": 2.613660618996798, "grad_norm": 1.3313633301940182, "learning_rate": 1.4164339071924906e-05, "loss": 0.1287, "step": 22041 }, { "epoch": 2.6137792007589233, "grad_norm": 0.33963971941990795, "learning_rate": 1.4162175942943048e-05, "loss": 0.0413, "step": 22042 }, { "epoch": 2.613897782521048, "grad_norm": 0.9437286618295434, "learning_rate": 1.4160012913872628e-05, "loss": 0.1296, "step": 22043 }, { "epoch": 2.6140163642831733, "grad_norm": 0.7776980464062251, "learning_rate": 1.4157849984733595e-05, "loss": 0.105, "step": 22044 }, { "epoch": 2.614134946045298, "grad_norm": 0.49808928722597695, "learning_rate": 1.4155687155545893e-05, "loss": 0.0691, "step": 22045 }, { "epoch": 2.6142535278074233, "grad_norm": 0.6025463951437224, "learning_rate": 1.4153524426329461e-05, "loss": 0.0878, "step": 22046 }, { "epoch": 2.6143721095695485, "grad_norm": 0.43384403483705175, "learning_rate": 1.4151361797104229e-05, "loss": 0.0649, "step": 22047 }, { "epoch": 2.6144906913316732, "grad_norm": 0.3993971393882321, "learning_rate": 1.4149199267890133e-05, "loss": 0.0467, "step": 22048 }, { "epoch": 2.614609273093798, "grad_norm": 0.7310734113482398, "learning_rate": 1.414703683870712e-05, "loss": 0.1037, "step": 22049 }, { "epoch": 2.614727854855923, "grad_norm": 0.42887847621311836, "learning_rate": 1.4144874509575112e-05, "loss": 0.0526, "step": 22050 }, { "epoch": 2.6148464366180484, "grad_norm": 0.6403932077826392, "learning_rate": 1.4142712280514053e-05, "loss": 0.0968, "step": 22051 }, { "epoch": 2.614965018380173, "grad_norm": 0.6661826163351403, "learning_rate": 1.4140550151543874e-05, "loss": 0.0984, "step": 22052 }, { "epoch": 2.615083600142298, "grad_norm": 0.4466440038848482, "learning_rate": 1.4138388122684515e-05, "loss": 0.0503, "step": 22053 }, { "epoch": 2.615202181904423, "grad_norm": 0.9054708551669358, "learning_rate": 1.4136226193955888e-05, "loss": 0.1356, "step": 22054 }, { "epoch": 2.6153207636665483, "grad_norm": 0.670468585587432, "learning_rate": 1.413406436537793e-05, "loss": 0.1093, "step": 22055 }, { "epoch": 2.615439345428673, "grad_norm": 0.557879932887071, "learning_rate": 1.4131902636970575e-05, "loss": 0.0617, "step": 22056 }, { "epoch": 2.615557927190798, "grad_norm": 0.6936947291936187, "learning_rate": 1.4129741008753755e-05, "loss": 0.0976, "step": 22057 }, { "epoch": 2.615676508952923, "grad_norm": 0.8842025207013137, "learning_rate": 1.41275794807474e-05, "loss": 0.1049, "step": 22058 }, { "epoch": 2.6157950907150482, "grad_norm": 0.49721401181884883, "learning_rate": 1.4125418052971407e-05, "loss": 0.0676, "step": 22059 }, { "epoch": 2.615913672477173, "grad_norm": 0.9241849166727963, "learning_rate": 1.4123256725445747e-05, "loss": 0.1305, "step": 22060 }, { "epoch": 2.6160322542392977, "grad_norm": 0.6968014938969033, "learning_rate": 1.4121095498190313e-05, "loss": 0.1008, "step": 22061 }, { "epoch": 2.616150836001423, "grad_norm": 0.4150478859714025, "learning_rate": 1.4118934371225045e-05, "loss": 0.0575, "step": 22062 }, { "epoch": 2.616269417763548, "grad_norm": 0.5677525816627891, "learning_rate": 1.411677334456984e-05, "loss": 0.0838, "step": 22063 }, { "epoch": 2.616387999525673, "grad_norm": 0.5824702041921683, "learning_rate": 1.411461241824466e-05, "loss": 0.0689, "step": 22064 }, { "epoch": 2.6165065812877977, "grad_norm": 0.6662346225076204, "learning_rate": 1.4112451592269393e-05, "loss": 0.0856, "step": 22065 }, { "epoch": 2.616625163049923, "grad_norm": 0.725868937504414, "learning_rate": 1.4110290866663973e-05, "loss": 0.0944, "step": 22066 }, { "epoch": 2.616743744812048, "grad_norm": 0.8628552457425012, "learning_rate": 1.4108130241448314e-05, "loss": 0.1119, "step": 22067 }, { "epoch": 2.616862326574173, "grad_norm": 0.8290815162911118, "learning_rate": 1.4105969716642342e-05, "loss": 0.1126, "step": 22068 }, { "epoch": 2.616980908336298, "grad_norm": 0.45724158994630126, "learning_rate": 1.4103809292265979e-05, "loss": 0.0624, "step": 22069 }, { "epoch": 2.617099490098423, "grad_norm": 0.8173260661929446, "learning_rate": 1.410164896833911e-05, "loss": 0.12, "step": 22070 }, { "epoch": 2.617218071860548, "grad_norm": 0.5891285068889657, "learning_rate": 1.4099488744881695e-05, "loss": 0.0793, "step": 22071 }, { "epoch": 2.6173366536226728, "grad_norm": 0.9957060998999003, "learning_rate": 1.4097328621913616e-05, "loss": 0.0987, "step": 22072 }, { "epoch": 2.617455235384798, "grad_norm": 0.8345897874678252, "learning_rate": 1.4095168599454808e-05, "loss": 0.1216, "step": 22073 }, { "epoch": 2.6175738171469227, "grad_norm": 0.8425111960352636, "learning_rate": 1.4093008677525155e-05, "loss": 0.1122, "step": 22074 }, { "epoch": 2.617692398909048, "grad_norm": 0.7685647131298633, "learning_rate": 1.4090848856144603e-05, "loss": 0.1128, "step": 22075 }, { "epoch": 2.6178109806711727, "grad_norm": 1.187519512795012, "learning_rate": 1.4088689135333039e-05, "loss": 0.1396, "step": 22076 }, { "epoch": 2.617929562433298, "grad_norm": 0.6053539248092762, "learning_rate": 1.408652951511038e-05, "loss": 0.0605, "step": 22077 }, { "epoch": 2.6180481441954226, "grad_norm": 0.6977890849943897, "learning_rate": 1.4084369995496537e-05, "loss": 0.1021, "step": 22078 }, { "epoch": 2.618166725957548, "grad_norm": 0.7414447879185294, "learning_rate": 1.4082210576511418e-05, "loss": 0.103, "step": 22079 }, { "epoch": 2.6182853077196726, "grad_norm": 0.7797282385648604, "learning_rate": 1.4080051258174936e-05, "loss": 0.0934, "step": 22080 }, { "epoch": 2.618403889481798, "grad_norm": 0.4645408528289416, "learning_rate": 1.4077892040506973e-05, "loss": 0.0644, "step": 22081 }, { "epoch": 2.6185224712439226, "grad_norm": 0.695107456979117, "learning_rate": 1.4075732923527474e-05, "loss": 0.0915, "step": 22082 }, { "epoch": 2.6186410530060478, "grad_norm": 0.933684612310306, "learning_rate": 1.4073573907256305e-05, "loss": 0.1398, "step": 22083 }, { "epoch": 2.6187596347681725, "grad_norm": 0.5130376230187196, "learning_rate": 1.40714149917134e-05, "loss": 0.0699, "step": 22084 }, { "epoch": 2.6188782165302977, "grad_norm": 0.7611352087072213, "learning_rate": 1.4069256176918627e-05, "loss": 0.1071, "step": 22085 }, { "epoch": 2.6189967982924225, "grad_norm": 0.4084123432899418, "learning_rate": 1.4067097462891926e-05, "loss": 0.064, "step": 22086 }, { "epoch": 2.6191153800545477, "grad_norm": 0.5315349710766702, "learning_rate": 1.406493884965317e-05, "loss": 0.0659, "step": 22087 }, { "epoch": 2.6192339618166725, "grad_norm": 0.9267917092551091, "learning_rate": 1.406278033722227e-05, "loss": 0.1384, "step": 22088 }, { "epoch": 2.6193525435787977, "grad_norm": 0.674446128634552, "learning_rate": 1.4060621925619122e-05, "loss": 0.0866, "step": 22089 }, { "epoch": 2.6194711253409224, "grad_norm": 0.7215348650471252, "learning_rate": 1.4058463614863627e-05, "loss": 0.0997, "step": 22090 }, { "epoch": 2.6195897071030476, "grad_norm": 0.4896739959868906, "learning_rate": 1.405630540497569e-05, "loss": 0.0707, "step": 22091 }, { "epoch": 2.6197082888651724, "grad_norm": 1.2320983608961948, "learning_rate": 1.4054147295975174e-05, "loss": 0.122, "step": 22092 }, { "epoch": 2.6198268706272976, "grad_norm": 0.7910570985542978, "learning_rate": 1.4051989287882016e-05, "loss": 0.1068, "step": 22093 }, { "epoch": 2.6199454523894223, "grad_norm": 0.5944947965236823, "learning_rate": 1.4049831380716089e-05, "loss": 0.0765, "step": 22094 }, { "epoch": 2.6200640341515475, "grad_norm": 0.7900748295899002, "learning_rate": 1.4047673574497292e-05, "loss": 0.1025, "step": 22095 }, { "epoch": 2.6201826159136727, "grad_norm": 0.8484444588310283, "learning_rate": 1.4045515869245496e-05, "loss": 0.1282, "step": 22096 }, { "epoch": 2.6203011976757975, "grad_norm": 0.5091275260517677, "learning_rate": 1.404335826498063e-05, "loss": 0.078, "step": 22097 }, { "epoch": 2.6204197794379223, "grad_norm": 0.5256236901404228, "learning_rate": 1.4041200761722556e-05, "loss": 0.0582, "step": 22098 }, { "epoch": 2.6205383612000475, "grad_norm": 0.9043511999296037, "learning_rate": 1.4039043359491172e-05, "loss": 0.1259, "step": 22099 }, { "epoch": 2.6206569429621727, "grad_norm": 0.54798289071529, "learning_rate": 1.4036886058306364e-05, "loss": 0.0815, "step": 22100 }, { "epoch": 2.6207755247242974, "grad_norm": 0.6936639019885543, "learning_rate": 1.4034728858188029e-05, "loss": 0.0915, "step": 22101 }, { "epoch": 2.620894106486422, "grad_norm": 0.45931446433298817, "learning_rate": 1.4032571759156051e-05, "loss": 0.0682, "step": 22102 }, { "epoch": 2.6210126882485474, "grad_norm": 0.9087489195381161, "learning_rate": 1.4030414761230304e-05, "loss": 0.1403, "step": 22103 }, { "epoch": 2.6211312700106726, "grad_norm": 0.5179112105506419, "learning_rate": 1.4028257864430682e-05, "loss": 0.0675, "step": 22104 }, { "epoch": 2.6212498517727973, "grad_norm": 0.6644394434462044, "learning_rate": 1.402610106877707e-05, "loss": 0.1117, "step": 22105 }, { "epoch": 2.621368433534922, "grad_norm": 0.7344412545105654, "learning_rate": 1.4023944374289358e-05, "loss": 0.0993, "step": 22106 }, { "epoch": 2.6214870152970473, "grad_norm": 0.7893790326316028, "learning_rate": 1.40217877809874e-05, "loss": 0.104, "step": 22107 }, { "epoch": 2.6216055970591725, "grad_norm": 0.4613142734995136, "learning_rate": 1.401963128889111e-05, "loss": 0.0616, "step": 22108 }, { "epoch": 2.6217241788212973, "grad_norm": 0.44862839606781835, "learning_rate": 1.401747489802035e-05, "loss": 0.0582, "step": 22109 }, { "epoch": 2.621842760583422, "grad_norm": 0.9686913155559255, "learning_rate": 1.4015318608395006e-05, "loss": 0.1282, "step": 22110 }, { "epoch": 2.6219613423455472, "grad_norm": 0.6818754974363416, "learning_rate": 1.4013162420034947e-05, "loss": 0.0846, "step": 22111 }, { "epoch": 2.6220799241076724, "grad_norm": 0.6098538300646913, "learning_rate": 1.4011006332960064e-05, "loss": 0.0886, "step": 22112 }, { "epoch": 2.622198505869797, "grad_norm": 0.7095376129649297, "learning_rate": 1.4008850347190234e-05, "loss": 0.1306, "step": 22113 }, { "epoch": 2.622317087631922, "grad_norm": 0.7763266087302444, "learning_rate": 1.4006694462745318e-05, "loss": 0.0853, "step": 22114 }, { "epoch": 2.622435669394047, "grad_norm": 0.8839987414679412, "learning_rate": 1.4004538679645197e-05, "loss": 0.1295, "step": 22115 }, { "epoch": 2.6225542511561724, "grad_norm": 0.6589532805745818, "learning_rate": 1.4002382997909743e-05, "loss": 0.0891, "step": 22116 }, { "epoch": 2.622672832918297, "grad_norm": 0.5212228359633704, "learning_rate": 1.4000227417558834e-05, "loss": 0.0751, "step": 22117 }, { "epoch": 2.6227914146804223, "grad_norm": 0.5788874289351714, "learning_rate": 1.399807193861235e-05, "loss": 0.0665, "step": 22118 }, { "epoch": 2.622909996442547, "grad_norm": 0.8174172432377894, "learning_rate": 1.399591656109013e-05, "loss": 0.0866, "step": 22119 }, { "epoch": 2.6230285782046723, "grad_norm": 0.626944515945624, "learning_rate": 1.3993761285012085e-05, "loss": 0.0898, "step": 22120 }, { "epoch": 2.623147159966797, "grad_norm": 0.41718093694748926, "learning_rate": 1.3991606110398056e-05, "loss": 0.0575, "step": 22121 }, { "epoch": 2.6232657417289222, "grad_norm": 0.78267180733908, "learning_rate": 1.3989451037267918e-05, "loss": 0.0943, "step": 22122 }, { "epoch": 2.623384323491047, "grad_norm": 0.7604390336073532, "learning_rate": 1.398729606564154e-05, "loss": 0.0914, "step": 22123 }, { "epoch": 2.623502905253172, "grad_norm": 0.7439342274590975, "learning_rate": 1.3985141195538797e-05, "loss": 0.0838, "step": 22124 }, { "epoch": 2.623621487015297, "grad_norm": 0.9279847640735366, "learning_rate": 1.3982986426979534e-05, "loss": 0.1451, "step": 22125 }, { "epoch": 2.623740068777422, "grad_norm": 0.6122406797529125, "learning_rate": 1.3980831759983625e-05, "loss": 0.0955, "step": 22126 }, { "epoch": 2.623858650539547, "grad_norm": 0.443038232159116, "learning_rate": 1.3978677194570938e-05, "loss": 0.0642, "step": 22127 }, { "epoch": 2.623977232301672, "grad_norm": 0.8851124787130357, "learning_rate": 1.3976522730761332e-05, "loss": 0.1, "step": 22128 }, { "epoch": 2.624095814063797, "grad_norm": 0.9078638729991346, "learning_rate": 1.3974368368574678e-05, "loss": 0.1036, "step": 22129 }, { "epoch": 2.624214395825922, "grad_norm": 0.9007282628903428, "learning_rate": 1.3972214108030807e-05, "loss": 0.1337, "step": 22130 }, { "epoch": 2.624332977588047, "grad_norm": 0.4920367164653129, "learning_rate": 1.3970059949149616e-05, "loss": 0.0673, "step": 22131 }, { "epoch": 2.624451559350172, "grad_norm": 0.5486185623107377, "learning_rate": 1.396790589195094e-05, "loss": 0.0721, "step": 22132 }, { "epoch": 2.624570141112297, "grad_norm": 0.6857830241260149, "learning_rate": 1.3965751936454651e-05, "loss": 0.1077, "step": 22133 }, { "epoch": 2.624688722874422, "grad_norm": 0.9104489564791624, "learning_rate": 1.396359808268058e-05, "loss": 0.1235, "step": 22134 }, { "epoch": 2.6248073046365468, "grad_norm": 0.7978423069121005, "learning_rate": 1.396144433064862e-05, "loss": 0.0873, "step": 22135 }, { "epoch": 2.624925886398672, "grad_norm": 0.7950951662726007, "learning_rate": 1.3959290680378594e-05, "loss": 0.1041, "step": 22136 }, { "epoch": 2.6250444681607967, "grad_norm": 0.9318946152938719, "learning_rate": 1.3957137131890374e-05, "loss": 0.1166, "step": 22137 }, { "epoch": 2.625163049922922, "grad_norm": 0.6637333748308374, "learning_rate": 1.3954983685203807e-05, "loss": 0.0875, "step": 22138 }, { "epoch": 2.6252816316850467, "grad_norm": 0.7707681898836856, "learning_rate": 1.3952830340338746e-05, "loss": 0.1061, "step": 22139 }, { "epoch": 2.625400213447172, "grad_norm": 0.6198413964884286, "learning_rate": 1.3950677097315052e-05, "loss": 0.0877, "step": 22140 }, { "epoch": 2.6255187952092967, "grad_norm": 0.9037560291642205, "learning_rate": 1.3948523956152543e-05, "loss": 0.1028, "step": 22141 }, { "epoch": 2.625637376971422, "grad_norm": 0.507952937411963, "learning_rate": 1.394637091687111e-05, "loss": 0.0603, "step": 22142 }, { "epoch": 2.6257559587335466, "grad_norm": 0.6026737851084257, "learning_rate": 1.3944217979490576e-05, "loss": 0.0795, "step": 22143 }, { "epoch": 2.625874540495672, "grad_norm": 0.9326740997783389, "learning_rate": 1.3942065144030803e-05, "loss": 0.1014, "step": 22144 }, { "epoch": 2.625993122257797, "grad_norm": 0.7672550111175824, "learning_rate": 1.3939912410511612e-05, "loss": 0.0817, "step": 22145 }, { "epoch": 2.6261117040199218, "grad_norm": 0.5568911894386099, "learning_rate": 1.3937759778952883e-05, "loss": 0.0805, "step": 22146 }, { "epoch": 2.6262302857820465, "grad_norm": 0.6049450965403951, "learning_rate": 1.3935607249374433e-05, "loss": 0.0885, "step": 22147 }, { "epoch": 2.6263488675441717, "grad_norm": 0.6264098961009751, "learning_rate": 1.3933454821796118e-05, "loss": 0.0935, "step": 22148 }, { "epoch": 2.626467449306297, "grad_norm": 0.5574360863286957, "learning_rate": 1.393130249623778e-05, "loss": 0.0739, "step": 22149 }, { "epoch": 2.6265860310684217, "grad_norm": 0.5697681261421522, "learning_rate": 1.3929150272719254e-05, "loss": 0.0717, "step": 22150 }, { "epoch": 2.6267046128305465, "grad_norm": 0.4319333482136354, "learning_rate": 1.3926998151260401e-05, "loss": 0.0571, "step": 22151 }, { "epoch": 2.6268231945926717, "grad_norm": 0.4302085906469913, "learning_rate": 1.3924846131881028e-05, "loss": 0.0639, "step": 22152 }, { "epoch": 2.626941776354797, "grad_norm": 0.9720234561848611, "learning_rate": 1.392269421460101e-05, "loss": 0.117, "step": 22153 }, { "epoch": 2.6270603581169216, "grad_norm": 0.7329467712992083, "learning_rate": 1.3920542399440157e-05, "loss": 0.1065, "step": 22154 }, { "epoch": 2.6271789398790464, "grad_norm": 0.7764919990311401, "learning_rate": 1.3918390686418326e-05, "loss": 0.101, "step": 22155 }, { "epoch": 2.6272975216411716, "grad_norm": 0.7912887565834839, "learning_rate": 1.3916239075555326e-05, "loss": 0.0917, "step": 22156 }, { "epoch": 2.627416103403297, "grad_norm": 0.5709022099476014, "learning_rate": 1.3914087566871029e-05, "loss": 0.0857, "step": 22157 }, { "epoch": 2.6275346851654215, "grad_norm": 0.7058198661260656, "learning_rate": 1.3911936160385241e-05, "loss": 0.0857, "step": 22158 }, { "epoch": 2.6276532669275463, "grad_norm": 0.5201187386982704, "learning_rate": 1.3909784856117802e-05, "loss": 0.0738, "step": 22159 }, { "epoch": 2.6277718486896715, "grad_norm": 0.5128811486622034, "learning_rate": 1.3907633654088548e-05, "loss": 0.0797, "step": 22160 }, { "epoch": 2.6278904304517967, "grad_norm": 0.59374815571535, "learning_rate": 1.3905482554317312e-05, "loss": 0.0796, "step": 22161 }, { "epoch": 2.6280090122139215, "grad_norm": 0.7997204202614849, "learning_rate": 1.390333155682393e-05, "loss": 0.1151, "step": 22162 }, { "epoch": 2.6281275939760462, "grad_norm": 0.7630002781916789, "learning_rate": 1.39011806616282e-05, "loss": 0.1117, "step": 22163 }, { "epoch": 2.6282461757381714, "grad_norm": 0.9696008582421144, "learning_rate": 1.3899029868749996e-05, "loss": 0.131, "step": 22164 }, { "epoch": 2.6283647575002966, "grad_norm": 0.5855431068055443, "learning_rate": 1.3896879178209115e-05, "loss": 0.0925, "step": 22165 }, { "epoch": 2.6284833392624214, "grad_norm": 0.4867148816912166, "learning_rate": 1.3894728590025393e-05, "loss": 0.0743, "step": 22166 }, { "epoch": 2.628601921024546, "grad_norm": 0.8491947220708185, "learning_rate": 1.3892578104218657e-05, "loss": 0.1087, "step": 22167 }, { "epoch": 2.6287205027866714, "grad_norm": 0.818668786175448, "learning_rate": 1.3890427720808725e-05, "loss": 0.1066, "step": 22168 }, { "epoch": 2.6288390845487966, "grad_norm": 0.5280692603776892, "learning_rate": 1.388827743981544e-05, "loss": 0.0757, "step": 22169 }, { "epoch": 2.6289576663109213, "grad_norm": 0.6912859236574017, "learning_rate": 1.3886127261258596e-05, "loss": 0.0954, "step": 22170 }, { "epoch": 2.6290762480730465, "grad_norm": 0.7848262301246707, "learning_rate": 1.3883977185158036e-05, "loss": 0.094, "step": 22171 }, { "epoch": 2.6291948298351713, "grad_norm": 0.42876699920622513, "learning_rate": 1.3881827211533572e-05, "loss": 0.0516, "step": 22172 }, { "epoch": 2.6293134115972965, "grad_norm": 0.6051052049434806, "learning_rate": 1.3879677340405037e-05, "loss": 0.0784, "step": 22173 }, { "epoch": 2.6294319933594212, "grad_norm": 0.6948694751839898, "learning_rate": 1.3877527571792231e-05, "loss": 0.1071, "step": 22174 }, { "epoch": 2.6295505751215464, "grad_norm": 0.8161680983241022, "learning_rate": 1.3875377905714981e-05, "loss": 0.1233, "step": 22175 }, { "epoch": 2.629669156883671, "grad_norm": 0.6224221909119257, "learning_rate": 1.3873228342193106e-05, "loss": 0.0936, "step": 22176 }, { "epoch": 2.6297877386457964, "grad_norm": 0.5173701176813528, "learning_rate": 1.3871078881246419e-05, "loss": 0.0555, "step": 22177 }, { "epoch": 2.629906320407921, "grad_norm": 0.7928521031219999, "learning_rate": 1.386892952289474e-05, "loss": 0.108, "step": 22178 }, { "epoch": 2.6300249021700464, "grad_norm": 0.6814001058646414, "learning_rate": 1.3866780267157877e-05, "loss": 0.0707, "step": 22179 }, { "epoch": 2.630143483932171, "grad_norm": 0.4088224181624444, "learning_rate": 1.3864631114055659e-05, "loss": 0.0564, "step": 22180 }, { "epoch": 2.6302620656942963, "grad_norm": 0.9204195170568712, "learning_rate": 1.3862482063607879e-05, "loss": 0.1423, "step": 22181 }, { "epoch": 2.630380647456421, "grad_norm": 0.5431104443226505, "learning_rate": 1.3860333115834356e-05, "loss": 0.0706, "step": 22182 }, { "epoch": 2.6304992292185463, "grad_norm": 0.40779525572753234, "learning_rate": 1.3858184270754903e-05, "loss": 0.0643, "step": 22183 }, { "epoch": 2.630617810980671, "grad_norm": 0.9517822844298176, "learning_rate": 1.3856035528389335e-05, "loss": 0.1234, "step": 22184 }, { "epoch": 2.6307363927427962, "grad_norm": 0.7141798361680881, "learning_rate": 1.3853886888757444e-05, "loss": 0.0745, "step": 22185 }, { "epoch": 2.630854974504921, "grad_norm": 0.5722959320038491, "learning_rate": 1.385173835187905e-05, "loss": 0.084, "step": 22186 }, { "epoch": 2.630973556267046, "grad_norm": 0.7240467350803335, "learning_rate": 1.3849589917773956e-05, "loss": 0.0933, "step": 22187 }, { "epoch": 2.631092138029171, "grad_norm": 0.5359581401396568, "learning_rate": 1.3847441586461968e-05, "loss": 0.0766, "step": 22188 }, { "epoch": 2.631210719791296, "grad_norm": 0.6082267547919531, "learning_rate": 1.3845293357962905e-05, "loss": 0.085, "step": 22189 }, { "epoch": 2.631329301553421, "grad_norm": 0.928895810540593, "learning_rate": 1.3843145232296537e-05, "loss": 0.0921, "step": 22190 }, { "epoch": 2.631447883315546, "grad_norm": 0.779867159925717, "learning_rate": 1.3840997209482708e-05, "loss": 0.1042, "step": 22191 }, { "epoch": 2.631566465077671, "grad_norm": 0.7592112976324823, "learning_rate": 1.3838849289541192e-05, "loss": 0.094, "step": 22192 }, { "epoch": 2.631685046839796, "grad_norm": 0.7860439131070778, "learning_rate": 1.3836701472491797e-05, "loss": 0.0843, "step": 22193 }, { "epoch": 2.6318036286019213, "grad_norm": 0.6402353985840931, "learning_rate": 1.3834553758354326e-05, "loss": 0.0994, "step": 22194 }, { "epoch": 2.631922210364046, "grad_norm": 0.46959561649001574, "learning_rate": 1.3832406147148588e-05, "loss": 0.0637, "step": 22195 }, { "epoch": 2.632040792126171, "grad_norm": 0.5018801484360986, "learning_rate": 1.383025863889436e-05, "loss": 0.057, "step": 22196 }, { "epoch": 2.632159373888296, "grad_norm": 0.6945522281298866, "learning_rate": 1.3828111233611452e-05, "loss": 0.0748, "step": 22197 }, { "epoch": 2.632277955650421, "grad_norm": 0.7304319376483229, "learning_rate": 1.3825963931319658e-05, "loss": 0.1061, "step": 22198 }, { "epoch": 2.632396537412546, "grad_norm": 0.5171804165439179, "learning_rate": 1.3823816732038772e-05, "loss": 0.0903, "step": 22199 }, { "epoch": 2.6325151191746707, "grad_norm": 0.6019883484958491, "learning_rate": 1.3821669635788604e-05, "loss": 0.0697, "step": 22200 }, { "epoch": 2.632633700936796, "grad_norm": 0.6753107879008461, "learning_rate": 1.3819522642588912e-05, "loss": 0.0897, "step": 22201 }, { "epoch": 2.632752282698921, "grad_norm": 0.6199375590575888, "learning_rate": 1.3817375752459527e-05, "loss": 0.0915, "step": 22202 }, { "epoch": 2.632870864461046, "grad_norm": 1.032341743781071, "learning_rate": 1.3815228965420218e-05, "loss": 0.1307, "step": 22203 }, { "epoch": 2.6329894462231707, "grad_norm": 0.634497929145572, "learning_rate": 1.3813082281490794e-05, "loss": 0.0906, "step": 22204 }, { "epoch": 2.633108027985296, "grad_norm": 0.6915668285963579, "learning_rate": 1.3810935700691013e-05, "loss": 0.0876, "step": 22205 }, { "epoch": 2.633226609747421, "grad_norm": 0.9311953356826111, "learning_rate": 1.3808789223040703e-05, "loss": 0.1026, "step": 22206 }, { "epoch": 2.633345191509546, "grad_norm": 0.8039110211361633, "learning_rate": 1.380664284855962e-05, "loss": 0.0805, "step": 22207 }, { "epoch": 2.6334637732716706, "grad_norm": 1.2485535100646803, "learning_rate": 1.3804496577267567e-05, "loss": 0.1522, "step": 22208 }, { "epoch": 2.633582355033796, "grad_norm": 0.8587375744360274, "learning_rate": 1.3802350409184326e-05, "loss": 0.138, "step": 22209 }, { "epoch": 2.633700936795921, "grad_norm": 0.5124076038757015, "learning_rate": 1.380020434432968e-05, "loss": 0.0742, "step": 22210 }, { "epoch": 2.6338195185580457, "grad_norm": 0.6849855549893764, "learning_rate": 1.3798058382723427e-05, "loss": 0.0976, "step": 22211 }, { "epoch": 2.6339381003201705, "grad_norm": 0.5959526373017247, "learning_rate": 1.3795912524385323e-05, "loss": 0.0928, "step": 22212 }, { "epoch": 2.6340566820822957, "grad_norm": 0.6581394563203158, "learning_rate": 1.3793766769335185e-05, "loss": 0.1033, "step": 22213 }, { "epoch": 2.634175263844421, "grad_norm": 0.7360942860469871, "learning_rate": 1.3791621117592762e-05, "loss": 0.0975, "step": 22214 }, { "epoch": 2.6342938456065457, "grad_norm": 0.5996263231738402, "learning_rate": 1.378947556917785e-05, "loss": 0.0953, "step": 22215 }, { "epoch": 2.6344124273686704, "grad_norm": 0.6257862628555317, "learning_rate": 1.3787330124110227e-05, "loss": 0.0647, "step": 22216 }, { "epoch": 2.6345310091307956, "grad_norm": 0.47243843537866426, "learning_rate": 1.3785184782409666e-05, "loss": 0.0776, "step": 22217 }, { "epoch": 2.634649590892921, "grad_norm": 0.7809582307063082, "learning_rate": 1.3783039544095962e-05, "loss": 0.1134, "step": 22218 }, { "epoch": 2.6347681726550456, "grad_norm": 0.5640445780646849, "learning_rate": 1.3780894409188868e-05, "loss": 0.0686, "step": 22219 }, { "epoch": 2.634886754417171, "grad_norm": 0.5487077626956448, "learning_rate": 1.3778749377708173e-05, "loss": 0.0807, "step": 22220 }, { "epoch": 2.6350053361792956, "grad_norm": 0.6614289505482247, "learning_rate": 1.3776604449673641e-05, "loss": 0.0863, "step": 22221 }, { "epoch": 2.6351239179414208, "grad_norm": 0.8117087400258742, "learning_rate": 1.3774459625105068e-05, "loss": 0.1121, "step": 22222 }, { "epoch": 2.6352424997035455, "grad_norm": 0.6731307564216018, "learning_rate": 1.3772314904022193e-05, "loss": 0.0831, "step": 22223 }, { "epoch": 2.6353610814656707, "grad_norm": 0.3668819362489026, "learning_rate": 1.3770170286444822e-05, "loss": 0.0456, "step": 22224 }, { "epoch": 2.6354796632277955, "grad_norm": 0.6231658107247141, "learning_rate": 1.3768025772392701e-05, "loss": 0.0664, "step": 22225 }, { "epoch": 2.6355982449899207, "grad_norm": 0.8109993158603936, "learning_rate": 1.376588136188561e-05, "loss": 0.1143, "step": 22226 }, { "epoch": 2.6357168267520454, "grad_norm": 0.6039973736083312, "learning_rate": 1.3763737054943315e-05, "loss": 0.0867, "step": 22227 }, { "epoch": 2.6358354085141706, "grad_norm": 0.879719824785237, "learning_rate": 1.3761592851585589e-05, "loss": 0.1121, "step": 22228 }, { "epoch": 2.6359539902762954, "grad_norm": 0.6499741106417386, "learning_rate": 1.3759448751832204e-05, "loss": 0.0945, "step": 22229 }, { "epoch": 2.6360725720384206, "grad_norm": 0.468690792165345, "learning_rate": 1.3757304755702904e-05, "loss": 0.0679, "step": 22230 }, { "epoch": 2.6361911538005454, "grad_norm": 0.6297925689834445, "learning_rate": 1.3755160863217472e-05, "loss": 0.0987, "step": 22231 }, { "epoch": 2.6363097355626706, "grad_norm": 0.6773955963661832, "learning_rate": 1.3753017074395666e-05, "loss": 0.0931, "step": 22232 }, { "epoch": 2.6364283173247953, "grad_norm": 1.989505281096847, "learning_rate": 1.3750873389257262e-05, "loss": 0.0777, "step": 22233 }, { "epoch": 2.6365468990869205, "grad_norm": 0.9214782140601401, "learning_rate": 1.3748729807821997e-05, "loss": 0.1094, "step": 22234 }, { "epoch": 2.6366654808490453, "grad_norm": 0.5907870698673934, "learning_rate": 1.3746586330109651e-05, "loss": 0.0773, "step": 22235 }, { "epoch": 2.6367840626111705, "grad_norm": 0.8660722457766189, "learning_rate": 1.3744442956139974e-05, "loss": 0.1238, "step": 22236 }, { "epoch": 2.6369026443732952, "grad_norm": 0.6017584808035219, "learning_rate": 1.3742299685932733e-05, "loss": 0.0842, "step": 22237 }, { "epoch": 2.6370212261354204, "grad_norm": 0.9342749784505846, "learning_rate": 1.3740156519507686e-05, "loss": 0.124, "step": 22238 }, { "epoch": 2.637139807897545, "grad_norm": 0.7779451954055261, "learning_rate": 1.3738013456884585e-05, "loss": 0.1117, "step": 22239 }, { "epoch": 2.6372583896596704, "grad_norm": 0.6230090202151877, "learning_rate": 1.3735870498083198e-05, "loss": 0.1053, "step": 22240 }, { "epoch": 2.637376971421795, "grad_norm": 0.6084111073666847, "learning_rate": 1.3733727643123264e-05, "loss": 0.0766, "step": 22241 }, { "epoch": 2.6374955531839204, "grad_norm": 0.6266536198399347, "learning_rate": 1.3731584892024546e-05, "loss": 0.0867, "step": 22242 }, { "epoch": 2.6376141349460456, "grad_norm": 0.7869616432309493, "learning_rate": 1.3729442244806797e-05, "loss": 0.1064, "step": 22243 }, { "epoch": 2.6377327167081703, "grad_norm": 0.7946330747531307, "learning_rate": 1.3727299701489782e-05, "loss": 0.1305, "step": 22244 }, { "epoch": 2.637851298470295, "grad_norm": 0.6661820777026175, "learning_rate": 1.3725157262093225e-05, "loss": 0.0933, "step": 22245 }, { "epoch": 2.6379698802324203, "grad_norm": 0.7921410157074745, "learning_rate": 1.3723014926636896e-05, "loss": 0.0851, "step": 22246 }, { "epoch": 2.6380884619945455, "grad_norm": 0.7455175831710226, "learning_rate": 1.372087269514054e-05, "loss": 0.1184, "step": 22247 }, { "epoch": 2.6382070437566703, "grad_norm": 0.8625832954143843, "learning_rate": 1.3718730567623905e-05, "loss": 0.1319, "step": 22248 }, { "epoch": 2.638325625518795, "grad_norm": 0.48004858106230847, "learning_rate": 1.371658854410674e-05, "loss": 0.0624, "step": 22249 }, { "epoch": 2.63844420728092, "grad_norm": 0.7348558660078384, "learning_rate": 1.3714446624608793e-05, "loss": 0.0953, "step": 22250 }, { "epoch": 2.6385627890430454, "grad_norm": 0.56136359535958, "learning_rate": 1.3712304809149817e-05, "loss": 0.093, "step": 22251 }, { "epoch": 2.63868137080517, "grad_norm": 0.895492094261596, "learning_rate": 1.3710163097749542e-05, "loss": 0.1314, "step": 22252 }, { "epoch": 2.638799952567295, "grad_norm": 0.9034569066076977, "learning_rate": 1.370802149042772e-05, "loss": 0.0991, "step": 22253 }, { "epoch": 2.63891853432942, "grad_norm": 0.6497242170569917, "learning_rate": 1.3705879987204087e-05, "loss": 0.0741, "step": 22254 }, { "epoch": 2.6390371160915453, "grad_norm": 1.0478984109730474, "learning_rate": 1.3703738588098402e-05, "loss": 0.1002, "step": 22255 }, { "epoch": 2.63915569785367, "grad_norm": 0.5367270213450344, "learning_rate": 1.370159729313039e-05, "loss": 0.0771, "step": 22256 }, { "epoch": 2.639274279615795, "grad_norm": 0.9875703313956785, "learning_rate": 1.369945610231979e-05, "loss": 0.1258, "step": 22257 }, { "epoch": 2.63939286137792, "grad_norm": 0.8600354591529581, "learning_rate": 1.3697315015686352e-05, "loss": 0.124, "step": 22258 }, { "epoch": 2.6395114431400453, "grad_norm": 0.8646319852760859, "learning_rate": 1.3695174033249805e-05, "loss": 0.1329, "step": 22259 }, { "epoch": 2.63963002490217, "grad_norm": 0.8220237193832501, "learning_rate": 1.3693033155029902e-05, "loss": 0.0953, "step": 22260 }, { "epoch": 2.639748606664295, "grad_norm": 0.8368729448048801, "learning_rate": 1.369089238104635e-05, "loss": 0.1075, "step": 22261 }, { "epoch": 2.63986718842642, "grad_norm": 0.5183473037701828, "learning_rate": 1.368875171131892e-05, "loss": 0.0665, "step": 22262 }, { "epoch": 2.639985770188545, "grad_norm": 0.7966170747165653, "learning_rate": 1.3686611145867317e-05, "loss": 0.1227, "step": 22263 }, { "epoch": 2.64010435195067, "grad_norm": 0.727170424746305, "learning_rate": 1.3684470684711287e-05, "loss": 0.0983, "step": 22264 }, { "epoch": 2.6402229337127947, "grad_norm": 0.8324062679521113, "learning_rate": 1.3682330327870563e-05, "loss": 0.1304, "step": 22265 }, { "epoch": 2.64034151547492, "grad_norm": 0.9396823368751351, "learning_rate": 1.3680190075364874e-05, "loss": 0.1108, "step": 22266 }, { "epoch": 2.640460097237045, "grad_norm": 0.6011808344617622, "learning_rate": 1.367804992721396e-05, "loss": 0.0824, "step": 22267 }, { "epoch": 2.64057867899917, "grad_norm": 0.7957646558450844, "learning_rate": 1.367590988343752e-05, "loss": 0.0813, "step": 22268 }, { "epoch": 2.640697260761295, "grad_norm": 0.5921617195058195, "learning_rate": 1.3673769944055331e-05, "loss": 0.0867, "step": 22269 }, { "epoch": 2.64081584252342, "grad_norm": 0.7783224661630448, "learning_rate": 1.367163010908708e-05, "loss": 0.1186, "step": 22270 }, { "epoch": 2.640934424285545, "grad_norm": 1.1540002146973427, "learning_rate": 1.3669490378552515e-05, "loss": 0.1079, "step": 22271 }, { "epoch": 2.64105300604767, "grad_norm": 0.5870514841936023, "learning_rate": 1.3667350752471336e-05, "loss": 0.0788, "step": 22272 }, { "epoch": 2.641171587809795, "grad_norm": 0.7290364567207867, "learning_rate": 1.3665211230863308e-05, "loss": 0.0919, "step": 22273 }, { "epoch": 2.6412901695719198, "grad_norm": 0.675075329340873, "learning_rate": 1.3663071813748124e-05, "loss": 0.1063, "step": 22274 }, { "epoch": 2.641408751334045, "grad_norm": 0.5998197254765072, "learning_rate": 1.3660932501145513e-05, "loss": 0.1003, "step": 22275 }, { "epoch": 2.6415273330961697, "grad_norm": 0.7342591438569843, "learning_rate": 1.3658793293075198e-05, "loss": 0.102, "step": 22276 }, { "epoch": 2.641645914858295, "grad_norm": 0.5144864754169727, "learning_rate": 1.3656654189556904e-05, "loss": 0.0685, "step": 22277 }, { "epoch": 2.6417644966204197, "grad_norm": 0.47388077657754435, "learning_rate": 1.3654515190610356e-05, "loss": 0.0658, "step": 22278 }, { "epoch": 2.641883078382545, "grad_norm": 0.7867178336679954, "learning_rate": 1.3652376296255248e-05, "loss": 0.1044, "step": 22279 }, { "epoch": 2.6420016601446696, "grad_norm": 1.1234680239382326, "learning_rate": 1.3650237506511331e-05, "loss": 0.1212, "step": 22280 }, { "epoch": 2.642120241906795, "grad_norm": 0.7253028319182525, "learning_rate": 1.36480988213983e-05, "loss": 0.0917, "step": 22281 }, { "epoch": 2.6422388236689196, "grad_norm": 0.47562949616306605, "learning_rate": 1.3645960240935888e-05, "loss": 0.0636, "step": 22282 }, { "epoch": 2.642357405431045, "grad_norm": 0.8924424319818337, "learning_rate": 1.3643821765143778e-05, "loss": 0.1356, "step": 22283 }, { "epoch": 2.6424759871931696, "grad_norm": 0.6699982850162018, "learning_rate": 1.3641683394041721e-05, "loss": 0.0817, "step": 22284 }, { "epoch": 2.6425945689552948, "grad_norm": 0.6539467015682556, "learning_rate": 1.3639545127649412e-05, "loss": 0.0931, "step": 22285 }, { "epoch": 2.6427131507174195, "grad_norm": 0.5854449441297692, "learning_rate": 1.363740696598656e-05, "loss": 0.0806, "step": 22286 }, { "epoch": 2.6428317324795447, "grad_norm": 0.6490480132122486, "learning_rate": 1.3635268909072885e-05, "loss": 0.0795, "step": 22287 }, { "epoch": 2.6429503142416695, "grad_norm": 0.5114373031357491, "learning_rate": 1.3633130956928095e-05, "loss": 0.0784, "step": 22288 }, { "epoch": 2.6430688960037947, "grad_norm": 0.4542647398573746, "learning_rate": 1.3630993109571904e-05, "loss": 0.0655, "step": 22289 }, { "epoch": 2.6431874777659194, "grad_norm": 0.8029890594360549, "learning_rate": 1.362885536702401e-05, "loss": 0.0958, "step": 22290 }, { "epoch": 2.6433060595280446, "grad_norm": 0.7794406605723889, "learning_rate": 1.3626717729304122e-05, "loss": 0.1112, "step": 22291 }, { "epoch": 2.6434246412901694, "grad_norm": 0.5792831289746059, "learning_rate": 1.3624580196431952e-05, "loss": 0.0885, "step": 22292 }, { "epoch": 2.6435432230522946, "grad_norm": 0.6434121572359125, "learning_rate": 1.3622442768427212e-05, "loss": 0.0957, "step": 22293 }, { "epoch": 2.6436618048144194, "grad_norm": 0.5996318668310618, "learning_rate": 1.3620305445309579e-05, "loss": 0.0911, "step": 22294 }, { "epoch": 2.6437803865765446, "grad_norm": 0.6436668690642515, "learning_rate": 1.3618168227098793e-05, "loss": 0.0971, "step": 22295 }, { "epoch": 2.6438989683386698, "grad_norm": 0.7780706115429668, "learning_rate": 1.3616031113814531e-05, "loss": 0.0882, "step": 22296 }, { "epoch": 2.6440175501007945, "grad_norm": 0.9640570783466451, "learning_rate": 1.3613894105476499e-05, "loss": 0.1247, "step": 22297 }, { "epoch": 2.6441361318629193, "grad_norm": 0.6478577770377933, "learning_rate": 1.3611757202104402e-05, "loss": 0.0822, "step": 22298 }, { "epoch": 2.6442547136250445, "grad_norm": 0.9016167297335723, "learning_rate": 1.3609620403717943e-05, "loss": 0.0997, "step": 22299 }, { "epoch": 2.6443732953871697, "grad_norm": 0.6116366166334369, "learning_rate": 1.3607483710336822e-05, "loss": 0.1077, "step": 22300 }, { "epoch": 2.6444918771492945, "grad_norm": 0.36713593671397315, "learning_rate": 1.3605347121980722e-05, "loss": 0.0521, "step": 22301 }, { "epoch": 2.644610458911419, "grad_norm": 0.5869737888249048, "learning_rate": 1.3603210638669345e-05, "loss": 0.0611, "step": 22302 }, { "epoch": 2.6447290406735444, "grad_norm": 0.63658818258785, "learning_rate": 1.3601074260422392e-05, "loss": 0.0906, "step": 22303 }, { "epoch": 2.6448476224356696, "grad_norm": 0.5740372226669672, "learning_rate": 1.359893798725957e-05, "loss": 0.089, "step": 22304 }, { "epoch": 2.6449662041977944, "grad_norm": 0.6041451585720002, "learning_rate": 1.3596801819200543e-05, "loss": 0.0865, "step": 22305 }, { "epoch": 2.645084785959919, "grad_norm": 0.5438856934024773, "learning_rate": 1.3594665756265023e-05, "loss": 0.0814, "step": 22306 }, { "epoch": 2.6452033677220443, "grad_norm": 0.7237948726754726, "learning_rate": 1.3592529798472698e-05, "loss": 0.1006, "step": 22307 }, { "epoch": 2.6453219494841695, "grad_norm": 0.674215258116859, "learning_rate": 1.3590393945843258e-05, "loss": 0.0857, "step": 22308 }, { "epoch": 2.6454405312462943, "grad_norm": 0.7095410638021892, "learning_rate": 1.3588258198396397e-05, "loss": 0.0928, "step": 22309 }, { "epoch": 2.645559113008419, "grad_norm": 0.8122148492561208, "learning_rate": 1.3586122556151798e-05, "loss": 0.1067, "step": 22310 }, { "epoch": 2.6456776947705443, "grad_norm": 0.7861276537211432, "learning_rate": 1.3583987019129165e-05, "loss": 0.1017, "step": 22311 }, { "epoch": 2.6457962765326695, "grad_norm": 0.7081552448995176, "learning_rate": 1.358185158734816e-05, "loss": 0.1161, "step": 22312 }, { "epoch": 2.6459148582947942, "grad_norm": 0.7094526681358254, "learning_rate": 1.3579716260828484e-05, "loss": 0.0907, "step": 22313 }, { "epoch": 2.646033440056919, "grad_norm": 0.7921679972750866, "learning_rate": 1.357758103958982e-05, "loss": 0.1051, "step": 22314 }, { "epoch": 2.646152021819044, "grad_norm": 0.7950575013185712, "learning_rate": 1.3575445923651862e-05, "loss": 0.0957, "step": 22315 }, { "epoch": 2.6462706035811694, "grad_norm": 0.6263597402939008, "learning_rate": 1.3573310913034271e-05, "loss": 0.0743, "step": 22316 }, { "epoch": 2.646389185343294, "grad_norm": 0.8282487508292655, "learning_rate": 1.3571176007756742e-05, "loss": 0.123, "step": 22317 }, { "epoch": 2.6465077671054194, "grad_norm": 0.6958813844603023, "learning_rate": 1.3569041207838957e-05, "loss": 0.0873, "step": 22318 }, { "epoch": 2.646626348867544, "grad_norm": 0.4343599679462023, "learning_rate": 1.3566906513300589e-05, "loss": 0.0597, "step": 22319 }, { "epoch": 2.6467449306296693, "grad_norm": 0.8063947657026559, "learning_rate": 1.3564771924161338e-05, "loss": 0.0878, "step": 22320 }, { "epoch": 2.646863512391794, "grad_norm": 0.6377965969254634, "learning_rate": 1.3562637440440845e-05, "loss": 0.0709, "step": 22321 }, { "epoch": 2.6469820941539193, "grad_norm": 0.3919075055630714, "learning_rate": 1.356050306215883e-05, "loss": 0.0588, "step": 22322 }, { "epoch": 2.647100675916044, "grad_norm": 1.0921658829163259, "learning_rate": 1.3558368789334938e-05, "loss": 0.1157, "step": 22323 }, { "epoch": 2.6472192576781692, "grad_norm": 0.4130211399963169, "learning_rate": 1.3556234621988855e-05, "loss": 0.0677, "step": 22324 }, { "epoch": 2.647337839440294, "grad_norm": 0.9645555641139941, "learning_rate": 1.3554100560140257e-05, "loss": 0.1261, "step": 22325 }, { "epoch": 2.647456421202419, "grad_norm": 0.6407070660300077, "learning_rate": 1.3551966603808814e-05, "loss": 0.0866, "step": 22326 }, { "epoch": 2.647575002964544, "grad_norm": 0.6467561613296895, "learning_rate": 1.3549832753014214e-05, "loss": 0.0836, "step": 22327 }, { "epoch": 2.647693584726669, "grad_norm": 0.7507721832121049, "learning_rate": 1.3547699007776087e-05, "loss": 0.0927, "step": 22328 }, { "epoch": 2.647812166488794, "grad_norm": 0.6981454797136427, "learning_rate": 1.354556536811416e-05, "loss": 0.0751, "step": 22329 }, { "epoch": 2.647930748250919, "grad_norm": 0.6396209055442964, "learning_rate": 1.3543431834048059e-05, "loss": 0.0787, "step": 22330 }, { "epoch": 2.648049330013044, "grad_norm": 0.6698337930888734, "learning_rate": 1.354129840559748e-05, "loss": 0.0755, "step": 22331 }, { "epoch": 2.648167911775169, "grad_norm": 0.4895550014089284, "learning_rate": 1.3539165082782056e-05, "loss": 0.0539, "step": 22332 }, { "epoch": 2.648286493537294, "grad_norm": 0.6551944507654328, "learning_rate": 1.3537031865621496e-05, "loss": 0.0767, "step": 22333 }, { "epoch": 2.648405075299419, "grad_norm": 1.0978412298902758, "learning_rate": 1.3534898754135438e-05, "loss": 0.1638, "step": 22334 }, { "epoch": 2.648523657061544, "grad_norm": 0.6332610492926669, "learning_rate": 1.353276574834355e-05, "loss": 0.0756, "step": 22335 }, { "epoch": 2.648642238823669, "grad_norm": 1.3321234019303214, "learning_rate": 1.3530632848265507e-05, "loss": 0.2136, "step": 22336 }, { "epoch": 2.6487608205857938, "grad_norm": 0.7376251067049799, "learning_rate": 1.3528500053920956e-05, "loss": 0.0993, "step": 22337 }, { "epoch": 2.648879402347919, "grad_norm": 0.7791418049799559, "learning_rate": 1.3526367365329584e-05, "loss": 0.0948, "step": 22338 }, { "epoch": 2.6489979841100437, "grad_norm": 0.9809228865410576, "learning_rate": 1.3524234782511012e-05, "loss": 0.1547, "step": 22339 }, { "epoch": 2.649116565872169, "grad_norm": 0.7692211597074591, "learning_rate": 1.352210230548494e-05, "loss": 0.1038, "step": 22340 }, { "epoch": 2.6492351476342937, "grad_norm": 0.9892910020846395, "learning_rate": 1.3519969934271005e-05, "loss": 0.1106, "step": 22341 }, { "epoch": 2.649353729396419, "grad_norm": 0.8182760507425587, "learning_rate": 1.3517837668888875e-05, "loss": 0.0824, "step": 22342 }, { "epoch": 2.6494723111585436, "grad_norm": 0.8209116020975719, "learning_rate": 1.3515705509358184e-05, "loss": 0.1034, "step": 22343 }, { "epoch": 2.649590892920669, "grad_norm": 0.6000567136805243, "learning_rate": 1.3513573455698625e-05, "loss": 0.0822, "step": 22344 }, { "epoch": 2.649709474682794, "grad_norm": 0.50546255287739, "learning_rate": 1.351144150792982e-05, "loss": 0.0899, "step": 22345 }, { "epoch": 2.649828056444919, "grad_norm": 0.8474080599220745, "learning_rate": 1.350930966607144e-05, "loss": 0.1483, "step": 22346 }, { "epoch": 2.6499466382070436, "grad_norm": 0.8581963893717277, "learning_rate": 1.3507177930143133e-05, "loss": 0.0991, "step": 22347 }, { "epoch": 2.6500652199691688, "grad_norm": 0.41770074459340734, "learning_rate": 1.350504630016455e-05, "loss": 0.0636, "step": 22348 }, { "epoch": 2.650183801731294, "grad_norm": 0.7560780022103756, "learning_rate": 1.3502914776155357e-05, "loss": 0.0898, "step": 22349 }, { "epoch": 2.6503023834934187, "grad_norm": 0.4551065211893728, "learning_rate": 1.3500783358135169e-05, "loss": 0.0582, "step": 22350 }, { "epoch": 2.6504209652555435, "grad_norm": 0.33019807383573796, "learning_rate": 1.3498652046123677e-05, "loss": 0.0462, "step": 22351 }, { "epoch": 2.6505395470176687, "grad_norm": 0.7470003301305436, "learning_rate": 1.3496520840140503e-05, "loss": 0.0878, "step": 22352 }, { "epoch": 2.650658128779794, "grad_norm": 0.5667743981405591, "learning_rate": 1.3494389740205305e-05, "loss": 0.0816, "step": 22353 }, { "epoch": 2.6507767105419187, "grad_norm": 0.6766170544527722, "learning_rate": 1.3492258746337707e-05, "loss": 0.0851, "step": 22354 }, { "epoch": 2.6508952923040434, "grad_norm": 0.7017844760563791, "learning_rate": 1.3490127858557395e-05, "loss": 0.0916, "step": 22355 }, { "epoch": 2.6510138740661686, "grad_norm": 0.5333119200856681, "learning_rate": 1.3487997076883973e-05, "loss": 0.0692, "step": 22356 }, { "epoch": 2.651132455828294, "grad_norm": 0.7051426181411415, "learning_rate": 1.3485866401337105e-05, "loss": 0.0736, "step": 22357 }, { "epoch": 2.6512510375904186, "grad_norm": 0.7627332659122888, "learning_rate": 1.3483735831936428e-05, "loss": 0.1035, "step": 22358 }, { "epoch": 2.6513696193525433, "grad_norm": 0.6363993954861621, "learning_rate": 1.3481605368701589e-05, "loss": 0.0841, "step": 22359 }, { "epoch": 2.6514882011146685, "grad_norm": 0.7875903348717576, "learning_rate": 1.3479475011652226e-05, "loss": 0.0977, "step": 22360 }, { "epoch": 2.6516067828767937, "grad_norm": 0.52000868083316, "learning_rate": 1.3477344760807969e-05, "loss": 0.0677, "step": 22361 }, { "epoch": 2.6517253646389185, "grad_norm": 1.0859185698372573, "learning_rate": 1.3475214616188467e-05, "loss": 0.1401, "step": 22362 }, { "epoch": 2.6518439464010433, "grad_norm": 0.6384404439853918, "learning_rate": 1.347308457781335e-05, "loss": 0.089, "step": 22363 }, { "epoch": 2.6519625281631685, "grad_norm": 0.7256074690987778, "learning_rate": 1.3470954645702268e-05, "loss": 0.1007, "step": 22364 }, { "epoch": 2.6520811099252937, "grad_norm": 0.5080146590907124, "learning_rate": 1.346882481987483e-05, "loss": 0.0694, "step": 22365 }, { "epoch": 2.6521996916874184, "grad_norm": 0.6011597563145563, "learning_rate": 1.3466695100350701e-05, "loss": 0.0861, "step": 22366 }, { "epoch": 2.652318273449543, "grad_norm": 0.6221114842936272, "learning_rate": 1.3464565487149494e-05, "loss": 0.0867, "step": 22367 }, { "epoch": 2.6524368552116684, "grad_norm": 0.6179259806795024, "learning_rate": 1.3462435980290846e-05, "loss": 0.0838, "step": 22368 }, { "epoch": 2.6525554369737936, "grad_norm": 0.5770580095983447, "learning_rate": 1.346030657979439e-05, "loss": 0.0653, "step": 22369 }, { "epoch": 2.6526740187359183, "grad_norm": 0.8552710866281239, "learning_rate": 1.3458177285679757e-05, "loss": 0.1268, "step": 22370 }, { "epoch": 2.6527926004980436, "grad_norm": 0.574505951890669, "learning_rate": 1.3456048097966586e-05, "loss": 0.0696, "step": 22371 }, { "epoch": 2.6529111822601683, "grad_norm": 1.06526329208451, "learning_rate": 1.3453919016674484e-05, "loss": 0.1104, "step": 22372 }, { "epoch": 2.6530297640222935, "grad_norm": 0.5721819413653225, "learning_rate": 1.3451790041823092e-05, "loss": 0.0732, "step": 22373 }, { "epoch": 2.6531483457844183, "grad_norm": 0.5500039485919671, "learning_rate": 1.3449661173432032e-05, "loss": 0.08, "step": 22374 }, { "epoch": 2.6532669275465435, "grad_norm": 0.6181097080996742, "learning_rate": 1.3447532411520936e-05, "loss": 0.0863, "step": 22375 }, { "epoch": 2.6533855093086682, "grad_norm": 0.6465968489713814, "learning_rate": 1.3445403756109432e-05, "loss": 0.1112, "step": 22376 }, { "epoch": 2.6535040910707934, "grad_norm": 0.49092561788914174, "learning_rate": 1.3443275207217116e-05, "loss": 0.0807, "step": 22377 }, { "epoch": 2.653622672832918, "grad_norm": 0.6354866601071564, "learning_rate": 1.344114676486365e-05, "loss": 0.0943, "step": 22378 }, { "epoch": 2.6537412545950434, "grad_norm": 0.6157762999720815, "learning_rate": 1.3439018429068628e-05, "loss": 0.069, "step": 22379 }, { "epoch": 2.653859836357168, "grad_norm": 0.8414706823319743, "learning_rate": 1.3436890199851679e-05, "loss": 0.1085, "step": 22380 }, { "epoch": 2.6539784181192934, "grad_norm": 0.5408184497298884, "learning_rate": 1.3434762077232422e-05, "loss": 0.0803, "step": 22381 }, { "epoch": 2.654096999881418, "grad_norm": 1.1708259067745304, "learning_rate": 1.3432634061230488e-05, "loss": 0.1183, "step": 22382 }, { "epoch": 2.6542155816435433, "grad_norm": 0.685648906271257, "learning_rate": 1.3430506151865468e-05, "loss": 0.1043, "step": 22383 }, { "epoch": 2.654334163405668, "grad_norm": 0.675117747536794, "learning_rate": 1.3428378349157001e-05, "loss": 0.0787, "step": 22384 }, { "epoch": 2.6544527451677933, "grad_norm": 0.6994008783631318, "learning_rate": 1.3426250653124692e-05, "loss": 0.0908, "step": 22385 }, { "epoch": 2.654571326929918, "grad_norm": 0.744327650009034, "learning_rate": 1.3424123063788158e-05, "loss": 0.1125, "step": 22386 }, { "epoch": 2.6546899086920432, "grad_norm": 0.5687776890252535, "learning_rate": 1.3421995581167026e-05, "loss": 0.0716, "step": 22387 }, { "epoch": 2.654808490454168, "grad_norm": 0.5040096790515542, "learning_rate": 1.3419868205280878e-05, "loss": 0.0642, "step": 22388 }, { "epoch": 2.654927072216293, "grad_norm": 1.031997632640187, "learning_rate": 1.3417740936149361e-05, "loss": 0.1266, "step": 22389 }, { "epoch": 2.655045653978418, "grad_norm": 1.1194900511058625, "learning_rate": 1.3415613773792063e-05, "loss": 0.1299, "step": 22390 }, { "epoch": 2.655164235740543, "grad_norm": 0.7851439051294935, "learning_rate": 1.3413486718228607e-05, "loss": 0.1197, "step": 22391 }, { "epoch": 2.655282817502668, "grad_norm": 0.5419916566019246, "learning_rate": 1.341135976947858e-05, "loss": 0.0692, "step": 22392 }, { "epoch": 2.655401399264793, "grad_norm": 0.7931895039277034, "learning_rate": 1.3409232927561621e-05, "loss": 0.0963, "step": 22393 }, { "epoch": 2.6555199810269183, "grad_norm": 0.6451054345977111, "learning_rate": 1.340710619249731e-05, "loss": 0.0939, "step": 22394 }, { "epoch": 2.655638562789043, "grad_norm": 0.5193334533098175, "learning_rate": 1.3404979564305268e-05, "loss": 0.0636, "step": 22395 }, { "epoch": 2.655757144551168, "grad_norm": 0.7961317690483893, "learning_rate": 1.3402853043005092e-05, "loss": 0.0981, "step": 22396 }, { "epoch": 2.655875726313293, "grad_norm": 0.7414180385071996, "learning_rate": 1.3400726628616393e-05, "loss": 0.0975, "step": 22397 }, { "epoch": 2.6559943080754183, "grad_norm": 0.6277473095766107, "learning_rate": 1.3398600321158777e-05, "loss": 0.0839, "step": 22398 }, { "epoch": 2.656112889837543, "grad_norm": 0.5362848009075686, "learning_rate": 1.339647412065182e-05, "loss": 0.0719, "step": 22399 }, { "epoch": 2.6562314715996678, "grad_norm": 0.9166106352708544, "learning_rate": 1.3394348027115164e-05, "loss": 0.1541, "step": 22400 }, { "epoch": 2.656350053361793, "grad_norm": 0.6350178024530285, "learning_rate": 1.3392222040568376e-05, "loss": 0.0811, "step": 22401 }, { "epoch": 2.656468635123918, "grad_norm": 0.4913492914267452, "learning_rate": 1.339009616103108e-05, "loss": 0.0824, "step": 22402 }, { "epoch": 2.656587216886043, "grad_norm": 0.866264080041984, "learning_rate": 1.3387970388522836e-05, "loss": 0.1221, "step": 22403 }, { "epoch": 2.6567057986481677, "grad_norm": 0.8272121721870792, "learning_rate": 1.3385844723063284e-05, "loss": 0.1068, "step": 22404 }, { "epoch": 2.656824380410293, "grad_norm": 0.49376756988135784, "learning_rate": 1.3383719164671998e-05, "loss": 0.0641, "step": 22405 }, { "epoch": 2.656942962172418, "grad_norm": 0.9391437955072227, "learning_rate": 1.3381593713368573e-05, "loss": 0.1691, "step": 22406 }, { "epoch": 2.657061543934543, "grad_norm": 0.6842462248478755, "learning_rate": 1.3379468369172604e-05, "loss": 0.1062, "step": 22407 }, { "epoch": 2.6571801256966676, "grad_norm": 0.8790591306427025, "learning_rate": 1.337734313210369e-05, "loss": 0.1168, "step": 22408 }, { "epoch": 2.657298707458793, "grad_norm": 0.8816985108213671, "learning_rate": 1.3375218002181428e-05, "loss": 0.1451, "step": 22409 }, { "epoch": 2.657417289220918, "grad_norm": 0.8291035499835709, "learning_rate": 1.3373092979425383e-05, "loss": 0.1266, "step": 22410 }, { "epoch": 2.657535870983043, "grad_norm": 0.5785898123081427, "learning_rate": 1.337096806385518e-05, "loss": 0.0792, "step": 22411 }, { "epoch": 2.6576544527451675, "grad_norm": 0.7566743200168137, "learning_rate": 1.3368843255490383e-05, "loss": 0.0975, "step": 22412 }, { "epoch": 2.6577730345072927, "grad_norm": 0.7058434871760788, "learning_rate": 1.3366718554350597e-05, "loss": 0.1029, "step": 22413 }, { "epoch": 2.657891616269418, "grad_norm": 0.7050785335660046, "learning_rate": 1.336459396045538e-05, "loss": 0.1063, "step": 22414 }, { "epoch": 2.6580101980315427, "grad_norm": 0.8487139551582903, "learning_rate": 1.3362469473824357e-05, "loss": 0.0783, "step": 22415 }, { "epoch": 2.6581287797936675, "grad_norm": 0.7371115687277869, "learning_rate": 1.3360345094477089e-05, "loss": 0.0953, "step": 22416 }, { "epoch": 2.6582473615557927, "grad_norm": 0.4243011362685989, "learning_rate": 1.3358220822433162e-05, "loss": 0.0612, "step": 22417 }, { "epoch": 2.658365943317918, "grad_norm": 0.6226591017653567, "learning_rate": 1.3356096657712164e-05, "loss": 0.0874, "step": 22418 }, { "epoch": 2.6584845250800426, "grad_norm": 0.5508245675434108, "learning_rate": 1.3353972600333678e-05, "loss": 0.0807, "step": 22419 }, { "epoch": 2.658603106842168, "grad_norm": 0.6145860560080613, "learning_rate": 1.3351848650317288e-05, "loss": 0.0762, "step": 22420 }, { "epoch": 2.6587216886042926, "grad_norm": 0.6701534941552142, "learning_rate": 1.3349724807682557e-05, "loss": 0.0778, "step": 22421 }, { "epoch": 2.658840270366418, "grad_norm": 0.5298993742711228, "learning_rate": 1.3347601072449093e-05, "loss": 0.08, "step": 22422 }, { "epoch": 2.6589588521285425, "grad_norm": 0.5049555068860782, "learning_rate": 1.3345477444636447e-05, "loss": 0.0586, "step": 22423 }, { "epoch": 2.6590774338906678, "grad_norm": 0.5849987727013093, "learning_rate": 1.3343353924264207e-05, "loss": 0.0848, "step": 22424 }, { "epoch": 2.6591960156527925, "grad_norm": 0.767606881256208, "learning_rate": 1.3341230511351955e-05, "loss": 0.1221, "step": 22425 }, { "epoch": 2.6593145974149177, "grad_norm": 0.7089199647771987, "learning_rate": 1.3339107205919254e-05, "loss": 0.098, "step": 22426 }, { "epoch": 2.6594331791770425, "grad_norm": 0.639793393905938, "learning_rate": 1.3336984007985697e-05, "loss": 0.0992, "step": 22427 }, { "epoch": 2.6595517609391677, "grad_norm": 0.820550869140673, "learning_rate": 1.3334860917570835e-05, "loss": 0.1093, "step": 22428 }, { "epoch": 2.6596703427012924, "grad_norm": 0.6118879663495935, "learning_rate": 1.333273793469425e-05, "loss": 0.0869, "step": 22429 }, { "epoch": 2.6597889244634176, "grad_norm": 0.7445666144948621, "learning_rate": 1.3330615059375517e-05, "loss": 0.0984, "step": 22430 }, { "epoch": 2.6599075062255424, "grad_norm": 0.5791254705942529, "learning_rate": 1.3328492291634212e-05, "loss": 0.0881, "step": 22431 }, { "epoch": 2.6600260879876676, "grad_norm": 0.77210292159417, "learning_rate": 1.3326369631489885e-05, "loss": 0.1236, "step": 22432 }, { "epoch": 2.6601446697497924, "grad_norm": 0.7583768914574568, "learning_rate": 1.3324247078962116e-05, "loss": 0.0979, "step": 22433 }, { "epoch": 2.6602632515119176, "grad_norm": 0.5189143690408851, "learning_rate": 1.3322124634070472e-05, "loss": 0.0718, "step": 22434 }, { "epoch": 2.6603818332740423, "grad_norm": 0.6575158284779892, "learning_rate": 1.3320002296834514e-05, "loss": 0.0806, "step": 22435 }, { "epoch": 2.6605004150361675, "grad_norm": 0.6763895436385835, "learning_rate": 1.3317880067273813e-05, "loss": 0.0823, "step": 22436 }, { "epoch": 2.6606189967982923, "grad_norm": 0.5254427939897763, "learning_rate": 1.3315757945407936e-05, "loss": 0.0823, "step": 22437 }, { "epoch": 2.6607375785604175, "grad_norm": 0.6345636980210823, "learning_rate": 1.3313635931256448e-05, "loss": 0.0896, "step": 22438 }, { "epoch": 2.6608561603225422, "grad_norm": 0.8052778163050947, "learning_rate": 1.33115140248389e-05, "loss": 0.1045, "step": 22439 }, { "epoch": 2.6609747420846674, "grad_norm": 0.6046268823599645, "learning_rate": 1.3309392226174858e-05, "loss": 0.0895, "step": 22440 }, { "epoch": 2.661093323846792, "grad_norm": 0.5507189618283719, "learning_rate": 1.3307270535283884e-05, "loss": 0.0771, "step": 22441 }, { "epoch": 2.6612119056089174, "grad_norm": 0.7285015518336657, "learning_rate": 1.3305148952185546e-05, "loss": 0.1041, "step": 22442 }, { "epoch": 2.6613304873710426, "grad_norm": 0.4472958029172166, "learning_rate": 1.3303027476899382e-05, "loss": 0.0605, "step": 22443 }, { "epoch": 2.6614490691331674, "grad_norm": 0.8214893793757132, "learning_rate": 1.3300906109444963e-05, "loss": 0.1338, "step": 22444 }, { "epoch": 2.661567650895292, "grad_norm": 0.6330771129637435, "learning_rate": 1.3298784849841844e-05, "loss": 0.077, "step": 22445 }, { "epoch": 2.6616862326574173, "grad_norm": 0.7155178879551087, "learning_rate": 1.329666369810958e-05, "loss": 0.0874, "step": 22446 }, { "epoch": 2.6618048144195425, "grad_norm": 0.6127504524333259, "learning_rate": 1.3294542654267733e-05, "loss": 0.0857, "step": 22447 }, { "epoch": 2.6619233961816673, "grad_norm": 0.6303653149845752, "learning_rate": 1.329242171833583e-05, "loss": 0.0979, "step": 22448 }, { "epoch": 2.662041977943792, "grad_norm": 0.4694227884877666, "learning_rate": 1.329030089033346e-05, "loss": 0.0523, "step": 22449 }, { "epoch": 2.6621605597059173, "grad_norm": 0.6562839955459945, "learning_rate": 1.3288180170280146e-05, "loss": 0.0846, "step": 22450 }, { "epoch": 2.6622791414680425, "grad_norm": 0.5217468090903937, "learning_rate": 1.3286059558195452e-05, "loss": 0.0725, "step": 22451 }, { "epoch": 2.662397723230167, "grad_norm": 0.47437416605155425, "learning_rate": 1.328393905409892e-05, "loss": 0.0624, "step": 22452 }, { "epoch": 2.662516304992292, "grad_norm": 0.5591201995061557, "learning_rate": 1.328181865801012e-05, "loss": 0.0705, "step": 22453 }, { "epoch": 2.662634886754417, "grad_norm": 0.7126168649876842, "learning_rate": 1.3279698369948567e-05, "loss": 0.0973, "step": 22454 }, { "epoch": 2.6627534685165424, "grad_norm": 0.4576086896012243, "learning_rate": 1.3277578189933823e-05, "loss": 0.0537, "step": 22455 }, { "epoch": 2.662872050278667, "grad_norm": 0.6372267520825832, "learning_rate": 1.3275458117985434e-05, "loss": 0.1053, "step": 22456 }, { "epoch": 2.662990632040792, "grad_norm": 0.6351668193232108, "learning_rate": 1.3273338154122943e-05, "loss": 0.0917, "step": 22457 }, { "epoch": 2.663109213802917, "grad_norm": 0.7353219157655141, "learning_rate": 1.3271218298365904e-05, "loss": 0.0716, "step": 22458 }, { "epoch": 2.6632277955650423, "grad_norm": 0.7327992426092528, "learning_rate": 1.3269098550733827e-05, "loss": 0.1142, "step": 22459 }, { "epoch": 2.663346377327167, "grad_norm": 0.6932339488996301, "learning_rate": 1.32669789112463e-05, "loss": 0.092, "step": 22460 }, { "epoch": 2.663464959089292, "grad_norm": 0.796770140145577, "learning_rate": 1.3264859379922829e-05, "loss": 0.0857, "step": 22461 }, { "epoch": 2.663583540851417, "grad_norm": 0.7647214537783893, "learning_rate": 1.3262739956782972e-05, "loss": 0.0922, "step": 22462 }, { "epoch": 2.6637021226135422, "grad_norm": 0.6486676284477038, "learning_rate": 1.3260620641846242e-05, "loss": 0.0907, "step": 22463 }, { "epoch": 2.663820704375667, "grad_norm": 0.7440429510528012, "learning_rate": 1.3258501435132212e-05, "loss": 0.1033, "step": 22464 }, { "epoch": 2.6639392861377917, "grad_norm": 0.5578018343913153, "learning_rate": 1.3256382336660389e-05, "loss": 0.0822, "step": 22465 }, { "epoch": 2.664057867899917, "grad_norm": 0.6530233572361547, "learning_rate": 1.3254263346450324e-05, "loss": 0.0996, "step": 22466 }, { "epoch": 2.664176449662042, "grad_norm": 0.7232963825095917, "learning_rate": 1.3252144464521544e-05, "loss": 0.1004, "step": 22467 }, { "epoch": 2.664295031424167, "grad_norm": 0.9456924179014553, "learning_rate": 1.3250025690893586e-05, "loss": 0.0963, "step": 22468 }, { "epoch": 2.664413613186292, "grad_norm": 0.7340584602581961, "learning_rate": 1.3247907025585992e-05, "loss": 0.1315, "step": 22469 }, { "epoch": 2.664532194948417, "grad_norm": 0.6321511422862255, "learning_rate": 1.3245788468618268e-05, "loss": 0.0929, "step": 22470 }, { "epoch": 2.664650776710542, "grad_norm": 0.8406107973248735, "learning_rate": 1.3243670020009977e-05, "loss": 0.1105, "step": 22471 }, { "epoch": 2.664769358472667, "grad_norm": 0.5081360674079848, "learning_rate": 1.3241551679780623e-05, "loss": 0.0674, "step": 22472 }, { "epoch": 2.664887940234792, "grad_norm": 0.6016269363229536, "learning_rate": 1.3239433447949743e-05, "loss": 0.089, "step": 22473 }, { "epoch": 2.665006521996917, "grad_norm": 0.6594904072670792, "learning_rate": 1.3237315324536864e-05, "loss": 0.0874, "step": 22474 }, { "epoch": 2.665125103759042, "grad_norm": 0.6949524360961182, "learning_rate": 1.3235197309561514e-05, "loss": 0.0885, "step": 22475 }, { "epoch": 2.6652436855211667, "grad_norm": 0.5588776909669847, "learning_rate": 1.3233079403043224e-05, "loss": 0.0708, "step": 22476 }, { "epoch": 2.665362267283292, "grad_norm": 0.6816993975273699, "learning_rate": 1.3230961605001506e-05, "loss": 0.0696, "step": 22477 }, { "epoch": 2.6654808490454167, "grad_norm": 0.4278146003401951, "learning_rate": 1.3228843915455893e-05, "loss": 0.0631, "step": 22478 }, { "epoch": 2.665599430807542, "grad_norm": 0.9180550725982773, "learning_rate": 1.3226726334425902e-05, "loss": 0.1285, "step": 22479 }, { "epoch": 2.6657180125696667, "grad_norm": 0.46025989845458337, "learning_rate": 1.3224608861931065e-05, "loss": 0.066, "step": 22480 }, { "epoch": 2.665836594331792, "grad_norm": 0.535353733242164, "learning_rate": 1.3222491497990874e-05, "loss": 0.0679, "step": 22481 }, { "epoch": 2.6659551760939166, "grad_norm": 0.6458543993607475, "learning_rate": 1.3220374242624892e-05, "loss": 0.0982, "step": 22482 }, { "epoch": 2.666073757856042, "grad_norm": 0.7549070358604119, "learning_rate": 1.3218257095852604e-05, "loss": 0.0952, "step": 22483 }, { "epoch": 2.6661923396181666, "grad_norm": 0.5995009616947008, "learning_rate": 1.3216140057693535e-05, "loss": 0.0797, "step": 22484 }, { "epoch": 2.666310921380292, "grad_norm": 0.5749846485756692, "learning_rate": 1.3214023128167209e-05, "loss": 0.0855, "step": 22485 }, { "epoch": 2.6664295031424166, "grad_norm": 0.8126639111252257, "learning_rate": 1.321190630729313e-05, "loss": 0.123, "step": 22486 }, { "epoch": 2.6665480849045418, "grad_norm": 0.8080673185783322, "learning_rate": 1.3209789595090832e-05, "loss": 0.0917, "step": 22487 }, { "epoch": 2.6666666666666665, "grad_norm": 0.5728512842470257, "learning_rate": 1.3207672991579808e-05, "loss": 0.0749, "step": 22488 }, { "epoch": 2.6667852484287917, "grad_norm": 0.7411836756777824, "learning_rate": 1.3205556496779578e-05, "loss": 0.1361, "step": 22489 }, { "epoch": 2.6669038301909165, "grad_norm": 0.6652720118189394, "learning_rate": 1.3203440110709653e-05, "loss": 0.0876, "step": 22490 }, { "epoch": 2.6670224119530417, "grad_norm": 0.748383246525158, "learning_rate": 1.3201323833389557e-05, "loss": 0.1025, "step": 22491 }, { "epoch": 2.6671409937151664, "grad_norm": 0.5960270656536343, "learning_rate": 1.3199207664838762e-05, "loss": 0.0745, "step": 22492 }, { "epoch": 2.6672595754772916, "grad_norm": 0.5908379279142165, "learning_rate": 1.3197091605076822e-05, "loss": 0.074, "step": 22493 }, { "epoch": 2.6673781572394164, "grad_norm": 0.5869325862795102, "learning_rate": 1.3194975654123214e-05, "loss": 0.0549, "step": 22494 }, { "epoch": 2.6674967390015416, "grad_norm": 0.707571119830174, "learning_rate": 1.3192859811997457e-05, "loss": 0.0734, "step": 22495 }, { "epoch": 2.667615320763667, "grad_norm": 0.6723050965212071, "learning_rate": 1.3190744078719052e-05, "loss": 0.065, "step": 22496 }, { "epoch": 2.6677339025257916, "grad_norm": 0.5054257466112098, "learning_rate": 1.3188628454307506e-05, "loss": 0.0682, "step": 22497 }, { "epoch": 2.6678524842879163, "grad_norm": 0.838571202296284, "learning_rate": 1.318651293878233e-05, "loss": 0.112, "step": 22498 }, { "epoch": 2.6679710660500415, "grad_norm": 0.604181781400712, "learning_rate": 1.3184397532163007e-05, "loss": 0.109, "step": 22499 }, { "epoch": 2.6680896478121667, "grad_norm": 0.6084951192188945, "learning_rate": 1.3182282234469052e-05, "loss": 0.0708, "step": 22500 }, { "epoch": 2.6682082295742915, "grad_norm": 0.7899293029752712, "learning_rate": 1.318016704571996e-05, "loss": 0.116, "step": 22501 }, { "epoch": 2.6683268113364162, "grad_norm": 0.6259195271962176, "learning_rate": 1.3178051965935246e-05, "loss": 0.073, "step": 22502 }, { "epoch": 2.6684453930985415, "grad_norm": 0.7578006500054932, "learning_rate": 1.3175936995134385e-05, "loss": 0.1026, "step": 22503 }, { "epoch": 2.6685639748606667, "grad_norm": 0.4403115222826238, "learning_rate": 1.3173822133336888e-05, "loss": 0.0578, "step": 22504 }, { "epoch": 2.6686825566227914, "grad_norm": 0.7322169197610137, "learning_rate": 1.3171707380562246e-05, "loss": 0.1094, "step": 22505 }, { "epoch": 2.668801138384916, "grad_norm": 1.0375836559357705, "learning_rate": 1.3169592736829956e-05, "loss": 0.129, "step": 22506 }, { "epoch": 2.6689197201470414, "grad_norm": 0.5876456592696075, "learning_rate": 1.3167478202159516e-05, "loss": 0.0791, "step": 22507 }, { "epoch": 2.6690383019091666, "grad_norm": 0.7263845168104083, "learning_rate": 1.3165363776570416e-05, "loss": 0.0998, "step": 22508 }, { "epoch": 2.6691568836712913, "grad_norm": 0.6081879447095457, "learning_rate": 1.3163249460082161e-05, "loss": 0.0893, "step": 22509 }, { "epoch": 2.669275465433416, "grad_norm": 0.47166798478382943, "learning_rate": 1.3161135252714219e-05, "loss": 0.0637, "step": 22510 }, { "epoch": 2.6693940471955413, "grad_norm": 0.7297959040572417, "learning_rate": 1.315902115448609e-05, "loss": 0.0889, "step": 22511 }, { "epoch": 2.6695126289576665, "grad_norm": 0.8003942491403716, "learning_rate": 1.3156907165417271e-05, "loss": 0.1198, "step": 22512 }, { "epoch": 2.6696312107197913, "grad_norm": 0.6343017785350753, "learning_rate": 1.315479328552725e-05, "loss": 0.0879, "step": 22513 }, { "epoch": 2.669749792481916, "grad_norm": 0.6811776572516606, "learning_rate": 1.31526795148355e-05, "loss": 0.074, "step": 22514 }, { "epoch": 2.669868374244041, "grad_norm": 0.6716160865651479, "learning_rate": 1.3150565853361518e-05, "loss": 0.0964, "step": 22515 }, { "epoch": 2.6699869560061664, "grad_norm": 0.6078943109542597, "learning_rate": 1.3148452301124788e-05, "loss": 0.0827, "step": 22516 }, { "epoch": 2.670105537768291, "grad_norm": 0.9435191383194063, "learning_rate": 1.3146338858144797e-05, "loss": 0.1322, "step": 22517 }, { "epoch": 2.6702241195304164, "grad_norm": 0.5962735599829234, "learning_rate": 1.3144225524441034e-05, "loss": 0.0998, "step": 22518 }, { "epoch": 2.670342701292541, "grad_norm": 0.4798409382001057, "learning_rate": 1.314211230003295e-05, "loss": 0.0796, "step": 22519 }, { "epoch": 2.6704612830546663, "grad_norm": 0.6233764832367209, "learning_rate": 1.3139999184940071e-05, "loss": 0.1041, "step": 22520 }, { "epoch": 2.670579864816791, "grad_norm": 0.8137069297344217, "learning_rate": 1.313788617918185e-05, "loss": 0.0958, "step": 22521 }, { "epoch": 2.6706984465789163, "grad_norm": 0.5213185373021973, "learning_rate": 1.3135773282777768e-05, "loss": 0.076, "step": 22522 }, { "epoch": 2.670817028341041, "grad_norm": 0.5148815844000691, "learning_rate": 1.3133660495747308e-05, "loss": 0.0644, "step": 22523 }, { "epoch": 2.6709356101031663, "grad_norm": 0.6690004749598457, "learning_rate": 1.3131547818109947e-05, "loss": 0.1112, "step": 22524 }, { "epoch": 2.671054191865291, "grad_norm": 0.695653300800375, "learning_rate": 1.3129435249885173e-05, "loss": 0.0808, "step": 22525 }, { "epoch": 2.6711727736274162, "grad_norm": 0.6767937819232538, "learning_rate": 1.3127322791092427e-05, "loss": 0.0655, "step": 22526 }, { "epoch": 2.671291355389541, "grad_norm": 0.4652638734154, "learning_rate": 1.3125210441751223e-05, "loss": 0.0639, "step": 22527 }, { "epoch": 2.671409937151666, "grad_norm": 0.8142550874721474, "learning_rate": 1.3123098201881012e-05, "loss": 0.0946, "step": 22528 }, { "epoch": 2.671528518913791, "grad_norm": 0.9159056501010985, "learning_rate": 1.3120986071501279e-05, "loss": 0.1287, "step": 22529 }, { "epoch": 2.671647100675916, "grad_norm": 0.9383881151837395, "learning_rate": 1.3118874050631469e-05, "loss": 0.1001, "step": 22530 }, { "epoch": 2.671765682438041, "grad_norm": 0.6829298237272435, "learning_rate": 1.3116762139291089e-05, "loss": 0.0762, "step": 22531 }, { "epoch": 2.671884264200166, "grad_norm": 0.885322432647032, "learning_rate": 1.3114650337499578e-05, "loss": 0.1118, "step": 22532 }, { "epoch": 2.672002845962291, "grad_norm": 0.63959464851416, "learning_rate": 1.311253864527642e-05, "loss": 0.0858, "step": 22533 }, { "epoch": 2.672121427724416, "grad_norm": 0.7319527723998631, "learning_rate": 1.3110427062641078e-05, "loss": 0.1007, "step": 22534 }, { "epoch": 2.672240009486541, "grad_norm": 0.553495636622783, "learning_rate": 1.3108315589613018e-05, "loss": 0.0861, "step": 22535 }, { "epoch": 2.672358591248666, "grad_norm": 0.8097738547380178, "learning_rate": 1.3106204226211713e-05, "loss": 0.1055, "step": 22536 }, { "epoch": 2.672477173010791, "grad_norm": 0.7025386572783162, "learning_rate": 1.3104092972456602e-05, "loss": 0.0853, "step": 22537 }, { "epoch": 2.672595754772916, "grad_norm": 0.8928333583174839, "learning_rate": 1.3101981828367186e-05, "loss": 0.1138, "step": 22538 }, { "epoch": 2.6727143365350408, "grad_norm": 0.5722382818014681, "learning_rate": 1.3099870793962899e-05, "loss": 0.0701, "step": 22539 }, { "epoch": 2.672832918297166, "grad_norm": 0.6801978174545812, "learning_rate": 1.3097759869263215e-05, "loss": 0.0868, "step": 22540 }, { "epoch": 2.6729515000592907, "grad_norm": 1.1737511240022422, "learning_rate": 1.3095649054287573e-05, "loss": 0.1465, "step": 22541 }, { "epoch": 2.673070081821416, "grad_norm": 0.7844933381075219, "learning_rate": 1.309353834905547e-05, "loss": 0.0825, "step": 22542 }, { "epoch": 2.6731886635835407, "grad_norm": 0.7559803969098862, "learning_rate": 1.3091427753586333e-05, "loss": 0.0845, "step": 22543 }, { "epoch": 2.673307245345666, "grad_norm": 0.5093099934713377, "learning_rate": 1.3089317267899626e-05, "loss": 0.0786, "step": 22544 }, { "epoch": 2.673425827107791, "grad_norm": 0.5257702430103471, "learning_rate": 1.3087206892014809e-05, "loss": 0.0612, "step": 22545 }, { "epoch": 2.673544408869916, "grad_norm": 0.9904086250391138, "learning_rate": 1.3085096625951338e-05, "loss": 0.1271, "step": 22546 }, { "epoch": 2.6736629906320406, "grad_norm": 0.3660725551266328, "learning_rate": 1.308298646972867e-05, "loss": 0.0469, "step": 22547 }, { "epoch": 2.673781572394166, "grad_norm": 0.6686395017488083, "learning_rate": 1.3080876423366249e-05, "loss": 0.1062, "step": 22548 }, { "epoch": 2.673900154156291, "grad_norm": 0.6969277428866165, "learning_rate": 1.3078766486883526e-05, "loss": 0.1009, "step": 22549 }, { "epoch": 2.6740187359184158, "grad_norm": 0.5945445320917959, "learning_rate": 1.3076656660299963e-05, "loss": 0.0715, "step": 22550 }, { "epoch": 2.6741373176805405, "grad_norm": 0.6617036234482043, "learning_rate": 1.3074546943635008e-05, "loss": 0.0975, "step": 22551 }, { "epoch": 2.6742558994426657, "grad_norm": 0.8432212403150774, "learning_rate": 1.3072437336908092e-05, "loss": 0.1337, "step": 22552 }, { "epoch": 2.674374481204791, "grad_norm": 0.5998808945628443, "learning_rate": 1.3070327840138693e-05, "loss": 0.0776, "step": 22553 }, { "epoch": 2.6744930629669157, "grad_norm": 1.3045267082035905, "learning_rate": 1.3068218453346234e-05, "loss": 0.1809, "step": 22554 }, { "epoch": 2.6746116447290404, "grad_norm": 0.5472729700189259, "learning_rate": 1.3066109176550167e-05, "loss": 0.0749, "step": 22555 }, { "epoch": 2.6747302264911657, "grad_norm": 0.41719885440746557, "learning_rate": 1.3064000009769945e-05, "loss": 0.0551, "step": 22556 }, { "epoch": 2.674848808253291, "grad_norm": 0.608588556688199, "learning_rate": 1.3061890953025e-05, "loss": 0.0851, "step": 22557 }, { "epoch": 2.6749673900154156, "grad_norm": 0.7481995998797529, "learning_rate": 1.3059782006334792e-05, "loss": 0.084, "step": 22558 }, { "epoch": 2.6750859717775404, "grad_norm": 0.6063700621325764, "learning_rate": 1.3057673169718743e-05, "loss": 0.0754, "step": 22559 }, { "epoch": 2.6752045535396656, "grad_norm": 0.768959103070248, "learning_rate": 1.3055564443196302e-05, "loss": 0.1229, "step": 22560 }, { "epoch": 2.6753231353017908, "grad_norm": 0.46113714805090067, "learning_rate": 1.3053455826786908e-05, "loss": 0.0621, "step": 22561 }, { "epoch": 2.6754417170639155, "grad_norm": 0.5976749485633504, "learning_rate": 1.3051347320510015e-05, "loss": 0.0845, "step": 22562 }, { "epoch": 2.6755602988260403, "grad_norm": 0.7582802416129806, "learning_rate": 1.3049238924385037e-05, "loss": 0.1071, "step": 22563 }, { "epoch": 2.6756788805881655, "grad_norm": 0.539339396528167, "learning_rate": 1.304713063843142e-05, "loss": 0.0472, "step": 22564 }, { "epoch": 2.6757974623502907, "grad_norm": 0.5365091344307127, "learning_rate": 1.30450224626686e-05, "loss": 0.0775, "step": 22565 }, { "epoch": 2.6759160441124155, "grad_norm": 0.5852040160349374, "learning_rate": 1.3042914397116014e-05, "loss": 0.0657, "step": 22566 }, { "epoch": 2.67603462587454, "grad_norm": 0.6230579094285195, "learning_rate": 1.3040806441793097e-05, "loss": 0.0888, "step": 22567 }, { "epoch": 2.6761532076366654, "grad_norm": 0.6008179650082505, "learning_rate": 1.3038698596719274e-05, "loss": 0.1001, "step": 22568 }, { "epoch": 2.6762717893987906, "grad_norm": 0.7495241102454784, "learning_rate": 1.3036590861913994e-05, "loss": 0.1131, "step": 22569 }, { "epoch": 2.6763903711609154, "grad_norm": 0.5708030110920733, "learning_rate": 1.3034483237396666e-05, "loss": 0.0961, "step": 22570 }, { "epoch": 2.6765089529230406, "grad_norm": 0.6023630725712099, "learning_rate": 1.303237572318673e-05, "loss": 0.0884, "step": 22571 }, { "epoch": 2.6766275346851653, "grad_norm": 0.6670894635291013, "learning_rate": 1.3030268319303612e-05, "loss": 0.0797, "step": 22572 }, { "epoch": 2.6767461164472905, "grad_norm": 0.5445366737897201, "learning_rate": 1.3028161025766756e-05, "loss": 0.0856, "step": 22573 }, { "epoch": 2.6768646982094153, "grad_norm": 0.5786607568376005, "learning_rate": 1.3026053842595562e-05, "loss": 0.0868, "step": 22574 }, { "epoch": 2.6769832799715405, "grad_norm": 0.9557764025136658, "learning_rate": 1.302394676980947e-05, "loss": 0.1377, "step": 22575 }, { "epoch": 2.6771018617336653, "grad_norm": 0.7755827801835828, "learning_rate": 1.3021839807427899e-05, "loss": 0.1031, "step": 22576 }, { "epoch": 2.6772204434957905, "grad_norm": 0.7154862803918858, "learning_rate": 1.3019732955470279e-05, "loss": 0.0992, "step": 22577 }, { "epoch": 2.6773390252579152, "grad_norm": 0.7804949838433117, "learning_rate": 1.301762621395603e-05, "loss": 0.102, "step": 22578 }, { "epoch": 2.6774576070200404, "grad_norm": 0.8546001355114005, "learning_rate": 1.3015519582904572e-05, "loss": 0.1059, "step": 22579 }, { "epoch": 2.677576188782165, "grad_norm": 0.6024873667861805, "learning_rate": 1.3013413062335339e-05, "loss": 0.078, "step": 22580 }, { "epoch": 2.6776947705442904, "grad_norm": 0.6989664533450602, "learning_rate": 1.3011306652267726e-05, "loss": 0.0983, "step": 22581 }, { "epoch": 2.677813352306415, "grad_norm": 0.7872324917177467, "learning_rate": 1.3009200352721168e-05, "loss": 0.1124, "step": 22582 }, { "epoch": 2.6779319340685404, "grad_norm": 0.4457466582979142, "learning_rate": 1.3007094163715075e-05, "loss": 0.061, "step": 22583 }, { "epoch": 2.678050515830665, "grad_norm": 0.4454620976672655, "learning_rate": 1.3004988085268868e-05, "loss": 0.0638, "step": 22584 }, { "epoch": 2.6781690975927903, "grad_norm": 0.7642290496076534, "learning_rate": 1.3002882117401971e-05, "loss": 0.0996, "step": 22585 }, { "epoch": 2.678287679354915, "grad_norm": 0.7793633558365591, "learning_rate": 1.3000776260133773e-05, "loss": 0.1126, "step": 22586 }, { "epoch": 2.6784062611170403, "grad_norm": 0.8598984686876435, "learning_rate": 1.2998670513483718e-05, "loss": 0.1451, "step": 22587 }, { "epoch": 2.678524842879165, "grad_norm": 0.4226059163639984, "learning_rate": 1.2996564877471198e-05, "loss": 0.0531, "step": 22588 }, { "epoch": 2.6786434246412902, "grad_norm": 0.6558943411705176, "learning_rate": 1.299445935211564e-05, "loss": 0.0821, "step": 22589 }, { "epoch": 2.678762006403415, "grad_norm": 0.7877057358038038, "learning_rate": 1.2992353937436424e-05, "loss": 0.1128, "step": 22590 }, { "epoch": 2.67888058816554, "grad_norm": 0.515969814248458, "learning_rate": 1.2990248633452998e-05, "loss": 0.0736, "step": 22591 }, { "epoch": 2.678999169927665, "grad_norm": 0.5560232823728928, "learning_rate": 1.2988143440184743e-05, "loss": 0.0775, "step": 22592 }, { "epoch": 2.67911775168979, "grad_norm": 0.8928168315379594, "learning_rate": 1.2986038357651076e-05, "loss": 0.12, "step": 22593 }, { "epoch": 2.6792363334519154, "grad_norm": 0.8959685394457125, "learning_rate": 1.2983933385871405e-05, "loss": 0.1154, "step": 22594 }, { "epoch": 2.67935491521404, "grad_norm": 0.7332237947999771, "learning_rate": 1.2981828524865131e-05, "loss": 0.0928, "step": 22595 }, { "epoch": 2.679473496976165, "grad_norm": 0.6430865194443881, "learning_rate": 1.297972377465167e-05, "loss": 0.0977, "step": 22596 }, { "epoch": 2.67959207873829, "grad_norm": 0.7872516924675793, "learning_rate": 1.2977619135250395e-05, "loss": 0.0986, "step": 22597 }, { "epoch": 2.6797106605004153, "grad_norm": 0.8465221659676099, "learning_rate": 1.297551460668075e-05, "loss": 0.1018, "step": 22598 }, { "epoch": 2.67982924226254, "grad_norm": 0.6132168467964908, "learning_rate": 1.2973410188962104e-05, "loss": 0.0834, "step": 22599 }, { "epoch": 2.679947824024665, "grad_norm": 0.55155907354466, "learning_rate": 1.2971305882113882e-05, "loss": 0.0724, "step": 22600 }, { "epoch": 2.68006640578679, "grad_norm": 0.602773298926713, "learning_rate": 1.2969201686155446e-05, "loss": 0.0752, "step": 22601 }, { "epoch": 2.680184987548915, "grad_norm": 0.6772003995382532, "learning_rate": 1.2967097601106237e-05, "loss": 0.0931, "step": 22602 }, { "epoch": 2.68030356931104, "grad_norm": 0.5013828927436841, "learning_rate": 1.2964993626985622e-05, "loss": 0.0597, "step": 22603 }, { "epoch": 2.6804221510731647, "grad_norm": 0.6927189242159728, "learning_rate": 1.2962889763813008e-05, "loss": 0.0876, "step": 22604 }, { "epoch": 2.68054073283529, "grad_norm": 0.9444979662087531, "learning_rate": 1.296078601160779e-05, "loss": 0.08, "step": 22605 }, { "epoch": 2.680659314597415, "grad_norm": 0.5263653131016401, "learning_rate": 1.2958682370389364e-05, "loss": 0.0652, "step": 22606 }, { "epoch": 2.68077789635954, "grad_norm": 0.8634956589013477, "learning_rate": 1.2956578840177126e-05, "loss": 0.1248, "step": 22607 }, { "epoch": 2.6808964781216647, "grad_norm": 0.8224833484780887, "learning_rate": 1.2954475420990441e-05, "loss": 0.0978, "step": 22608 }, { "epoch": 2.68101505988379, "grad_norm": 0.681683332656767, "learning_rate": 1.2952372112848748e-05, "loss": 0.1007, "step": 22609 }, { "epoch": 2.681133641645915, "grad_norm": 0.8029408613082729, "learning_rate": 1.2950268915771397e-05, "loss": 0.1204, "step": 22610 }, { "epoch": 2.68125222340804, "grad_norm": 0.5668308634950144, "learning_rate": 1.2948165829777797e-05, "loss": 0.1, "step": 22611 }, { "epoch": 2.6813708051701646, "grad_norm": 0.840276803549313, "learning_rate": 1.2946062854887313e-05, "loss": 0.1084, "step": 22612 }, { "epoch": 2.6814893869322898, "grad_norm": 0.9922874734572971, "learning_rate": 1.294395999111937e-05, "loss": 0.1487, "step": 22613 }, { "epoch": 2.681607968694415, "grad_norm": 0.7732838906662184, "learning_rate": 1.2941857238493319e-05, "loss": 0.1048, "step": 22614 }, { "epoch": 2.6817265504565397, "grad_norm": 0.5312101629266369, "learning_rate": 1.2939754597028559e-05, "loss": 0.0846, "step": 22615 }, { "epoch": 2.6818451322186645, "grad_norm": 0.6340044117603783, "learning_rate": 1.2937652066744472e-05, "loss": 0.0802, "step": 22616 }, { "epoch": 2.6819637139807897, "grad_norm": 0.5590043966177853, "learning_rate": 1.2935549647660439e-05, "loss": 0.0642, "step": 22617 }, { "epoch": 2.682082295742915, "grad_norm": 0.6326294674493468, "learning_rate": 1.2933447339795853e-05, "loss": 0.0976, "step": 22618 }, { "epoch": 2.6822008775050397, "grad_norm": 0.772358510742775, "learning_rate": 1.2931345143170077e-05, "loss": 0.1104, "step": 22619 }, { "epoch": 2.682319459267165, "grad_norm": 0.7177861821910948, "learning_rate": 1.2929243057802501e-05, "loss": 0.1115, "step": 22620 }, { "epoch": 2.6824380410292896, "grad_norm": 0.6279231858499384, "learning_rate": 1.2927141083712502e-05, "loss": 0.077, "step": 22621 }, { "epoch": 2.682556622791415, "grad_norm": 0.6998931109227527, "learning_rate": 1.2925039220919466e-05, "loss": 0.0981, "step": 22622 }, { "epoch": 2.6826752045535396, "grad_norm": 0.48002255840685587, "learning_rate": 1.2922937469442741e-05, "loss": 0.0535, "step": 22623 }, { "epoch": 2.682793786315665, "grad_norm": 0.8856892828373653, "learning_rate": 1.2920835829301741e-05, "loss": 0.0932, "step": 22624 }, { "epoch": 2.6829123680777895, "grad_norm": 0.8538375115299279, "learning_rate": 1.2918734300515816e-05, "loss": 0.1186, "step": 22625 }, { "epoch": 2.6830309498399147, "grad_norm": 0.611275326158649, "learning_rate": 1.2916632883104344e-05, "loss": 0.0856, "step": 22626 }, { "epoch": 2.6831495316020395, "grad_norm": 0.35883720298190264, "learning_rate": 1.2914531577086697e-05, "loss": 0.0495, "step": 22627 }, { "epoch": 2.6832681133641647, "grad_norm": 0.6751086444940376, "learning_rate": 1.2912430382482251e-05, "loss": 0.0954, "step": 22628 }, { "epoch": 2.6833866951262895, "grad_norm": 0.7734813449562702, "learning_rate": 1.2910329299310383e-05, "loss": 0.1102, "step": 22629 }, { "epoch": 2.6835052768884147, "grad_norm": 0.6642451811057386, "learning_rate": 1.2908228327590444e-05, "loss": 0.0865, "step": 22630 }, { "epoch": 2.6836238586505394, "grad_norm": 0.7830406115771968, "learning_rate": 1.2906127467341813e-05, "loss": 0.1012, "step": 22631 }, { "epoch": 2.6837424404126646, "grad_norm": 0.7516967905983076, "learning_rate": 1.2904026718583857e-05, "loss": 0.0877, "step": 22632 }, { "epoch": 2.6838610221747894, "grad_norm": 0.779702776837603, "learning_rate": 1.2901926081335941e-05, "loss": 0.1191, "step": 22633 }, { "epoch": 2.6839796039369146, "grad_norm": 0.644065032080313, "learning_rate": 1.289982555561744e-05, "loss": 0.0907, "step": 22634 }, { "epoch": 2.6840981856990394, "grad_norm": 0.8191456447255977, "learning_rate": 1.2897725141447686e-05, "loss": 0.1254, "step": 22635 }, { "epoch": 2.6842167674611646, "grad_norm": 0.7682378688115873, "learning_rate": 1.289562483884609e-05, "loss": 0.107, "step": 22636 }, { "epoch": 2.6843353492232893, "grad_norm": 0.4961955207599558, "learning_rate": 1.2893524647831977e-05, "loss": 0.076, "step": 22637 }, { "epoch": 2.6844539309854145, "grad_norm": 0.6084416927062515, "learning_rate": 1.2891424568424726e-05, "loss": 0.0783, "step": 22638 }, { "epoch": 2.6845725127475393, "grad_norm": 0.7895022820155893, "learning_rate": 1.2889324600643688e-05, "loss": 0.1059, "step": 22639 }, { "epoch": 2.6846910945096645, "grad_norm": 0.5394857424793932, "learning_rate": 1.2887224744508233e-05, "loss": 0.073, "step": 22640 }, { "epoch": 2.6848096762717892, "grad_norm": 1.0965841259979192, "learning_rate": 1.2885125000037706e-05, "loss": 0.1649, "step": 22641 }, { "epoch": 2.6849282580339144, "grad_norm": 0.49983519568062734, "learning_rate": 1.288302536725147e-05, "loss": 0.0682, "step": 22642 }, { "epoch": 2.6850468397960396, "grad_norm": 0.6748702996729742, "learning_rate": 1.288092584616888e-05, "loss": 0.0855, "step": 22643 }, { "epoch": 2.6851654215581644, "grad_norm": 0.561522219066556, "learning_rate": 1.2878826436809294e-05, "loss": 0.0655, "step": 22644 }, { "epoch": 2.685284003320289, "grad_norm": 0.5190045685327433, "learning_rate": 1.2876727139192074e-05, "loss": 0.0577, "step": 22645 }, { "epoch": 2.6854025850824144, "grad_norm": 0.803480557222526, "learning_rate": 1.2874627953336544e-05, "loss": 0.1119, "step": 22646 }, { "epoch": 2.6855211668445396, "grad_norm": 0.9388068697901837, "learning_rate": 1.2872528879262091e-05, "loss": 0.1428, "step": 22647 }, { "epoch": 2.6856397486066643, "grad_norm": 0.8278226041566398, "learning_rate": 1.2870429916988042e-05, "loss": 0.1131, "step": 22648 }, { "epoch": 2.685758330368789, "grad_norm": 0.8435568621108217, "learning_rate": 1.2868331066533757e-05, "loss": 0.1138, "step": 22649 }, { "epoch": 2.6858769121309143, "grad_norm": 0.5384964873037766, "learning_rate": 1.2866232327918582e-05, "loss": 0.0742, "step": 22650 }, { "epoch": 2.6859954938930395, "grad_norm": 0.6297222039348218, "learning_rate": 1.2864133701161873e-05, "loss": 0.0717, "step": 22651 }, { "epoch": 2.6861140756551642, "grad_norm": 0.8162208279300454, "learning_rate": 1.2862035186282961e-05, "loss": 0.1102, "step": 22652 }, { "epoch": 2.686232657417289, "grad_norm": 1.0896833710125664, "learning_rate": 1.2859936783301201e-05, "loss": 0.1168, "step": 22653 }, { "epoch": 2.686351239179414, "grad_norm": 0.8442768283272248, "learning_rate": 1.2857838492235938e-05, "loss": 0.1249, "step": 22654 }, { "epoch": 2.6864698209415394, "grad_norm": 0.9050368229979442, "learning_rate": 1.2855740313106512e-05, "loss": 0.1393, "step": 22655 }, { "epoch": 2.686588402703664, "grad_norm": 0.6635551297519555, "learning_rate": 1.2853642245932279e-05, "loss": 0.0836, "step": 22656 }, { "epoch": 2.686706984465789, "grad_norm": 0.6029486728710902, "learning_rate": 1.2851544290732547e-05, "loss": 0.0796, "step": 22657 }, { "epoch": 2.686825566227914, "grad_norm": 0.6238188519703463, "learning_rate": 1.2849446447526703e-05, "loss": 0.0943, "step": 22658 }, { "epoch": 2.6869441479900393, "grad_norm": 0.820610606373146, "learning_rate": 1.2847348716334052e-05, "loss": 0.1144, "step": 22659 }, { "epoch": 2.687062729752164, "grad_norm": 0.6650553414997744, "learning_rate": 1.2845251097173949e-05, "loss": 0.0922, "step": 22660 }, { "epoch": 2.687181311514289, "grad_norm": 0.6166728237903555, "learning_rate": 1.284315359006571e-05, "loss": 0.0786, "step": 22661 }, { "epoch": 2.687299893276414, "grad_norm": 0.9116481490605151, "learning_rate": 1.2841056195028709e-05, "loss": 0.0947, "step": 22662 }, { "epoch": 2.6874184750385393, "grad_norm": 0.7257384742951588, "learning_rate": 1.2838958912082244e-05, "loss": 0.1017, "step": 22663 }, { "epoch": 2.687537056800664, "grad_norm": 0.6000004375240212, "learning_rate": 1.283686174124567e-05, "loss": 0.0801, "step": 22664 }, { "epoch": 2.6876556385627888, "grad_norm": 0.5659495189000521, "learning_rate": 1.2834764682538313e-05, "loss": 0.0814, "step": 22665 }, { "epoch": 2.687774220324914, "grad_norm": 0.8945074949274806, "learning_rate": 1.283266773597951e-05, "loss": 0.1105, "step": 22666 }, { "epoch": 2.687892802087039, "grad_norm": 0.7233521829452693, "learning_rate": 1.2830570901588596e-05, "loss": 0.0814, "step": 22667 }, { "epoch": 2.688011383849164, "grad_norm": 0.5628142672995153, "learning_rate": 1.2828474179384875e-05, "loss": 0.0647, "step": 22668 }, { "epoch": 2.688129965611289, "grad_norm": 0.9517943618211849, "learning_rate": 1.282637756938772e-05, "loss": 0.1237, "step": 22669 }, { "epoch": 2.688248547373414, "grad_norm": 0.7883951748852276, "learning_rate": 1.2824281071616424e-05, "loss": 0.1114, "step": 22670 }, { "epoch": 2.688367129135539, "grad_norm": 0.5898198134822347, "learning_rate": 1.2822184686090333e-05, "loss": 0.0862, "step": 22671 }, { "epoch": 2.688485710897664, "grad_norm": 0.4455873516508463, "learning_rate": 1.282008841282875e-05, "loss": 0.0639, "step": 22672 }, { "epoch": 2.688604292659789, "grad_norm": 0.7240027923832637, "learning_rate": 1.2817992251851035e-05, "loss": 0.0986, "step": 22673 }, { "epoch": 2.688722874421914, "grad_norm": 0.8634321375389595, "learning_rate": 1.2815896203176481e-05, "loss": 0.114, "step": 22674 }, { "epoch": 2.688841456184039, "grad_norm": 0.5883618816805731, "learning_rate": 1.2813800266824424e-05, "loss": 0.0897, "step": 22675 }, { "epoch": 2.688960037946164, "grad_norm": 0.4709347078044427, "learning_rate": 1.2811704442814185e-05, "loss": 0.0598, "step": 22676 }, { "epoch": 2.689078619708289, "grad_norm": 0.5210881954664954, "learning_rate": 1.2809608731165085e-05, "loss": 0.0773, "step": 22677 }, { "epoch": 2.6891972014704137, "grad_norm": 0.6967156372828484, "learning_rate": 1.2807513131896453e-05, "loss": 0.0721, "step": 22678 }, { "epoch": 2.689315783232539, "grad_norm": 0.7184347620096857, "learning_rate": 1.280541764502758e-05, "loss": 0.0929, "step": 22679 }, { "epoch": 2.6894343649946637, "grad_norm": 0.5242849811702933, "learning_rate": 1.2803322270577816e-05, "loss": 0.0786, "step": 22680 }, { "epoch": 2.689552946756789, "grad_norm": 0.5292645674866578, "learning_rate": 1.2801227008566464e-05, "loss": 0.0745, "step": 22681 }, { "epoch": 2.6896715285189137, "grad_norm": 0.8444688514139957, "learning_rate": 1.2799131859012831e-05, "loss": 0.1115, "step": 22682 }, { "epoch": 2.689790110281039, "grad_norm": 0.9573062524650158, "learning_rate": 1.2797036821936248e-05, "loss": 0.1202, "step": 22683 }, { "epoch": 2.6899086920431636, "grad_norm": 0.5923501326680854, "learning_rate": 1.2794941897356017e-05, "loss": 0.0754, "step": 22684 }, { "epoch": 2.690027273805289, "grad_norm": 0.7005881614422458, "learning_rate": 1.2792847085291465e-05, "loss": 0.0802, "step": 22685 }, { "epoch": 2.6901458555674136, "grad_norm": 0.5866115123047189, "learning_rate": 1.2790752385761884e-05, "loss": 0.0699, "step": 22686 }, { "epoch": 2.690264437329539, "grad_norm": 0.5634834566192848, "learning_rate": 1.2788657798786593e-05, "loss": 0.0692, "step": 22687 }, { "epoch": 2.6903830190916636, "grad_norm": 0.5539177053486987, "learning_rate": 1.2786563324384904e-05, "loss": 0.0858, "step": 22688 }, { "epoch": 2.6905016008537888, "grad_norm": 0.5547846843556626, "learning_rate": 1.2784468962576136e-05, "loss": 0.0628, "step": 22689 }, { "epoch": 2.6906201826159135, "grad_norm": 1.225163933453412, "learning_rate": 1.278237471337957e-05, "loss": 0.1326, "step": 22690 }, { "epoch": 2.6907387643780387, "grad_norm": 0.647136713290715, "learning_rate": 1.278028057681453e-05, "loss": 0.093, "step": 22691 }, { "epoch": 2.690857346140164, "grad_norm": 0.8336036597636207, "learning_rate": 1.2778186552900318e-05, "loss": 0.1272, "step": 22692 }, { "epoch": 2.6909759279022887, "grad_norm": 0.7059278712312694, "learning_rate": 1.2776092641656234e-05, "loss": 0.0778, "step": 22693 }, { "epoch": 2.6910945096644134, "grad_norm": 0.5486106623814532, "learning_rate": 1.2773998843101592e-05, "loss": 0.0675, "step": 22694 }, { "epoch": 2.6912130914265386, "grad_norm": 0.7799844212557053, "learning_rate": 1.2771905157255688e-05, "loss": 0.0908, "step": 22695 }, { "epoch": 2.691331673188664, "grad_norm": 0.6331366498284883, "learning_rate": 1.2769811584137832e-05, "loss": 0.0815, "step": 22696 }, { "epoch": 2.6914502549507886, "grad_norm": 0.5732080735812091, "learning_rate": 1.2767718123767303e-05, "loss": 0.0808, "step": 22697 }, { "epoch": 2.6915688367129134, "grad_norm": 0.6494235169326608, "learning_rate": 1.2765624776163416e-05, "loss": 0.105, "step": 22698 }, { "epoch": 2.6916874184750386, "grad_norm": 0.5322120167540427, "learning_rate": 1.2763531541345464e-05, "loss": 0.0815, "step": 22699 }, { "epoch": 2.6918060002371638, "grad_norm": 1.0682987176848144, "learning_rate": 1.2761438419332754e-05, "loss": 0.1448, "step": 22700 }, { "epoch": 2.6919245819992885, "grad_norm": 0.532037379812489, "learning_rate": 1.2759345410144566e-05, "loss": 0.0727, "step": 22701 }, { "epoch": 2.6920431637614133, "grad_norm": 0.5858687410497273, "learning_rate": 1.2757252513800205e-05, "loss": 0.0885, "step": 22702 }, { "epoch": 2.6921617455235385, "grad_norm": 0.6708315590432451, "learning_rate": 1.275515973031896e-05, "loss": 0.0891, "step": 22703 }, { "epoch": 2.6922803272856637, "grad_norm": 0.6465279679674759, "learning_rate": 1.2753067059720125e-05, "loss": 0.0863, "step": 22704 }, { "epoch": 2.6923989090477884, "grad_norm": 0.8461458061323175, "learning_rate": 1.2750974502023005e-05, "loss": 0.0892, "step": 22705 }, { "epoch": 2.692517490809913, "grad_norm": 0.6884100616377786, "learning_rate": 1.2748882057246864e-05, "loss": 0.0929, "step": 22706 }, { "epoch": 2.6926360725720384, "grad_norm": 1.0122048509222914, "learning_rate": 1.2746789725411023e-05, "loss": 0.133, "step": 22707 }, { "epoch": 2.6927546543341636, "grad_norm": 0.7799189200738704, "learning_rate": 1.2744697506534744e-05, "loss": 0.081, "step": 22708 }, { "epoch": 2.6928732360962884, "grad_norm": 0.5806361554165879, "learning_rate": 1.2742605400637325e-05, "loss": 0.0722, "step": 22709 }, { "epoch": 2.692991817858413, "grad_norm": 0.8439791872339972, "learning_rate": 1.2740513407738059e-05, "loss": 0.1294, "step": 22710 }, { "epoch": 2.6931103996205383, "grad_norm": 0.6892327064446699, "learning_rate": 1.273842152785623e-05, "loss": 0.0969, "step": 22711 }, { "epoch": 2.6932289813826635, "grad_norm": 0.7442483493005982, "learning_rate": 1.273632976101111e-05, "loss": 0.1036, "step": 22712 }, { "epoch": 2.6933475631447883, "grad_norm": 0.605087677901092, "learning_rate": 1.273423810722199e-05, "loss": 0.0883, "step": 22713 }, { "epoch": 2.693466144906913, "grad_norm": 0.6296173101571032, "learning_rate": 1.2732146566508155e-05, "loss": 0.0841, "step": 22714 }, { "epoch": 2.6935847266690383, "grad_norm": 0.6553535504718296, "learning_rate": 1.2730055138888886e-05, "loss": 0.09, "step": 22715 }, { "epoch": 2.6937033084311635, "grad_norm": 0.7019030850484651, "learning_rate": 1.2727963824383469e-05, "loss": 0.079, "step": 22716 }, { "epoch": 2.693821890193288, "grad_norm": 0.5357386368162855, "learning_rate": 1.2725872623011159e-05, "loss": 0.0876, "step": 22717 }, { "epoch": 2.6939404719554134, "grad_norm": 0.7198348065927875, "learning_rate": 1.2723781534791268e-05, "loss": 0.108, "step": 22718 }, { "epoch": 2.694059053717538, "grad_norm": 0.6465112287929389, "learning_rate": 1.2721690559743053e-05, "loss": 0.0825, "step": 22719 }, { "epoch": 2.6941776354796634, "grad_norm": 0.705006458967942, "learning_rate": 1.2719599697885798e-05, "loss": 0.1031, "step": 22720 }, { "epoch": 2.694296217241788, "grad_norm": 0.6324114384865103, "learning_rate": 1.271750894923876e-05, "loss": 0.087, "step": 22721 }, { "epoch": 2.6944147990039133, "grad_norm": 0.7740025820539483, "learning_rate": 1.2715418313821246e-05, "loss": 0.0911, "step": 22722 }, { "epoch": 2.694533380766038, "grad_norm": 0.670932296731335, "learning_rate": 1.2713327791652501e-05, "loss": 0.0938, "step": 22723 }, { "epoch": 2.6946519625281633, "grad_norm": 0.6416768760788588, "learning_rate": 1.2711237382751806e-05, "loss": 0.0861, "step": 22724 }, { "epoch": 2.694770544290288, "grad_norm": 0.9209888406094003, "learning_rate": 1.2709147087138435e-05, "loss": 0.1158, "step": 22725 }, { "epoch": 2.6948891260524133, "grad_norm": 0.7313138612174371, "learning_rate": 1.2707056904831652e-05, "loss": 0.0923, "step": 22726 }, { "epoch": 2.695007707814538, "grad_norm": 0.5958644377380091, "learning_rate": 1.270496683585074e-05, "loss": 0.0774, "step": 22727 }, { "epoch": 2.6951262895766632, "grad_norm": 0.6597566816130558, "learning_rate": 1.2702876880214939e-05, "loss": 0.0771, "step": 22728 }, { "epoch": 2.695244871338788, "grad_norm": 0.6699954876689014, "learning_rate": 1.2700787037943554e-05, "loss": 0.0992, "step": 22729 }, { "epoch": 2.695363453100913, "grad_norm": 0.793372103057005, "learning_rate": 1.2698697309055818e-05, "loss": 0.1213, "step": 22730 }, { "epoch": 2.695482034863038, "grad_norm": 0.6519483671296115, "learning_rate": 1.2696607693571012e-05, "loss": 0.0904, "step": 22731 }, { "epoch": 2.695600616625163, "grad_norm": 0.4441598468101067, "learning_rate": 1.2694518191508392e-05, "loss": 0.0683, "step": 22732 }, { "epoch": 2.695719198387288, "grad_norm": 0.7165968269370394, "learning_rate": 1.2692428802887224e-05, "loss": 0.1055, "step": 22733 }, { "epoch": 2.695837780149413, "grad_norm": 0.5324137991379294, "learning_rate": 1.269033952772678e-05, "loss": 0.07, "step": 22734 }, { "epoch": 2.695956361911538, "grad_norm": 0.6247029104998374, "learning_rate": 1.2688250366046293e-05, "loss": 0.0793, "step": 22735 }, { "epoch": 2.696074943673663, "grad_norm": 0.5387210650355754, "learning_rate": 1.2686161317865055e-05, "loss": 0.0586, "step": 22736 }, { "epoch": 2.696193525435788, "grad_norm": 0.5029929192715705, "learning_rate": 1.2684072383202306e-05, "loss": 0.0707, "step": 22737 }, { "epoch": 2.696312107197913, "grad_norm": 0.5135868078229175, "learning_rate": 1.268198356207731e-05, "loss": 0.072, "step": 22738 }, { "epoch": 2.696430688960038, "grad_norm": 0.5423198595512981, "learning_rate": 1.2679894854509306e-05, "loss": 0.0846, "step": 22739 }, { "epoch": 2.696549270722163, "grad_norm": 0.7709149546288857, "learning_rate": 1.267780626051758e-05, "loss": 0.071, "step": 22740 }, { "epoch": 2.6966678524842878, "grad_norm": 0.7321824371871846, "learning_rate": 1.267571778012136e-05, "loss": 0.0909, "step": 22741 }, { "epoch": 2.696786434246413, "grad_norm": 0.8413501086362907, "learning_rate": 1.2673629413339911e-05, "loss": 0.136, "step": 22742 }, { "epoch": 2.6969050160085377, "grad_norm": 0.5979879019243964, "learning_rate": 1.2671541160192481e-05, "loss": 0.0674, "step": 22743 }, { "epoch": 2.697023597770663, "grad_norm": 1.0112677797376557, "learning_rate": 1.2669453020698327e-05, "loss": 0.1616, "step": 22744 }, { "epoch": 2.697142179532788, "grad_norm": 0.44633091094511135, "learning_rate": 1.2667364994876702e-05, "loss": 0.0774, "step": 22745 }, { "epoch": 2.697260761294913, "grad_norm": 0.5995920200690452, "learning_rate": 1.2665277082746841e-05, "loss": 0.0905, "step": 22746 }, { "epoch": 2.6973793430570376, "grad_norm": 0.6545687583819564, "learning_rate": 1.2663189284327997e-05, "loss": 0.0893, "step": 22747 }, { "epoch": 2.697497924819163, "grad_norm": 0.6522568688850834, "learning_rate": 1.2661101599639419e-05, "loss": 0.0892, "step": 22748 }, { "epoch": 2.697616506581288, "grad_norm": 0.78077782629118, "learning_rate": 1.2659014028700367e-05, "loss": 0.0941, "step": 22749 }, { "epoch": 2.697735088343413, "grad_norm": 0.4313899223225727, "learning_rate": 1.2656926571530048e-05, "loss": 0.0565, "step": 22750 }, { "epoch": 2.6978536701055376, "grad_norm": 0.5804578202992583, "learning_rate": 1.2654839228147753e-05, "loss": 0.0844, "step": 22751 }, { "epoch": 2.6979722518676628, "grad_norm": 0.7257741640901009, "learning_rate": 1.265275199857269e-05, "loss": 0.0878, "step": 22752 }, { "epoch": 2.698090833629788, "grad_norm": 0.780080404762754, "learning_rate": 1.2650664882824115e-05, "loss": 0.1059, "step": 22753 }, { "epoch": 2.6982094153919127, "grad_norm": 0.6713721973552561, "learning_rate": 1.2648577880921262e-05, "loss": 0.0925, "step": 22754 }, { "epoch": 2.6983279971540375, "grad_norm": 0.8257228921985101, "learning_rate": 1.2646490992883375e-05, "loss": 0.1034, "step": 22755 }, { "epoch": 2.6984465789161627, "grad_norm": 0.7776384631622199, "learning_rate": 1.2644404218729706e-05, "loss": 0.0988, "step": 22756 }, { "epoch": 2.698565160678288, "grad_norm": 0.47418332836791444, "learning_rate": 1.2642317558479467e-05, "loss": 0.0675, "step": 22757 }, { "epoch": 2.6986837424404126, "grad_norm": 0.6842871709767068, "learning_rate": 1.2640231012151903e-05, "loss": 0.0882, "step": 22758 }, { "epoch": 2.6988023242025374, "grad_norm": 0.629514492385526, "learning_rate": 1.2638144579766253e-05, "loss": 0.084, "step": 22759 }, { "epoch": 2.6989209059646626, "grad_norm": 0.8248015747684961, "learning_rate": 1.2636058261341766e-05, "loss": 0.1371, "step": 22760 }, { "epoch": 2.699039487726788, "grad_norm": 0.7165385736909052, "learning_rate": 1.2633972056897647e-05, "loss": 0.1006, "step": 22761 }, { "epoch": 2.6991580694889126, "grad_norm": 0.40186081802343737, "learning_rate": 1.2631885966453138e-05, "loss": 0.0613, "step": 22762 }, { "epoch": 2.6992766512510373, "grad_norm": 0.8079884618888045, "learning_rate": 1.262979999002748e-05, "loss": 0.1193, "step": 22763 }, { "epoch": 2.6993952330131625, "grad_norm": 0.7911981050110795, "learning_rate": 1.2627714127639894e-05, "loss": 0.1062, "step": 22764 }, { "epoch": 2.6995138147752877, "grad_norm": 0.6034663853816326, "learning_rate": 1.2625628379309612e-05, "loss": 0.0793, "step": 22765 }, { "epoch": 2.6996323965374125, "grad_norm": 0.7721710474435802, "learning_rate": 1.262354274505586e-05, "loss": 0.0927, "step": 22766 }, { "epoch": 2.6997509782995377, "grad_norm": 0.539145852289442, "learning_rate": 1.2621457224897878e-05, "loss": 0.0802, "step": 22767 }, { "epoch": 2.6998695600616625, "grad_norm": 0.8787092567582525, "learning_rate": 1.2619371818854872e-05, "loss": 0.0867, "step": 22768 }, { "epoch": 2.6999881418237877, "grad_norm": 0.9219153252362043, "learning_rate": 1.2617286526946076e-05, "loss": 0.1202, "step": 22769 }, { "epoch": 2.7001067235859124, "grad_norm": 0.5162954680785747, "learning_rate": 1.2615201349190712e-05, "loss": 0.06, "step": 22770 }, { "epoch": 2.7002253053480376, "grad_norm": 0.5366795977500038, "learning_rate": 1.2613116285608012e-05, "loss": 0.0769, "step": 22771 }, { "epoch": 2.7003438871101624, "grad_norm": 0.604692381514184, "learning_rate": 1.2611031336217182e-05, "loss": 0.0851, "step": 22772 }, { "epoch": 2.7004624688722876, "grad_norm": 0.8246092472578473, "learning_rate": 1.2608946501037455e-05, "loss": 0.1235, "step": 22773 }, { "epoch": 2.7005810506344123, "grad_norm": 0.5313543586962263, "learning_rate": 1.2606861780088042e-05, "loss": 0.0608, "step": 22774 }, { "epoch": 2.7006996323965375, "grad_norm": 0.8630990604154145, "learning_rate": 1.2604777173388167e-05, "loss": 0.098, "step": 22775 }, { "epoch": 2.7008182141586623, "grad_norm": 0.5979208730013187, "learning_rate": 1.2602692680957056e-05, "loss": 0.0679, "step": 22776 }, { "epoch": 2.7009367959207875, "grad_norm": 0.8501698939811153, "learning_rate": 1.2600608302813895e-05, "loss": 0.1038, "step": 22777 }, { "epoch": 2.7010553776829123, "grad_norm": 0.6146874683107714, "learning_rate": 1.2598524038977944e-05, "loss": 0.0941, "step": 22778 }, { "epoch": 2.7011739594450375, "grad_norm": 0.534531662759171, "learning_rate": 1.2596439889468382e-05, "loss": 0.0792, "step": 22779 }, { "epoch": 2.7012925412071622, "grad_norm": 0.6439750482548712, "learning_rate": 1.259435585430443e-05, "loss": 0.0912, "step": 22780 }, { "epoch": 2.7014111229692874, "grad_norm": 0.6977257382973537, "learning_rate": 1.2592271933505306e-05, "loss": 0.1085, "step": 22781 }, { "epoch": 2.701529704731412, "grad_norm": 0.6413768927050115, "learning_rate": 1.2590188127090222e-05, "loss": 0.068, "step": 22782 }, { "epoch": 2.7016482864935374, "grad_norm": 0.7698311302591774, "learning_rate": 1.2588104435078395e-05, "loss": 0.1081, "step": 22783 }, { "epoch": 2.701766868255662, "grad_norm": 1.0268962135063155, "learning_rate": 1.2586020857489004e-05, "loss": 0.1082, "step": 22784 }, { "epoch": 2.7018854500177873, "grad_norm": 0.6953674355330706, "learning_rate": 1.2583937394341297e-05, "loss": 0.0683, "step": 22785 }, { "epoch": 2.702004031779912, "grad_norm": 0.6257758384148192, "learning_rate": 1.258185404565445e-05, "loss": 0.1004, "step": 22786 }, { "epoch": 2.7021226135420373, "grad_norm": 0.5432076205125019, "learning_rate": 1.2579770811447695e-05, "loss": 0.0666, "step": 22787 }, { "epoch": 2.702241195304162, "grad_norm": 0.9111465561298466, "learning_rate": 1.2577687691740201e-05, "loss": 0.0829, "step": 22788 }, { "epoch": 2.7023597770662873, "grad_norm": 0.5810821822972202, "learning_rate": 1.2575604686551212e-05, "loss": 0.0825, "step": 22789 }, { "epoch": 2.702478358828412, "grad_norm": 0.7561677902546714, "learning_rate": 1.2573521795899901e-05, "loss": 0.1037, "step": 22790 }, { "epoch": 2.7025969405905372, "grad_norm": 0.9266115007644398, "learning_rate": 1.2571439019805482e-05, "loss": 0.1069, "step": 22791 }, { "epoch": 2.702715522352662, "grad_norm": 0.9985679763559371, "learning_rate": 1.2569356358287154e-05, "loss": 0.1311, "step": 22792 }, { "epoch": 2.702834104114787, "grad_norm": 0.6372499202762824, "learning_rate": 1.2567273811364116e-05, "loss": 0.1019, "step": 22793 }, { "epoch": 2.7029526858769124, "grad_norm": 0.711136879616522, "learning_rate": 1.256519137905558e-05, "loss": 0.0996, "step": 22794 }, { "epoch": 2.703071267639037, "grad_norm": 0.9298979532337991, "learning_rate": 1.2563109061380713e-05, "loss": 0.0797, "step": 22795 }, { "epoch": 2.703189849401162, "grad_norm": 0.6653092481378606, "learning_rate": 1.2561026858358743e-05, "loss": 0.0825, "step": 22796 }, { "epoch": 2.703308431163287, "grad_norm": 0.7647847020085917, "learning_rate": 1.2558944770008846e-05, "loss": 0.1134, "step": 22797 }, { "epoch": 2.7034270129254123, "grad_norm": 0.8613889167254154, "learning_rate": 1.255686279635023e-05, "loss": 0.1146, "step": 22798 }, { "epoch": 2.703545594687537, "grad_norm": 0.5455728001927529, "learning_rate": 1.2554780937402061e-05, "loss": 0.064, "step": 22799 }, { "epoch": 2.703664176449662, "grad_norm": 0.5428192454065235, "learning_rate": 1.255269919318357e-05, "loss": 0.0917, "step": 22800 }, { "epoch": 2.703782758211787, "grad_norm": 0.4657333647306136, "learning_rate": 1.2550617563713918e-05, "loss": 0.0814, "step": 22801 }, { "epoch": 2.7039013399739122, "grad_norm": 0.5949642419644904, "learning_rate": 1.2548536049012308e-05, "loss": 0.0974, "step": 22802 }, { "epoch": 2.704019921736037, "grad_norm": 0.8039710204079231, "learning_rate": 1.2546454649097924e-05, "loss": 0.111, "step": 22803 }, { "epoch": 2.7041385034981618, "grad_norm": 0.8566026578523163, "learning_rate": 1.2544373363989958e-05, "loss": 0.0755, "step": 22804 }, { "epoch": 2.704257085260287, "grad_norm": 0.43534226114137814, "learning_rate": 1.2542292193707606e-05, "loss": 0.0685, "step": 22805 }, { "epoch": 2.704375667022412, "grad_norm": 0.7520580456728888, "learning_rate": 1.254021113827003e-05, "loss": 0.0916, "step": 22806 }, { "epoch": 2.704494248784537, "grad_norm": 0.6236481238895565, "learning_rate": 1.2538130197696433e-05, "loss": 0.0762, "step": 22807 }, { "epoch": 2.7046128305466617, "grad_norm": 0.5363956374060455, "learning_rate": 1.2536049372005992e-05, "loss": 0.0772, "step": 22808 }, { "epoch": 2.704731412308787, "grad_norm": 0.7105527627905014, "learning_rate": 1.2533968661217898e-05, "loss": 0.0895, "step": 22809 }, { "epoch": 2.704849994070912, "grad_norm": 0.6473717854007198, "learning_rate": 1.253188806535131e-05, "loss": 0.102, "step": 22810 }, { "epoch": 2.704968575833037, "grad_norm": 0.8088086016206997, "learning_rate": 1.2529807584425446e-05, "loss": 0.1016, "step": 22811 }, { "epoch": 2.7050871575951616, "grad_norm": 0.5877096205178001, "learning_rate": 1.252772721845945e-05, "loss": 0.0726, "step": 22812 }, { "epoch": 2.705205739357287, "grad_norm": 0.5343936207138081, "learning_rate": 1.2525646967472515e-05, "loss": 0.0795, "step": 22813 }, { "epoch": 2.705324321119412, "grad_norm": 0.5345260523556793, "learning_rate": 1.2523566831483821e-05, "loss": 0.0794, "step": 22814 }, { "epoch": 2.7054429028815368, "grad_norm": 0.5281301911982569, "learning_rate": 1.252148681051254e-05, "loss": 0.0575, "step": 22815 }, { "epoch": 2.7055614846436615, "grad_norm": 0.38248873104317355, "learning_rate": 1.2519406904577857e-05, "loss": 0.0528, "step": 22816 }, { "epoch": 2.7056800664057867, "grad_norm": 0.7372460918791258, "learning_rate": 1.2517327113698928e-05, "loss": 0.0937, "step": 22817 }, { "epoch": 2.705798648167912, "grad_norm": 0.4920837893911828, "learning_rate": 1.2515247437894936e-05, "loss": 0.056, "step": 22818 }, { "epoch": 2.7059172299300367, "grad_norm": 0.5566417864746563, "learning_rate": 1.2513167877185052e-05, "loss": 0.0909, "step": 22819 }, { "epoch": 2.706035811692162, "grad_norm": 0.6779354736080779, "learning_rate": 1.251108843158846e-05, "loss": 0.0982, "step": 22820 }, { "epoch": 2.7061543934542867, "grad_norm": 0.6219419636692997, "learning_rate": 1.2509009101124297e-05, "loss": 0.0784, "step": 22821 }, { "epoch": 2.706272975216412, "grad_norm": 0.6898093255830678, "learning_rate": 1.2506929885811772e-05, "loss": 0.0781, "step": 22822 }, { "epoch": 2.7063915569785366, "grad_norm": 0.6287696468078497, "learning_rate": 1.2504850785670024e-05, "loss": 0.0723, "step": 22823 }, { "epoch": 2.706510138740662, "grad_norm": 0.6932862393740953, "learning_rate": 1.2502771800718226e-05, "loss": 0.0951, "step": 22824 }, { "epoch": 2.7066287205027866, "grad_norm": 0.8136746180222879, "learning_rate": 1.2500692930975552e-05, "loss": 0.0991, "step": 22825 }, { "epoch": 2.706747302264912, "grad_norm": 0.8253373568847392, "learning_rate": 1.249861417646116e-05, "loss": 0.1037, "step": 22826 }, { "epoch": 2.7068658840270365, "grad_norm": 0.5860322379114561, "learning_rate": 1.2496535537194223e-05, "loss": 0.0845, "step": 22827 }, { "epoch": 2.7069844657891617, "grad_norm": 0.524337309319647, "learning_rate": 1.249445701319389e-05, "loss": 0.0762, "step": 22828 }, { "epoch": 2.7071030475512865, "grad_norm": 0.5089274671411005, "learning_rate": 1.2492378604479324e-05, "loss": 0.0599, "step": 22829 }, { "epoch": 2.7072216293134117, "grad_norm": 0.745548622668968, "learning_rate": 1.2490300311069688e-05, "loss": 0.0924, "step": 22830 }, { "epoch": 2.7073402110755365, "grad_norm": 0.6682834502891294, "learning_rate": 1.2488222132984156e-05, "loss": 0.087, "step": 22831 }, { "epoch": 2.7074587928376617, "grad_norm": 1.3366672230134253, "learning_rate": 1.2486144070241862e-05, "loss": 0.134, "step": 22832 }, { "epoch": 2.7075773745997864, "grad_norm": 0.7167203849734176, "learning_rate": 1.2484066122861973e-05, "loss": 0.1013, "step": 22833 }, { "epoch": 2.7076959563619116, "grad_norm": 0.5799490051558938, "learning_rate": 1.248198829086365e-05, "loss": 0.0757, "step": 22834 }, { "epoch": 2.7078145381240364, "grad_norm": 0.8193958791129795, "learning_rate": 1.247991057426604e-05, "loss": 0.1011, "step": 22835 }, { "epoch": 2.7079331198861616, "grad_norm": 0.6417244978648117, "learning_rate": 1.2477832973088304e-05, "loss": 0.0816, "step": 22836 }, { "epoch": 2.7080517016482863, "grad_norm": 0.7637656844310203, "learning_rate": 1.2475755487349592e-05, "loss": 0.0961, "step": 22837 }, { "epoch": 2.7081702834104115, "grad_norm": 0.6340956905357933, "learning_rate": 1.2473678117069066e-05, "loss": 0.0808, "step": 22838 }, { "epoch": 2.7082888651725363, "grad_norm": 0.528598087849251, "learning_rate": 1.2471600862265857e-05, "loss": 0.0722, "step": 22839 }, { "epoch": 2.7084074469346615, "grad_norm": 0.6226957755527607, "learning_rate": 1.2469523722959123e-05, "loss": 0.101, "step": 22840 }, { "epoch": 2.7085260286967863, "grad_norm": 0.6195094990534684, "learning_rate": 1.2467446699168014e-05, "loss": 0.082, "step": 22841 }, { "epoch": 2.7086446104589115, "grad_norm": 0.5194022368243967, "learning_rate": 1.2465369790911683e-05, "loss": 0.0708, "step": 22842 }, { "epoch": 2.7087631922210367, "grad_norm": 1.0808221294973022, "learning_rate": 1.2463292998209275e-05, "loss": 0.1339, "step": 22843 }, { "epoch": 2.7088817739831614, "grad_norm": 0.6483559974085983, "learning_rate": 1.2461216321079916e-05, "loss": 0.0819, "step": 22844 }, { "epoch": 2.709000355745286, "grad_norm": 0.6803266141989134, "learning_rate": 1.2459139759542788e-05, "loss": 0.0856, "step": 22845 }, { "epoch": 2.7091189375074114, "grad_norm": 0.5553743934385044, "learning_rate": 1.2457063313616998e-05, "loss": 0.0735, "step": 22846 }, { "epoch": 2.7092375192695366, "grad_norm": 0.3830516995931219, "learning_rate": 1.2454986983321718e-05, "loss": 0.0511, "step": 22847 }, { "epoch": 2.7093561010316614, "grad_norm": 0.5828969364784844, "learning_rate": 1.2452910768676052e-05, "loss": 0.0664, "step": 22848 }, { "epoch": 2.709474682793786, "grad_norm": 0.7675105431035145, "learning_rate": 1.2450834669699182e-05, "loss": 0.0972, "step": 22849 }, { "epoch": 2.7095932645559113, "grad_norm": 0.7275462209317104, "learning_rate": 1.2448758686410217e-05, "loss": 0.0994, "step": 22850 }, { "epoch": 2.7097118463180365, "grad_norm": 0.7439111423053639, "learning_rate": 1.2446682818828307e-05, "loss": 0.0846, "step": 22851 }, { "epoch": 2.7098304280801613, "grad_norm": 0.45570641600368145, "learning_rate": 1.2444607066972586e-05, "loss": 0.0549, "step": 22852 }, { "epoch": 2.709949009842286, "grad_norm": 0.6852969500117976, "learning_rate": 1.2442531430862192e-05, "loss": 0.1005, "step": 22853 }, { "epoch": 2.7100675916044112, "grad_norm": 0.6139432802002925, "learning_rate": 1.2440455910516272e-05, "loss": 0.0947, "step": 22854 }, { "epoch": 2.7101861733665364, "grad_norm": 0.5154886521750516, "learning_rate": 1.2438380505953923e-05, "loss": 0.0765, "step": 22855 }, { "epoch": 2.710304755128661, "grad_norm": 1.0804847376774664, "learning_rate": 1.2436305217194325e-05, "loss": 0.1165, "step": 22856 }, { "epoch": 2.710423336890786, "grad_norm": 0.4790650613935944, "learning_rate": 1.2434230044256573e-05, "loss": 0.066, "step": 22857 }, { "epoch": 2.710541918652911, "grad_norm": 0.6431504673910016, "learning_rate": 1.2432154987159822e-05, "loss": 0.1057, "step": 22858 }, { "epoch": 2.7106605004150364, "grad_norm": 0.7911571245499318, "learning_rate": 1.243008004592317e-05, "loss": 0.0869, "step": 22859 }, { "epoch": 2.710779082177161, "grad_norm": 0.4676069502455532, "learning_rate": 1.2428005220565786e-05, "loss": 0.0552, "step": 22860 }, { "epoch": 2.710897663939286, "grad_norm": 0.5779695153533326, "learning_rate": 1.2425930511106768e-05, "loss": 0.0803, "step": 22861 }, { "epoch": 2.711016245701411, "grad_norm": 0.7941448274811136, "learning_rate": 1.2423855917565252e-05, "loss": 0.099, "step": 22862 }, { "epoch": 2.7111348274635363, "grad_norm": 0.5819781146105081, "learning_rate": 1.2421781439960361e-05, "loss": 0.0739, "step": 22863 }, { "epoch": 2.711253409225661, "grad_norm": 1.1310453644458134, "learning_rate": 1.2419707078311221e-05, "loss": 0.1213, "step": 22864 }, { "epoch": 2.711371990987786, "grad_norm": 0.6569073664634694, "learning_rate": 1.2417632832636965e-05, "loss": 0.0942, "step": 22865 }, { "epoch": 2.711490572749911, "grad_norm": 0.6389891217903565, "learning_rate": 1.2415558702956684e-05, "loss": 0.065, "step": 22866 }, { "epoch": 2.711609154512036, "grad_norm": 0.7497562909086473, "learning_rate": 1.241348468928954e-05, "loss": 0.0938, "step": 22867 }, { "epoch": 2.711727736274161, "grad_norm": 1.027733759207597, "learning_rate": 1.2411410791654623e-05, "loss": 0.1499, "step": 22868 }, { "epoch": 2.711846318036286, "grad_norm": 0.9601614101352067, "learning_rate": 1.240933701007107e-05, "loss": 0.1114, "step": 22869 }, { "epoch": 2.711964899798411, "grad_norm": 0.5291827578599926, "learning_rate": 1.2407263344557973e-05, "loss": 0.0697, "step": 22870 }, { "epoch": 2.712083481560536, "grad_norm": 0.6359025234610513, "learning_rate": 1.2405189795134484e-05, "loss": 0.057, "step": 22871 }, { "epoch": 2.712202063322661, "grad_norm": 0.6765666765117524, "learning_rate": 1.2403116361819692e-05, "loss": 0.0996, "step": 22872 }, { "epoch": 2.712320645084786, "grad_norm": 0.3995658060435283, "learning_rate": 1.2401043044632719e-05, "loss": 0.0551, "step": 22873 }, { "epoch": 2.712439226846911, "grad_norm": 0.606911174856811, "learning_rate": 1.2398969843592675e-05, "loss": 0.0857, "step": 22874 }, { "epoch": 2.712557808609036, "grad_norm": 0.757569506779207, "learning_rate": 1.2396896758718682e-05, "loss": 0.0933, "step": 22875 }, { "epoch": 2.712676390371161, "grad_norm": 0.795232083441143, "learning_rate": 1.2394823790029852e-05, "loss": 0.1212, "step": 22876 }, { "epoch": 2.712794972133286, "grad_norm": 0.5590888744987811, "learning_rate": 1.2392750937545283e-05, "loss": 0.0783, "step": 22877 }, { "epoch": 2.712913553895411, "grad_norm": 0.5571002738970988, "learning_rate": 1.2390678201284086e-05, "loss": 0.0902, "step": 22878 }, { "epoch": 2.713032135657536, "grad_norm": 0.42121118529149043, "learning_rate": 1.2388605581265378e-05, "loss": 0.062, "step": 22879 }, { "epoch": 2.7131507174196607, "grad_norm": 1.2514692085427344, "learning_rate": 1.238653307750827e-05, "loss": 0.1403, "step": 22880 }, { "epoch": 2.713269299181786, "grad_norm": 0.44872690709384033, "learning_rate": 1.2384460690031838e-05, "loss": 0.0694, "step": 22881 }, { "epoch": 2.7133878809439107, "grad_norm": 0.7391091424429785, "learning_rate": 1.2382388418855228e-05, "loss": 0.11, "step": 22882 }, { "epoch": 2.713506462706036, "grad_norm": 0.5008868516462135, "learning_rate": 1.2380316263997516e-05, "loss": 0.0536, "step": 22883 }, { "epoch": 2.7136250444681607, "grad_norm": 0.7121144848834023, "learning_rate": 1.237824422547781e-05, "loss": 0.086, "step": 22884 }, { "epoch": 2.713743626230286, "grad_norm": 0.7763758695451651, "learning_rate": 1.2376172303315218e-05, "loss": 0.1057, "step": 22885 }, { "epoch": 2.7138622079924106, "grad_norm": 0.7939523250731322, "learning_rate": 1.2374100497528834e-05, "loss": 0.1044, "step": 22886 }, { "epoch": 2.713980789754536, "grad_norm": 0.5841660899277804, "learning_rate": 1.2372028808137773e-05, "loss": 0.0744, "step": 22887 }, { "epoch": 2.7140993715166606, "grad_norm": 0.7781785347689407, "learning_rate": 1.236995723516111e-05, "loss": 0.1092, "step": 22888 }, { "epoch": 2.714217953278786, "grad_norm": 0.7427718372259811, "learning_rate": 1.2367885778617952e-05, "loss": 0.1028, "step": 22889 }, { "epoch": 2.7143365350409105, "grad_norm": 0.5181631342207078, "learning_rate": 1.2365814438527398e-05, "loss": 0.0749, "step": 22890 }, { "epoch": 2.7144551168030358, "grad_norm": 0.642453691545869, "learning_rate": 1.2363743214908541e-05, "loss": 0.0919, "step": 22891 }, { "epoch": 2.714573698565161, "grad_norm": 0.7863588227433153, "learning_rate": 1.2361672107780486e-05, "loss": 0.1007, "step": 22892 }, { "epoch": 2.7146922803272857, "grad_norm": 0.6028335792968061, "learning_rate": 1.2359601117162297e-05, "loss": 0.0956, "step": 22893 }, { "epoch": 2.7148108620894105, "grad_norm": 0.5925335163666156, "learning_rate": 1.2357530243073104e-05, "loss": 0.0836, "step": 22894 }, { "epoch": 2.7149294438515357, "grad_norm": 0.7302883222805482, "learning_rate": 1.235545948553197e-05, "loss": 0.105, "step": 22895 }, { "epoch": 2.715048025613661, "grad_norm": 0.7888032928748052, "learning_rate": 1.2353388844557991e-05, "loss": 0.1105, "step": 22896 }, { "epoch": 2.7151666073757856, "grad_norm": 0.7068486638993073, "learning_rate": 1.2351318320170264e-05, "loss": 0.0915, "step": 22897 }, { "epoch": 2.7152851891379104, "grad_norm": 0.6591111343257902, "learning_rate": 1.2349247912387875e-05, "loss": 0.0952, "step": 22898 }, { "epoch": 2.7154037709000356, "grad_norm": 0.5914160611862005, "learning_rate": 1.23471776212299e-05, "loss": 0.0751, "step": 22899 }, { "epoch": 2.715522352662161, "grad_norm": 0.8146634746121066, "learning_rate": 1.2345107446715431e-05, "loss": 0.1119, "step": 22900 }, { "epoch": 2.7156409344242856, "grad_norm": 0.6507851599835665, "learning_rate": 1.2343037388863554e-05, "loss": 0.0872, "step": 22901 }, { "epoch": 2.7157595161864103, "grad_norm": 0.8532501043364852, "learning_rate": 1.234096744769335e-05, "loss": 0.1272, "step": 22902 }, { "epoch": 2.7158780979485355, "grad_norm": 0.7670833482097494, "learning_rate": 1.2338897623223913e-05, "loss": 0.0913, "step": 22903 }, { "epoch": 2.7159966797106607, "grad_norm": 0.6626462206022, "learning_rate": 1.2336827915474294e-05, "loss": 0.0951, "step": 22904 }, { "epoch": 2.7161152614727855, "grad_norm": 0.5553441865098555, "learning_rate": 1.2334758324463613e-05, "loss": 0.0683, "step": 22905 }, { "epoch": 2.7162338432349102, "grad_norm": 0.8137276025278782, "learning_rate": 1.2332688850210919e-05, "loss": 0.1137, "step": 22906 }, { "epoch": 2.7163524249970354, "grad_norm": 0.8935785305606706, "learning_rate": 1.2330619492735298e-05, "loss": 0.117, "step": 22907 }, { "epoch": 2.7164710067591606, "grad_norm": 0.5685596922071227, "learning_rate": 1.2328550252055832e-05, "loss": 0.0699, "step": 22908 }, { "epoch": 2.7165895885212854, "grad_norm": 0.6272957739245556, "learning_rate": 1.2326481128191602e-05, "loss": 0.0868, "step": 22909 }, { "epoch": 2.71670817028341, "grad_norm": 0.8150409959715319, "learning_rate": 1.2324412121161666e-05, "loss": 0.1297, "step": 22910 }, { "epoch": 2.7168267520455354, "grad_norm": 0.5964121092698188, "learning_rate": 1.2322343230985105e-05, "loss": 0.0893, "step": 22911 }, { "epoch": 2.7169453338076606, "grad_norm": 0.6549637088891821, "learning_rate": 1.2320274457680997e-05, "loss": 0.0797, "step": 22912 }, { "epoch": 2.7170639155697853, "grad_norm": 0.7739316640841268, "learning_rate": 1.2318205801268406e-05, "loss": 0.1061, "step": 22913 }, { "epoch": 2.71718249733191, "grad_norm": 0.6937517887758134, "learning_rate": 1.2316137261766417e-05, "loss": 0.0709, "step": 22914 }, { "epoch": 2.7173010790940353, "grad_norm": 0.6251265210023421, "learning_rate": 1.2314068839194068e-05, "loss": 0.0888, "step": 22915 }, { "epoch": 2.7174196608561605, "grad_norm": 0.5231777501634355, "learning_rate": 1.231200053357047e-05, "loss": 0.0507, "step": 22916 }, { "epoch": 2.7175382426182852, "grad_norm": 0.5576241221954318, "learning_rate": 1.2309932344914653e-05, "loss": 0.0782, "step": 22917 }, { "epoch": 2.7176568243804105, "grad_norm": 0.5789261546374365, "learning_rate": 1.2307864273245711e-05, "loss": 0.079, "step": 22918 }, { "epoch": 2.717775406142535, "grad_norm": 0.7551630447256571, "learning_rate": 1.230579631858268e-05, "loss": 0.0999, "step": 22919 }, { "epoch": 2.7178939879046604, "grad_norm": 0.5544152847895917, "learning_rate": 1.2303728480944654e-05, "loss": 0.0813, "step": 22920 }, { "epoch": 2.718012569666785, "grad_norm": 0.6496513763096124, "learning_rate": 1.2301660760350675e-05, "loss": 0.0784, "step": 22921 }, { "epoch": 2.7181311514289104, "grad_norm": 0.9357046161191285, "learning_rate": 1.2299593156819813e-05, "loss": 0.139, "step": 22922 }, { "epoch": 2.718249733191035, "grad_norm": 0.5730038134998272, "learning_rate": 1.2297525670371126e-05, "loss": 0.073, "step": 22923 }, { "epoch": 2.7183683149531603, "grad_norm": 0.9311479146732368, "learning_rate": 1.2295458301023678e-05, "loss": 0.1219, "step": 22924 }, { "epoch": 2.718486896715285, "grad_norm": 0.5698268273328686, "learning_rate": 1.2293391048796532e-05, "loss": 0.0799, "step": 22925 }, { "epoch": 2.7186054784774103, "grad_norm": 0.4898443704107063, "learning_rate": 1.2291323913708721e-05, "loss": 0.0605, "step": 22926 }, { "epoch": 2.718724060239535, "grad_norm": 0.6742819900756006, "learning_rate": 1.2289256895779338e-05, "loss": 0.0769, "step": 22927 }, { "epoch": 2.7188426420016603, "grad_norm": 0.589151909344817, "learning_rate": 1.228718999502741e-05, "loss": 0.0905, "step": 22928 }, { "epoch": 2.718961223763785, "grad_norm": 1.051182786152531, "learning_rate": 1.228512321147201e-05, "loss": 0.1307, "step": 22929 }, { "epoch": 2.71907980552591, "grad_norm": 0.6474150770960652, "learning_rate": 1.2283056545132162e-05, "loss": 0.09, "step": 22930 }, { "epoch": 2.719198387288035, "grad_norm": 0.7042881382502459, "learning_rate": 1.2280989996026959e-05, "loss": 0.102, "step": 22931 }, { "epoch": 2.71931696905016, "grad_norm": 0.6365743310249772, "learning_rate": 1.227892356417542e-05, "loss": 0.0761, "step": 22932 }, { "epoch": 2.719435550812285, "grad_norm": 0.6095895588828529, "learning_rate": 1.227685724959661e-05, "loss": 0.0727, "step": 22933 }, { "epoch": 2.71955413257441, "grad_norm": 0.6404726307090183, "learning_rate": 1.227479105230957e-05, "loss": 0.0903, "step": 22934 }, { "epoch": 2.719672714336535, "grad_norm": 0.8766688161337733, "learning_rate": 1.2272724972333351e-05, "loss": 0.1041, "step": 22935 }, { "epoch": 2.71979129609866, "grad_norm": 0.7115574059841383, "learning_rate": 1.2270659009687013e-05, "loss": 0.0786, "step": 22936 }, { "epoch": 2.719909877860785, "grad_norm": 0.7211259117694715, "learning_rate": 1.226859316438957e-05, "loss": 0.0658, "step": 22937 }, { "epoch": 2.72002845962291, "grad_norm": 0.555475925214798, "learning_rate": 1.2266527436460103e-05, "loss": 0.0914, "step": 22938 }, { "epoch": 2.720147041385035, "grad_norm": 0.8727343575824346, "learning_rate": 1.2264461825917631e-05, "loss": 0.1227, "step": 22939 }, { "epoch": 2.72026562314716, "grad_norm": 0.8608031661201022, "learning_rate": 1.22623963327812e-05, "loss": 0.1124, "step": 22940 }, { "epoch": 2.720384204909285, "grad_norm": 0.5814307430265034, "learning_rate": 1.2260330957069858e-05, "loss": 0.0757, "step": 22941 }, { "epoch": 2.72050278667141, "grad_norm": 0.8749405716247962, "learning_rate": 1.2258265698802642e-05, "loss": 0.1058, "step": 22942 }, { "epoch": 2.7206213684335347, "grad_norm": 0.7214975260385246, "learning_rate": 1.2256200557998602e-05, "loss": 0.0897, "step": 22943 }, { "epoch": 2.72073995019566, "grad_norm": 0.7801033000360772, "learning_rate": 1.2254135534676754e-05, "loss": 0.0994, "step": 22944 }, { "epoch": 2.720858531957785, "grad_norm": 0.8147078009297245, "learning_rate": 1.2252070628856144e-05, "loss": 0.085, "step": 22945 }, { "epoch": 2.72097711371991, "grad_norm": 0.5090636442528014, "learning_rate": 1.2250005840555812e-05, "loss": 0.0746, "step": 22946 }, { "epoch": 2.7210956954820347, "grad_norm": 0.5462060154449243, "learning_rate": 1.2247941169794802e-05, "loss": 0.068, "step": 22947 }, { "epoch": 2.72121427724416, "grad_norm": 0.9559319204735842, "learning_rate": 1.2245876616592125e-05, "loss": 0.1005, "step": 22948 }, { "epoch": 2.721332859006285, "grad_norm": 0.5983504109964133, "learning_rate": 1.224381218096683e-05, "loss": 0.0742, "step": 22949 }, { "epoch": 2.72145144076841, "grad_norm": 0.48325227330931014, "learning_rate": 1.2241747862937938e-05, "loss": 0.0586, "step": 22950 }, { "epoch": 2.7215700225305346, "grad_norm": 0.473241154412509, "learning_rate": 1.223968366252449e-05, "loss": 0.0535, "step": 22951 }, { "epoch": 2.72168860429266, "grad_norm": 0.7958948518400485, "learning_rate": 1.2237619579745507e-05, "loss": 0.1135, "step": 22952 }, { "epoch": 2.721807186054785, "grad_norm": 0.6465389206606225, "learning_rate": 1.2235555614620026e-05, "loss": 0.0752, "step": 22953 }, { "epoch": 2.7219257678169098, "grad_norm": 0.8227220919767011, "learning_rate": 1.2233491767167077e-05, "loss": 0.1148, "step": 22954 }, { "epoch": 2.7220443495790345, "grad_norm": 0.7601743433837387, "learning_rate": 1.2231428037405667e-05, "loss": 0.1097, "step": 22955 }, { "epoch": 2.7221629313411597, "grad_norm": 0.5132660428422361, "learning_rate": 1.2229364425354833e-05, "loss": 0.0664, "step": 22956 }, { "epoch": 2.722281513103285, "grad_norm": 0.5713599142219753, "learning_rate": 1.22273009310336e-05, "loss": 0.0725, "step": 22957 }, { "epoch": 2.7224000948654097, "grad_norm": 0.6711265567764158, "learning_rate": 1.2225237554461e-05, "loss": 0.1056, "step": 22958 }, { "epoch": 2.7225186766275344, "grad_norm": 0.5295158168166484, "learning_rate": 1.2223174295656034e-05, "loss": 0.0854, "step": 22959 }, { "epoch": 2.7226372583896596, "grad_norm": 0.8623088183931276, "learning_rate": 1.2221111154637732e-05, "loss": 0.0954, "step": 22960 }, { "epoch": 2.722755840151785, "grad_norm": 0.6591783976960001, "learning_rate": 1.2219048131425114e-05, "loss": 0.084, "step": 22961 }, { "epoch": 2.7228744219139096, "grad_norm": 0.6213267491700256, "learning_rate": 1.2216985226037204e-05, "loss": 0.0875, "step": 22962 }, { "epoch": 2.7229930036760344, "grad_norm": 0.7077599907359853, "learning_rate": 1.2214922438493018e-05, "loss": 0.1052, "step": 22963 }, { "epoch": 2.7231115854381596, "grad_norm": 0.6929788518300855, "learning_rate": 1.2212859768811553e-05, "loss": 0.0846, "step": 22964 }, { "epoch": 2.7232301672002848, "grad_norm": 0.6059340304424424, "learning_rate": 1.221079721701186e-05, "loss": 0.0731, "step": 22965 }, { "epoch": 2.7233487489624095, "grad_norm": 0.7268879586979806, "learning_rate": 1.220873478311292e-05, "loss": 0.0855, "step": 22966 }, { "epoch": 2.7234673307245347, "grad_norm": 0.8298752264632223, "learning_rate": 1.2206672467133764e-05, "loss": 0.1296, "step": 22967 }, { "epoch": 2.7235859124866595, "grad_norm": 0.6129033833567423, "learning_rate": 1.2204610269093398e-05, "loss": 0.0868, "step": 22968 }, { "epoch": 2.7237044942487847, "grad_norm": 0.5479818366575606, "learning_rate": 1.2202548189010845e-05, "loss": 0.0962, "step": 22969 }, { "epoch": 2.7238230760109094, "grad_norm": 0.6745242272804527, "learning_rate": 1.2200486226905095e-05, "loss": 0.0949, "step": 22970 }, { "epoch": 2.7239416577730347, "grad_norm": 1.0468308692588777, "learning_rate": 1.2198424382795165e-05, "loss": 0.1243, "step": 22971 }, { "epoch": 2.7240602395351594, "grad_norm": 0.5038063669118098, "learning_rate": 1.2196362656700063e-05, "loss": 0.0639, "step": 22972 }, { "epoch": 2.7241788212972846, "grad_norm": 0.9219349432508519, "learning_rate": 1.2194301048638797e-05, "loss": 0.1328, "step": 22973 }, { "epoch": 2.7242974030594094, "grad_norm": 0.47930546309200295, "learning_rate": 1.2192239558630384e-05, "loss": 0.0715, "step": 22974 }, { "epoch": 2.7244159848215346, "grad_norm": 0.6044382576946699, "learning_rate": 1.2190178186693795e-05, "loss": 0.0853, "step": 22975 }, { "epoch": 2.7245345665836593, "grad_norm": 0.7549361905507401, "learning_rate": 1.2188116932848073e-05, "loss": 0.0935, "step": 22976 }, { "epoch": 2.7246531483457845, "grad_norm": 0.9508866153140477, "learning_rate": 1.2186055797112195e-05, "loss": 0.1139, "step": 22977 }, { "epoch": 2.7247717301079093, "grad_norm": 0.6026728214768886, "learning_rate": 1.2183994779505167e-05, "loss": 0.0846, "step": 22978 }, { "epoch": 2.7248903118700345, "grad_norm": 0.6876996675529145, "learning_rate": 1.2181933880045993e-05, "loss": 0.1101, "step": 22979 }, { "epoch": 2.7250088936321593, "grad_norm": 0.6821289071574927, "learning_rate": 1.2179873098753677e-05, "loss": 0.0821, "step": 22980 }, { "epoch": 2.7251274753942845, "grad_norm": 0.6347486877983024, "learning_rate": 1.2177812435647202e-05, "loss": 0.0781, "step": 22981 }, { "epoch": 2.725246057156409, "grad_norm": 0.4456925117931907, "learning_rate": 1.217575189074557e-05, "loss": 0.0748, "step": 22982 }, { "epoch": 2.7253646389185344, "grad_norm": 0.6404976014436174, "learning_rate": 1.2173691464067785e-05, "loss": 0.075, "step": 22983 }, { "epoch": 2.725483220680659, "grad_norm": 0.36819921769251374, "learning_rate": 1.2171631155632831e-05, "loss": 0.057, "step": 22984 }, { "epoch": 2.7256018024427844, "grad_norm": 0.478207688459665, "learning_rate": 1.216957096545972e-05, "loss": 0.071, "step": 22985 }, { "epoch": 2.725720384204909, "grad_norm": 0.6696619256353288, "learning_rate": 1.2167510893567408e-05, "loss": 0.0992, "step": 22986 }, { "epoch": 2.7258389659670343, "grad_norm": 0.5475511365655622, "learning_rate": 1.2165450939974932e-05, "loss": 0.075, "step": 22987 }, { "epoch": 2.725957547729159, "grad_norm": 0.6321117164572155, "learning_rate": 1.2163391104701246e-05, "loss": 0.0748, "step": 22988 }, { "epoch": 2.7260761294912843, "grad_norm": 0.5534564881299621, "learning_rate": 1.2161331387765357e-05, "loss": 0.0827, "step": 22989 }, { "epoch": 2.726194711253409, "grad_norm": 0.747047164302015, "learning_rate": 1.2159271789186246e-05, "loss": 0.1026, "step": 22990 }, { "epoch": 2.7263132930155343, "grad_norm": 1.1855773213314587, "learning_rate": 1.2157212308982905e-05, "loss": 0.1041, "step": 22991 }, { "epoch": 2.726431874777659, "grad_norm": 0.8821099941717395, "learning_rate": 1.2155152947174326e-05, "loss": 0.1179, "step": 22992 }, { "epoch": 2.7265504565397842, "grad_norm": 0.6404524431331026, "learning_rate": 1.2153093703779464e-05, "loss": 0.0892, "step": 22993 }, { "epoch": 2.7266690383019094, "grad_norm": 0.5864135795204584, "learning_rate": 1.2151034578817346e-05, "loss": 0.0636, "step": 22994 }, { "epoch": 2.726787620064034, "grad_norm": 0.6296097805809004, "learning_rate": 1.2148975572306923e-05, "loss": 0.0759, "step": 22995 }, { "epoch": 2.726906201826159, "grad_norm": 0.6341058600515478, "learning_rate": 1.2146916684267195e-05, "loss": 0.0882, "step": 22996 }, { "epoch": 2.727024783588284, "grad_norm": 0.7592245410810152, "learning_rate": 1.2144857914717118e-05, "loss": 0.1002, "step": 22997 }, { "epoch": 2.7271433653504094, "grad_norm": 0.8660526213212911, "learning_rate": 1.21427992636757e-05, "loss": 0.1143, "step": 22998 }, { "epoch": 2.727261947112534, "grad_norm": 0.5230701864853842, "learning_rate": 1.2140740731161901e-05, "loss": 0.0553, "step": 22999 }, { "epoch": 2.727380528874659, "grad_norm": 0.6310807056751019, "learning_rate": 1.2138682317194703e-05, "loss": 0.0941, "step": 23000 }, { "epoch": 2.727499110636784, "grad_norm": 0.566106268524832, "learning_rate": 1.2136624021793081e-05, "loss": 0.0848, "step": 23001 }, { "epoch": 2.7276176923989093, "grad_norm": 0.48327421856590974, "learning_rate": 1.2134565844976012e-05, "loss": 0.0651, "step": 23002 }, { "epoch": 2.727736274161034, "grad_norm": 0.6914048958118376, "learning_rate": 1.2132507786762479e-05, "loss": 0.0666, "step": 23003 }, { "epoch": 2.727854855923159, "grad_norm": 0.7063237220247371, "learning_rate": 1.2130449847171434e-05, "loss": 0.0849, "step": 23004 }, { "epoch": 2.727973437685284, "grad_norm": 0.49816492366509024, "learning_rate": 1.2128392026221864e-05, "loss": 0.0757, "step": 23005 }, { "epoch": 2.728092019447409, "grad_norm": 0.7099997615020643, "learning_rate": 1.212633432393273e-05, "loss": 0.0942, "step": 23006 }, { "epoch": 2.728210601209534, "grad_norm": 0.437460881815232, "learning_rate": 1.2124276740323018e-05, "loss": 0.0554, "step": 23007 }, { "epoch": 2.7283291829716587, "grad_norm": 0.6665224268613121, "learning_rate": 1.2122219275411668e-05, "loss": 0.0948, "step": 23008 }, { "epoch": 2.728447764733784, "grad_norm": 0.5990805378530335, "learning_rate": 1.2120161929217682e-05, "loss": 0.0766, "step": 23009 }, { "epoch": 2.728566346495909, "grad_norm": 0.8142979052419979, "learning_rate": 1.211810470176e-05, "loss": 0.0972, "step": 23010 }, { "epoch": 2.728684928258034, "grad_norm": 0.5632124498516123, "learning_rate": 1.2116047593057595e-05, "loss": 0.0718, "step": 23011 }, { "epoch": 2.7288035100201586, "grad_norm": 0.6851194088061068, "learning_rate": 1.2113990603129433e-05, "loss": 0.0889, "step": 23012 }, { "epoch": 2.728922091782284, "grad_norm": 0.7011190813113061, "learning_rate": 1.2111933731994474e-05, "loss": 0.0831, "step": 23013 }, { "epoch": 2.729040673544409, "grad_norm": 0.5905789315041149, "learning_rate": 1.2109876979671692e-05, "loss": 0.0795, "step": 23014 }, { "epoch": 2.729159255306534, "grad_norm": 0.5630293888629859, "learning_rate": 1.2107820346180029e-05, "loss": 0.0572, "step": 23015 }, { "epoch": 2.7292778370686586, "grad_norm": 0.793781195176863, "learning_rate": 1.2105763831538452e-05, "loss": 0.1024, "step": 23016 }, { "epoch": 2.7293964188307838, "grad_norm": 0.5686305606129101, "learning_rate": 1.210370743576592e-05, "loss": 0.0743, "step": 23017 }, { "epoch": 2.729515000592909, "grad_norm": 0.773305812413292, "learning_rate": 1.2101651158881403e-05, "loss": 0.109, "step": 23018 }, { "epoch": 2.7296335823550337, "grad_norm": 0.603966970166552, "learning_rate": 1.2099595000903831e-05, "loss": 0.0788, "step": 23019 }, { "epoch": 2.729752164117159, "grad_norm": 0.5430200385638231, "learning_rate": 1.2097538961852178e-05, "loss": 0.0781, "step": 23020 }, { "epoch": 2.7298707458792837, "grad_norm": 0.7850105371320054, "learning_rate": 1.2095483041745392e-05, "loss": 0.0914, "step": 23021 }, { "epoch": 2.729989327641409, "grad_norm": 0.606624188160079, "learning_rate": 1.2093427240602428e-05, "loss": 0.0803, "step": 23022 }, { "epoch": 2.7301079094035337, "grad_norm": 0.558199476300812, "learning_rate": 1.2091371558442239e-05, "loss": 0.0724, "step": 23023 }, { "epoch": 2.730226491165659, "grad_norm": 0.8903959528100039, "learning_rate": 1.208931599528377e-05, "loss": 0.0997, "step": 23024 }, { "epoch": 2.7303450729277836, "grad_norm": 0.9391698108208114, "learning_rate": 1.208726055114599e-05, "loss": 0.1003, "step": 23025 }, { "epoch": 2.730463654689909, "grad_norm": 0.666286808384473, "learning_rate": 1.2085205226047821e-05, "loss": 0.1058, "step": 23026 }, { "epoch": 2.7305822364520336, "grad_norm": 0.6966513590385467, "learning_rate": 1.2083150020008224e-05, "loss": 0.0848, "step": 23027 }, { "epoch": 2.7307008182141588, "grad_norm": 0.7123579547412119, "learning_rate": 1.2081094933046142e-05, "loss": 0.089, "step": 23028 }, { "epoch": 2.7308193999762835, "grad_norm": 0.9696084299500916, "learning_rate": 1.2079039965180532e-05, "loss": 0.1078, "step": 23029 }, { "epoch": 2.7309379817384087, "grad_norm": 0.599851910423277, "learning_rate": 1.2076985116430322e-05, "loss": 0.061, "step": 23030 }, { "epoch": 2.7310565635005335, "grad_norm": 0.621765409942884, "learning_rate": 1.2074930386814462e-05, "loss": 0.0795, "step": 23031 }, { "epoch": 2.7311751452626587, "grad_norm": 0.6423753605194666, "learning_rate": 1.207287577635189e-05, "loss": 0.0872, "step": 23032 }, { "epoch": 2.7312937270247835, "grad_norm": 0.9276133822050114, "learning_rate": 1.2070821285061556e-05, "loss": 0.1205, "step": 23033 }, { "epoch": 2.7314123087869087, "grad_norm": 0.8250449166814429, "learning_rate": 1.2068766912962403e-05, "loss": 0.1237, "step": 23034 }, { "epoch": 2.7315308905490334, "grad_norm": 0.8117054239868996, "learning_rate": 1.2066712660073342e-05, "loss": 0.0883, "step": 23035 }, { "epoch": 2.7316494723111586, "grad_norm": 0.6038419496388142, "learning_rate": 1.206465852641335e-05, "loss": 0.0944, "step": 23036 }, { "epoch": 2.7317680540732834, "grad_norm": 0.7233984034982177, "learning_rate": 1.2062604512001333e-05, "loss": 0.0891, "step": 23037 }, { "epoch": 2.7318866358354086, "grad_norm": 0.7061106040435764, "learning_rate": 1.2060550616856239e-05, "loss": 0.1057, "step": 23038 }, { "epoch": 2.7320052175975333, "grad_norm": 0.7410352113852099, "learning_rate": 1.2058496840997e-05, "loss": 0.1081, "step": 23039 }, { "epoch": 2.7321237993596585, "grad_norm": 1.1220993536532746, "learning_rate": 1.2056443184442562e-05, "loss": 0.1135, "step": 23040 }, { "epoch": 2.7322423811217833, "grad_norm": 0.800671894653105, "learning_rate": 1.2054389647211833e-05, "loss": 0.0977, "step": 23041 }, { "epoch": 2.7323609628839085, "grad_norm": 0.6906868146628204, "learning_rate": 1.2052336229323757e-05, "loss": 0.0974, "step": 23042 }, { "epoch": 2.7324795446460337, "grad_norm": 0.46752021440072034, "learning_rate": 1.2050282930797263e-05, "loss": 0.0546, "step": 23043 }, { "epoch": 2.7325981264081585, "grad_norm": 0.49228973651040475, "learning_rate": 1.2048229751651283e-05, "loss": 0.082, "step": 23044 }, { "epoch": 2.7327167081702832, "grad_norm": 0.6415345423724852, "learning_rate": 1.204617669190475e-05, "loss": 0.0821, "step": 23045 }, { "epoch": 2.7328352899324084, "grad_norm": 0.8040138238428041, "learning_rate": 1.2044123751576564e-05, "loss": 0.1142, "step": 23046 }, { "epoch": 2.7329538716945336, "grad_norm": 0.8646681610341688, "learning_rate": 1.204207093068569e-05, "loss": 0.1028, "step": 23047 }, { "epoch": 2.7330724534566584, "grad_norm": 0.7452872888712717, "learning_rate": 1.2040018229251018e-05, "loss": 0.0917, "step": 23048 }, { "epoch": 2.733191035218783, "grad_norm": 0.4911370938882959, "learning_rate": 1.2037965647291489e-05, "loss": 0.0525, "step": 23049 }, { "epoch": 2.7333096169809084, "grad_norm": 0.4734322563755914, "learning_rate": 1.203591318482602e-05, "loss": 0.0687, "step": 23050 }, { "epoch": 2.7334281987430336, "grad_norm": 0.8037009097054694, "learning_rate": 1.2033860841873536e-05, "loss": 0.1168, "step": 23051 }, { "epoch": 2.7335467805051583, "grad_norm": 0.7116718912830884, "learning_rate": 1.2031808618452964e-05, "loss": 0.0965, "step": 23052 }, { "epoch": 2.733665362267283, "grad_norm": 0.6802849407263313, "learning_rate": 1.2029756514583192e-05, "loss": 0.0779, "step": 23053 }, { "epoch": 2.7337839440294083, "grad_norm": 0.6929793486052311, "learning_rate": 1.2027704530283182e-05, "loss": 0.0879, "step": 23054 }, { "epoch": 2.7339025257915335, "grad_norm": 0.6863774602146308, "learning_rate": 1.2025652665571818e-05, "loss": 0.0865, "step": 23055 }, { "epoch": 2.7340211075536582, "grad_norm": 0.5342201968684808, "learning_rate": 1.2023600920468033e-05, "loss": 0.062, "step": 23056 }, { "epoch": 2.734139689315783, "grad_norm": 0.5743407078724405, "learning_rate": 1.2021549294990719e-05, "loss": 0.0716, "step": 23057 }, { "epoch": 2.734258271077908, "grad_norm": 0.42395862756474906, "learning_rate": 1.2019497789158826e-05, "loss": 0.0604, "step": 23058 }, { "epoch": 2.7343768528400334, "grad_norm": 0.48382394584177574, "learning_rate": 1.2017446402991234e-05, "loss": 0.0559, "step": 23059 }, { "epoch": 2.734495434602158, "grad_norm": 0.9854129165798985, "learning_rate": 1.2015395136506866e-05, "loss": 0.1044, "step": 23060 }, { "epoch": 2.734614016364283, "grad_norm": 0.6087149266921947, "learning_rate": 1.2013343989724629e-05, "loss": 0.0823, "step": 23061 }, { "epoch": 2.734732598126408, "grad_norm": 1.0226208223974493, "learning_rate": 1.201129296266344e-05, "loss": 0.1123, "step": 23062 }, { "epoch": 2.7348511798885333, "grad_norm": 0.6840903715628531, "learning_rate": 1.2009242055342209e-05, "loss": 0.0913, "step": 23063 }, { "epoch": 2.734969761650658, "grad_norm": 0.7285194041642314, "learning_rate": 1.2007191267779816e-05, "loss": 0.0959, "step": 23064 }, { "epoch": 2.735088343412783, "grad_norm": 0.48133704334331157, "learning_rate": 1.2005140599995208e-05, "loss": 0.066, "step": 23065 }, { "epoch": 2.735206925174908, "grad_norm": 0.5202112242778785, "learning_rate": 1.2003090052007257e-05, "loss": 0.0681, "step": 23066 }, { "epoch": 2.7353255069370332, "grad_norm": 0.8080572442892797, "learning_rate": 1.200103962383489e-05, "loss": 0.1201, "step": 23067 }, { "epoch": 2.735444088699158, "grad_norm": 0.9905204382721391, "learning_rate": 1.1998989315496976e-05, "loss": 0.125, "step": 23068 }, { "epoch": 2.735562670461283, "grad_norm": 0.7716289025041229, "learning_rate": 1.1996939127012457e-05, "loss": 0.0973, "step": 23069 }, { "epoch": 2.735681252223408, "grad_norm": 0.6124046805506348, "learning_rate": 1.1994889058400205e-05, "loss": 0.0808, "step": 23070 }, { "epoch": 2.735799833985533, "grad_norm": 0.7977043548730671, "learning_rate": 1.1992839109679127e-05, "loss": 0.0994, "step": 23071 }, { "epoch": 2.735918415747658, "grad_norm": 0.7910628357926021, "learning_rate": 1.1990789280868123e-05, "loss": 0.0894, "step": 23072 }, { "epoch": 2.736036997509783, "grad_norm": 0.5127537355202552, "learning_rate": 1.1988739571986088e-05, "loss": 0.0634, "step": 23073 }, { "epoch": 2.736155579271908, "grad_norm": 0.48736373436842617, "learning_rate": 1.1986689983051927e-05, "loss": 0.0545, "step": 23074 }, { "epoch": 2.736274161034033, "grad_norm": 0.8247651361643328, "learning_rate": 1.198464051408452e-05, "loss": 0.1021, "step": 23075 }, { "epoch": 2.736392742796158, "grad_norm": 0.5244339755518762, "learning_rate": 1.1982591165102764e-05, "loss": 0.0821, "step": 23076 }, { "epoch": 2.736511324558283, "grad_norm": 0.660831383078059, "learning_rate": 1.1980541936125555e-05, "loss": 0.1009, "step": 23077 }, { "epoch": 2.736629906320408, "grad_norm": 0.9934536527998663, "learning_rate": 1.1978492827171794e-05, "loss": 0.1238, "step": 23078 }, { "epoch": 2.736748488082533, "grad_norm": 0.5950771479451896, "learning_rate": 1.1976443838260343e-05, "loss": 0.0927, "step": 23079 }, { "epoch": 2.7368670698446578, "grad_norm": 0.8181555584629641, "learning_rate": 1.1974394969410127e-05, "loss": 0.0961, "step": 23080 }, { "epoch": 2.736985651606783, "grad_norm": 0.5690534119184455, "learning_rate": 1.197234622064001e-05, "loss": 0.0757, "step": 23081 }, { "epoch": 2.7371042333689077, "grad_norm": 0.7247676031073997, "learning_rate": 1.1970297591968882e-05, "loss": 0.0967, "step": 23082 }, { "epoch": 2.737222815131033, "grad_norm": 0.9502016901087762, "learning_rate": 1.1968249083415631e-05, "loss": 0.1096, "step": 23083 }, { "epoch": 2.7373413968931577, "grad_norm": 0.744277339112612, "learning_rate": 1.1966200694999147e-05, "loss": 0.0847, "step": 23084 }, { "epoch": 2.737459978655283, "grad_norm": 0.515226601155056, "learning_rate": 1.1964152426738314e-05, "loss": 0.0693, "step": 23085 }, { "epoch": 2.7375785604174077, "grad_norm": 0.7051696137683684, "learning_rate": 1.1962104278652003e-05, "loss": 0.0747, "step": 23086 }, { "epoch": 2.737697142179533, "grad_norm": 0.8923779953863118, "learning_rate": 1.1960056250759105e-05, "loss": 0.1254, "step": 23087 }, { "epoch": 2.7378157239416576, "grad_norm": 0.7258364245242953, "learning_rate": 1.1958008343078495e-05, "loss": 0.0997, "step": 23088 }, { "epoch": 2.737934305703783, "grad_norm": 0.6471806930467116, "learning_rate": 1.1955960555629064e-05, "loss": 0.0757, "step": 23089 }, { "epoch": 2.7380528874659076, "grad_norm": 0.5663824680111791, "learning_rate": 1.1953912888429672e-05, "loss": 0.0782, "step": 23090 }, { "epoch": 2.738171469228033, "grad_norm": 0.8869348306532913, "learning_rate": 1.1951865341499204e-05, "loss": 0.1372, "step": 23091 }, { "epoch": 2.738290050990158, "grad_norm": 0.8174850902507066, "learning_rate": 1.1949817914856539e-05, "loss": 0.1, "step": 23092 }, { "epoch": 2.7384086327522827, "grad_norm": 0.5503972159664063, "learning_rate": 1.194777060852055e-05, "loss": 0.0657, "step": 23093 }, { "epoch": 2.7385272145144075, "grad_norm": 0.6758769634335257, "learning_rate": 1.1945723422510108e-05, "loss": 0.0886, "step": 23094 }, { "epoch": 2.7386457962765327, "grad_norm": 0.891715021946282, "learning_rate": 1.1943676356844088e-05, "loss": 0.1325, "step": 23095 }, { "epoch": 2.738764378038658, "grad_norm": 0.5254079648953677, "learning_rate": 1.1941629411541371e-05, "loss": 0.0595, "step": 23096 }, { "epoch": 2.7388829598007827, "grad_norm": 0.7877805937817046, "learning_rate": 1.193958258662081e-05, "loss": 0.1152, "step": 23097 }, { "epoch": 2.7390015415629074, "grad_norm": 0.4346954446194328, "learning_rate": 1.1937535882101281e-05, "loss": 0.0559, "step": 23098 }, { "epoch": 2.7391201233250326, "grad_norm": 0.7460255317026866, "learning_rate": 1.193548929800165e-05, "loss": 0.0803, "step": 23099 }, { "epoch": 2.739238705087158, "grad_norm": 0.5830992596601138, "learning_rate": 1.1933442834340792e-05, "loss": 0.0776, "step": 23100 }, { "epoch": 2.7393572868492826, "grad_norm": 0.739242161601661, "learning_rate": 1.1931396491137575e-05, "loss": 0.0871, "step": 23101 }, { "epoch": 2.7394758686114073, "grad_norm": 0.6334974330719747, "learning_rate": 1.1929350268410836e-05, "loss": 0.0752, "step": 23102 }, { "epoch": 2.7395944503735326, "grad_norm": 0.7683917073286749, "learning_rate": 1.1927304166179478e-05, "loss": 0.1055, "step": 23103 }, { "epoch": 2.7397130321356578, "grad_norm": 1.2435054073679588, "learning_rate": 1.1925258184462337e-05, "loss": 0.1359, "step": 23104 }, { "epoch": 2.7398316138977825, "grad_norm": 0.7257584780848799, "learning_rate": 1.1923212323278294e-05, "loss": 0.0822, "step": 23105 }, { "epoch": 2.7399501956599073, "grad_norm": 0.6298042578372426, "learning_rate": 1.1921166582646176e-05, "loss": 0.0823, "step": 23106 }, { "epoch": 2.7400687774220325, "grad_norm": 1.1187060070318102, "learning_rate": 1.1919120962584881e-05, "loss": 0.1402, "step": 23107 }, { "epoch": 2.7401873591841577, "grad_norm": 0.5684241901149921, "learning_rate": 1.1917075463113243e-05, "loss": 0.0704, "step": 23108 }, { "epoch": 2.7403059409462824, "grad_norm": 1.0314117372453524, "learning_rate": 1.1915030084250126e-05, "loss": 0.1439, "step": 23109 }, { "epoch": 2.740424522708407, "grad_norm": 1.0292443061038474, "learning_rate": 1.1912984826014384e-05, "loss": 0.126, "step": 23110 }, { "epoch": 2.7405431044705324, "grad_norm": 0.6915565714184718, "learning_rate": 1.1910939688424874e-05, "loss": 0.0972, "step": 23111 }, { "epoch": 2.7406616862326576, "grad_norm": 0.543985283469277, "learning_rate": 1.1908894671500459e-05, "loss": 0.0897, "step": 23112 }, { "epoch": 2.7407802679947824, "grad_norm": 0.4910810802485434, "learning_rate": 1.1906849775259963e-05, "loss": 0.0728, "step": 23113 }, { "epoch": 2.740898849756907, "grad_norm": 0.4819163360907256, "learning_rate": 1.1904804999722275e-05, "loss": 0.0606, "step": 23114 }, { "epoch": 2.7410174315190323, "grad_norm": 0.6985967397273753, "learning_rate": 1.1902760344906217e-05, "loss": 0.1188, "step": 23115 }, { "epoch": 2.7411360132811575, "grad_norm": 0.6328884678863625, "learning_rate": 1.1900715810830656e-05, "loss": 0.0741, "step": 23116 }, { "epoch": 2.7412545950432823, "grad_norm": 0.5386810087423353, "learning_rate": 1.1898671397514416e-05, "loss": 0.0793, "step": 23117 }, { "epoch": 2.7413731768054075, "grad_norm": 0.5980652631625646, "learning_rate": 1.1896627104976377e-05, "loss": 0.0736, "step": 23118 }, { "epoch": 2.7414917585675322, "grad_norm": 0.4614697445183384, "learning_rate": 1.1894582933235357e-05, "loss": 0.0643, "step": 23119 }, { "epoch": 2.7416103403296574, "grad_norm": 0.7200859552413702, "learning_rate": 1.1892538882310213e-05, "loss": 0.0766, "step": 23120 }, { "epoch": 2.741728922091782, "grad_norm": 0.6907502118815246, "learning_rate": 1.1890494952219788e-05, "loss": 0.114, "step": 23121 }, { "epoch": 2.7418475038539074, "grad_norm": 0.865598098409033, "learning_rate": 1.188845114298292e-05, "loss": 0.1391, "step": 23122 }, { "epoch": 2.741966085616032, "grad_norm": 0.7194538319221677, "learning_rate": 1.1886407454618467e-05, "loss": 0.1162, "step": 23123 }, { "epoch": 2.7420846673781574, "grad_norm": 0.7105125018141333, "learning_rate": 1.1884363887145236e-05, "loss": 0.093, "step": 23124 }, { "epoch": 2.742203249140282, "grad_norm": 0.6800941052590278, "learning_rate": 1.1882320440582107e-05, "loss": 0.0882, "step": 23125 }, { "epoch": 2.7423218309024073, "grad_norm": 0.46367496179140644, "learning_rate": 1.1880277114947886e-05, "loss": 0.0612, "step": 23126 }, { "epoch": 2.742440412664532, "grad_norm": 0.7881347228507966, "learning_rate": 1.187823391026143e-05, "loss": 0.0977, "step": 23127 }, { "epoch": 2.7425589944266573, "grad_norm": 0.6917065781051245, "learning_rate": 1.187619082654155e-05, "loss": 0.0828, "step": 23128 }, { "epoch": 2.742677576188782, "grad_norm": 0.6608229653215935, "learning_rate": 1.1874147863807114e-05, "loss": 0.0895, "step": 23129 }, { "epoch": 2.7427961579509073, "grad_norm": 1.2206853768861883, "learning_rate": 1.1872105022076933e-05, "loss": 0.1279, "step": 23130 }, { "epoch": 2.742914739713032, "grad_norm": 0.6197965758551146, "learning_rate": 1.1870062301369841e-05, "loss": 0.0783, "step": 23131 }, { "epoch": 2.743033321475157, "grad_norm": 0.7143669259807663, "learning_rate": 1.1868019701704675e-05, "loss": 0.0932, "step": 23132 }, { "epoch": 2.743151903237282, "grad_norm": 0.7399615047909145, "learning_rate": 1.1865977223100264e-05, "loss": 0.108, "step": 23133 }, { "epoch": 2.743270484999407, "grad_norm": 0.8559245154917009, "learning_rate": 1.1863934865575447e-05, "loss": 0.1033, "step": 23134 }, { "epoch": 2.743389066761532, "grad_norm": 0.572563723591045, "learning_rate": 1.1861892629149033e-05, "loss": 0.0672, "step": 23135 }, { "epoch": 2.743507648523657, "grad_norm": 0.5574742855253119, "learning_rate": 1.1859850513839857e-05, "loss": 0.0769, "step": 23136 }, { "epoch": 2.743626230285782, "grad_norm": 0.5932449919886581, "learning_rate": 1.1857808519666743e-05, "loss": 0.0839, "step": 23137 }, { "epoch": 2.743744812047907, "grad_norm": 0.642047499816333, "learning_rate": 1.1855766646648531e-05, "loss": 0.0849, "step": 23138 }, { "epoch": 2.743863393810032, "grad_norm": 0.7430136713736295, "learning_rate": 1.1853724894804013e-05, "loss": 0.0724, "step": 23139 }, { "epoch": 2.743981975572157, "grad_norm": 0.8194555100206072, "learning_rate": 1.185168326415205e-05, "loss": 0.1138, "step": 23140 }, { "epoch": 2.7441005573342823, "grad_norm": 0.6262802348038256, "learning_rate": 1.1849641754711433e-05, "loss": 0.0775, "step": 23141 }, { "epoch": 2.744219139096407, "grad_norm": 0.7193131787260711, "learning_rate": 1.1847600366500993e-05, "loss": 0.1066, "step": 23142 }, { "epoch": 2.744337720858532, "grad_norm": 0.5716881353120598, "learning_rate": 1.184555909953955e-05, "loss": 0.0746, "step": 23143 }, { "epoch": 2.744456302620657, "grad_norm": 0.43631985943222434, "learning_rate": 1.184351795384592e-05, "loss": 0.0519, "step": 23144 }, { "epoch": 2.744574884382782, "grad_norm": 0.7082124563675943, "learning_rate": 1.184147692943893e-05, "loss": 0.0601, "step": 23145 }, { "epoch": 2.744693466144907, "grad_norm": 0.8330704892147364, "learning_rate": 1.183943602633738e-05, "loss": 0.1082, "step": 23146 }, { "epoch": 2.7448120479070317, "grad_norm": 0.6986217569706727, "learning_rate": 1.183739524456009e-05, "loss": 0.0934, "step": 23147 }, { "epoch": 2.744930629669157, "grad_norm": 0.8732521909319247, "learning_rate": 1.1835354584125873e-05, "loss": 0.1007, "step": 23148 }, { "epoch": 2.745049211431282, "grad_norm": 0.763817611371851, "learning_rate": 1.1833314045053546e-05, "loss": 0.1201, "step": 23149 }, { "epoch": 2.745167793193407, "grad_norm": 0.7646920651004684, "learning_rate": 1.1831273627361914e-05, "loss": 0.108, "step": 23150 }, { "epoch": 2.7452863749555316, "grad_norm": 0.7726998346658454, "learning_rate": 1.1829233331069792e-05, "loss": 0.0814, "step": 23151 }, { "epoch": 2.745404956717657, "grad_norm": 0.7290121863596605, "learning_rate": 1.1827193156195998e-05, "loss": 0.0872, "step": 23152 }, { "epoch": 2.745523538479782, "grad_norm": 0.714872884854006, "learning_rate": 1.1825153102759321e-05, "loss": 0.1008, "step": 23153 }, { "epoch": 2.745642120241907, "grad_norm": 0.5539275144104406, "learning_rate": 1.1823113170778574e-05, "loss": 0.0881, "step": 23154 }, { "epoch": 2.7457607020040316, "grad_norm": 0.6873414189376511, "learning_rate": 1.1821073360272564e-05, "loss": 0.0812, "step": 23155 }, { "epoch": 2.7458792837661568, "grad_norm": 0.45798000962287394, "learning_rate": 1.181903367126011e-05, "loss": 0.0581, "step": 23156 }, { "epoch": 2.745997865528282, "grad_norm": 0.6099623557091703, "learning_rate": 1.181699410375999e-05, "loss": 0.0648, "step": 23157 }, { "epoch": 2.7461164472904067, "grad_norm": 0.7226081787052613, "learning_rate": 1.1814954657791018e-05, "loss": 0.0908, "step": 23158 }, { "epoch": 2.7462350290525315, "grad_norm": 0.8675639387170122, "learning_rate": 1.1812915333371996e-05, "loss": 0.1147, "step": 23159 }, { "epoch": 2.7463536108146567, "grad_norm": 0.5375888567709732, "learning_rate": 1.1810876130521726e-05, "loss": 0.0667, "step": 23160 }, { "epoch": 2.746472192576782, "grad_norm": 0.6832110038975256, "learning_rate": 1.1808837049259009e-05, "loss": 0.0881, "step": 23161 }, { "epoch": 2.7465907743389066, "grad_norm": 0.5205632312676047, "learning_rate": 1.1806798089602623e-05, "loss": 0.0549, "step": 23162 }, { "epoch": 2.7467093561010314, "grad_norm": 0.6139078980951905, "learning_rate": 1.1804759251571398e-05, "loss": 0.0911, "step": 23163 }, { "epoch": 2.7468279378631566, "grad_norm": 0.7016036313385798, "learning_rate": 1.1802720535184103e-05, "loss": 0.115, "step": 23164 }, { "epoch": 2.746946519625282, "grad_norm": 0.6829602416788452, "learning_rate": 1.180068194045954e-05, "loss": 0.0855, "step": 23165 }, { "epoch": 2.7470651013874066, "grad_norm": 0.6060883927442322, "learning_rate": 1.1798643467416504e-05, "loss": 0.0688, "step": 23166 }, { "epoch": 2.7471836831495318, "grad_norm": 0.6672183704070178, "learning_rate": 1.1796605116073794e-05, "loss": 0.0891, "step": 23167 }, { "epoch": 2.7473022649116565, "grad_norm": 0.9331675915869254, "learning_rate": 1.1794566886450187e-05, "loss": 0.1274, "step": 23168 }, { "epoch": 2.7474208466737817, "grad_norm": 0.6574574314680772, "learning_rate": 1.179252877856448e-05, "loss": 0.0855, "step": 23169 }, { "epoch": 2.7475394284359065, "grad_norm": 0.5074808629826048, "learning_rate": 1.1790490792435461e-05, "loss": 0.0807, "step": 23170 }, { "epoch": 2.7476580101980317, "grad_norm": 0.8329929925104862, "learning_rate": 1.1788452928081916e-05, "loss": 0.1043, "step": 23171 }, { "epoch": 2.7477765919601564, "grad_norm": 0.564710608666219, "learning_rate": 1.1786415185522645e-05, "loss": 0.0709, "step": 23172 }, { "epoch": 2.7478951737222816, "grad_norm": 0.5932547642121235, "learning_rate": 1.1784377564776406e-05, "loss": 0.0871, "step": 23173 }, { "epoch": 2.7480137554844064, "grad_norm": 0.5824500130420875, "learning_rate": 1.1782340065862016e-05, "loss": 0.0883, "step": 23174 }, { "epoch": 2.7481323372465316, "grad_norm": 0.8767838113870643, "learning_rate": 1.1780302688798231e-05, "loss": 0.1109, "step": 23175 }, { "epoch": 2.7482509190086564, "grad_norm": 0.536105045307961, "learning_rate": 1.1778265433603858e-05, "loss": 0.0807, "step": 23176 }, { "epoch": 2.7483695007707816, "grad_norm": 0.5534242662881256, "learning_rate": 1.1776228300297643e-05, "loss": 0.0811, "step": 23177 }, { "epoch": 2.7484880825329063, "grad_norm": 0.6707116299125645, "learning_rate": 1.1774191288898403e-05, "loss": 0.0948, "step": 23178 }, { "epoch": 2.7486066642950315, "grad_norm": 0.6036276969488735, "learning_rate": 1.1772154399424895e-05, "loss": 0.0719, "step": 23179 }, { "epoch": 2.7487252460571563, "grad_norm": 0.6773470820402991, "learning_rate": 1.1770117631895902e-05, "loss": 0.0864, "step": 23180 }, { "epoch": 2.7488438278192815, "grad_norm": 0.7423429118236902, "learning_rate": 1.17680809863302e-05, "loss": 0.1111, "step": 23181 }, { "epoch": 2.7489624095814063, "grad_norm": 0.8310268445102894, "learning_rate": 1.1766044462746565e-05, "loss": 0.1026, "step": 23182 }, { "epoch": 2.7490809913435315, "grad_norm": 0.6858817708633038, "learning_rate": 1.1764008061163781e-05, "loss": 0.1016, "step": 23183 }, { "epoch": 2.749199573105656, "grad_norm": 0.5477690041742157, "learning_rate": 1.1761971781600595e-05, "loss": 0.066, "step": 23184 }, { "epoch": 2.7493181548677814, "grad_norm": 0.5640038618196758, "learning_rate": 1.1759935624075813e-05, "loss": 0.0707, "step": 23185 }, { "epoch": 2.749436736629906, "grad_norm": 0.7474787906030458, "learning_rate": 1.175789958860818e-05, "loss": 0.108, "step": 23186 }, { "epoch": 2.7495553183920314, "grad_norm": 0.911031840995823, "learning_rate": 1.175586367521648e-05, "loss": 0.1244, "step": 23187 }, { "epoch": 2.749673900154156, "grad_norm": 0.4824626093640524, "learning_rate": 1.175382788391946e-05, "loss": 0.066, "step": 23188 }, { "epoch": 2.7497924819162813, "grad_norm": 0.7701567431069881, "learning_rate": 1.1751792214735921e-05, "loss": 0.0835, "step": 23189 }, { "epoch": 2.749911063678406, "grad_norm": 0.7239493623504701, "learning_rate": 1.1749756667684605e-05, "loss": 0.1096, "step": 23190 }, { "epoch": 2.7500296454405313, "grad_norm": 0.4710844094835971, "learning_rate": 1.1747721242784281e-05, "loss": 0.0649, "step": 23191 }, { "epoch": 2.750148227202656, "grad_norm": 0.5233701442149173, "learning_rate": 1.1745685940053718e-05, "loss": 0.0689, "step": 23192 }, { "epoch": 2.7502668089647813, "grad_norm": 0.8504072954685145, "learning_rate": 1.1743650759511676e-05, "loss": 0.1119, "step": 23193 }, { "epoch": 2.7503853907269065, "grad_norm": 0.5532022153350937, "learning_rate": 1.1741615701176929e-05, "loss": 0.0903, "step": 23194 }, { "epoch": 2.7505039724890312, "grad_norm": 0.7374505109362389, "learning_rate": 1.1739580765068205e-05, "loss": 0.0691, "step": 23195 }, { "epoch": 2.750622554251156, "grad_norm": 0.7906816537403468, "learning_rate": 1.1737545951204307e-05, "loss": 0.0772, "step": 23196 }, { "epoch": 2.750741136013281, "grad_norm": 0.6615695759315355, "learning_rate": 1.173551125960396e-05, "loss": 0.0769, "step": 23197 }, { "epoch": 2.7508597177754064, "grad_norm": 0.6220267347376586, "learning_rate": 1.1733476690285936e-05, "loss": 0.0704, "step": 23198 }, { "epoch": 2.750978299537531, "grad_norm": 0.8156369029073308, "learning_rate": 1.1731442243268986e-05, "loss": 0.1299, "step": 23199 }, { "epoch": 2.751096881299656, "grad_norm": 0.7291976561187722, "learning_rate": 1.1729407918571868e-05, "loss": 0.1055, "step": 23200 }, { "epoch": 2.751215463061781, "grad_norm": 0.7628092324390173, "learning_rate": 1.1727373716213347e-05, "loss": 0.0648, "step": 23201 }, { "epoch": 2.7513340448239063, "grad_norm": 0.47274566437729115, "learning_rate": 1.1725339636212152e-05, "loss": 0.0668, "step": 23202 }, { "epoch": 2.751452626586031, "grad_norm": 0.4557146895878627, "learning_rate": 1.172330567858705e-05, "loss": 0.0528, "step": 23203 }, { "epoch": 2.751571208348156, "grad_norm": 0.781567521938458, "learning_rate": 1.1721271843356785e-05, "loss": 0.0794, "step": 23204 }, { "epoch": 2.751689790110281, "grad_norm": 0.5324589047348279, "learning_rate": 1.1719238130540122e-05, "loss": 0.0625, "step": 23205 }, { "epoch": 2.7518083718724062, "grad_norm": 1.0243788622800671, "learning_rate": 1.1717204540155788e-05, "loss": 0.152, "step": 23206 }, { "epoch": 2.751926953634531, "grad_norm": 0.5710371615873754, "learning_rate": 1.1715171072222539e-05, "loss": 0.0721, "step": 23207 }, { "epoch": 2.7520455353966558, "grad_norm": 0.668131498094448, "learning_rate": 1.1713137726759121e-05, "loss": 0.0691, "step": 23208 }, { "epoch": 2.752164117158781, "grad_norm": 0.7643715407016686, "learning_rate": 1.1711104503784282e-05, "loss": 0.1096, "step": 23209 }, { "epoch": 2.752282698920906, "grad_norm": 0.9220865768194336, "learning_rate": 1.1709071403316762e-05, "loss": 0.1271, "step": 23210 }, { "epoch": 2.752401280683031, "grad_norm": 0.5811655236168525, "learning_rate": 1.1707038425375308e-05, "loss": 0.0607, "step": 23211 }, { "epoch": 2.7525198624451557, "grad_norm": 0.5055479625183099, "learning_rate": 1.1705005569978664e-05, "loss": 0.0589, "step": 23212 }, { "epoch": 2.752638444207281, "grad_norm": 0.5834324561900491, "learning_rate": 1.170297283714556e-05, "loss": 0.0864, "step": 23213 }, { "epoch": 2.752757025969406, "grad_norm": 0.9530297161050078, "learning_rate": 1.1700940226894739e-05, "loss": 0.1005, "step": 23214 }, { "epoch": 2.752875607731531, "grad_norm": 0.4900741662942976, "learning_rate": 1.1698907739244938e-05, "loss": 0.054, "step": 23215 }, { "epoch": 2.752994189493656, "grad_norm": 0.9030241336553417, "learning_rate": 1.1696875374214907e-05, "loss": 0.1471, "step": 23216 }, { "epoch": 2.753112771255781, "grad_norm": 0.9769466530893934, "learning_rate": 1.1694843131823366e-05, "loss": 0.1186, "step": 23217 }, { "epoch": 2.753231353017906, "grad_norm": 0.9068042400304376, "learning_rate": 1.1692811012089053e-05, "loss": 0.1256, "step": 23218 }, { "epoch": 2.7533499347800308, "grad_norm": 0.6259703570407561, "learning_rate": 1.1690779015030704e-05, "loss": 0.0761, "step": 23219 }, { "epoch": 2.753468516542156, "grad_norm": 0.6396594215431457, "learning_rate": 1.168874714066705e-05, "loss": 0.0886, "step": 23220 }, { "epoch": 2.7535870983042807, "grad_norm": 0.8706575368164459, "learning_rate": 1.1686715389016826e-05, "loss": 0.1354, "step": 23221 }, { "epoch": 2.753705680066406, "grad_norm": 0.6308450130044098, "learning_rate": 1.1684683760098761e-05, "loss": 0.087, "step": 23222 }, { "epoch": 2.7538242618285307, "grad_norm": 0.4752296567049809, "learning_rate": 1.1682652253931592e-05, "loss": 0.0565, "step": 23223 }, { "epoch": 2.753942843590656, "grad_norm": 0.5079119976685699, "learning_rate": 1.1680620870534028e-05, "loss": 0.0697, "step": 23224 }, { "epoch": 2.7540614253527806, "grad_norm": 0.7120180024650957, "learning_rate": 1.1678589609924809e-05, "loss": 0.0953, "step": 23225 }, { "epoch": 2.754180007114906, "grad_norm": 0.7641696036898306, "learning_rate": 1.1676558472122654e-05, "loss": 0.1204, "step": 23226 }, { "epoch": 2.7542985888770306, "grad_norm": 0.7266781344738421, "learning_rate": 1.1674527457146303e-05, "loss": 0.1124, "step": 23227 }, { "epoch": 2.754417170639156, "grad_norm": 0.5153508626326616, "learning_rate": 1.167249656501446e-05, "loss": 0.0652, "step": 23228 }, { "epoch": 2.7545357524012806, "grad_norm": 0.8319796714487494, "learning_rate": 1.1670465795745855e-05, "loss": 0.0945, "step": 23229 }, { "epoch": 2.7546543341634058, "grad_norm": 0.5574868875753586, "learning_rate": 1.1668435149359207e-05, "loss": 0.0702, "step": 23230 }, { "epoch": 2.7547729159255305, "grad_norm": 0.9530105664030789, "learning_rate": 1.1666404625873243e-05, "loss": 0.1125, "step": 23231 }, { "epoch": 2.7548914976876557, "grad_norm": 1.2054929310270381, "learning_rate": 1.1664374225306681e-05, "loss": 0.1461, "step": 23232 }, { "epoch": 2.7550100794497805, "grad_norm": 0.6333081002597546, "learning_rate": 1.1662343947678223e-05, "loss": 0.0947, "step": 23233 }, { "epoch": 2.7551286612119057, "grad_norm": 0.5226178631345191, "learning_rate": 1.1660313793006614e-05, "loss": 0.0679, "step": 23234 }, { "epoch": 2.7552472429740305, "grad_norm": 0.4992619557426473, "learning_rate": 1.1658283761310543e-05, "loss": 0.0724, "step": 23235 }, { "epoch": 2.7553658247361557, "grad_norm": 0.7398892847942955, "learning_rate": 1.1656253852608737e-05, "loss": 0.1298, "step": 23236 }, { "epoch": 2.7554844064982804, "grad_norm": 0.566883579239692, "learning_rate": 1.1654224066919906e-05, "loss": 0.0817, "step": 23237 }, { "epoch": 2.7556029882604056, "grad_norm": 0.8076735117892134, "learning_rate": 1.1652194404262773e-05, "loss": 0.1101, "step": 23238 }, { "epoch": 2.7557215700225304, "grad_norm": 0.5089694423602504, "learning_rate": 1.1650164864656032e-05, "loss": 0.0666, "step": 23239 }, { "epoch": 2.7558401517846556, "grad_norm": 0.656702611614249, "learning_rate": 1.1648135448118397e-05, "loss": 0.0825, "step": 23240 }, { "epoch": 2.7559587335467803, "grad_norm": 0.842515503464508, "learning_rate": 1.1646106154668582e-05, "loss": 0.1331, "step": 23241 }, { "epoch": 2.7560773153089055, "grad_norm": 0.8428625635548418, "learning_rate": 1.1644076984325292e-05, "loss": 0.1056, "step": 23242 }, { "epoch": 2.7561958970710307, "grad_norm": 0.6562214446557147, "learning_rate": 1.1642047937107246e-05, "loss": 0.0826, "step": 23243 }, { "epoch": 2.7563144788331555, "grad_norm": 0.47945732905923144, "learning_rate": 1.1640019013033115e-05, "loss": 0.0749, "step": 23244 }, { "epoch": 2.7564330605952803, "grad_norm": 0.5305519751948833, "learning_rate": 1.1637990212121646e-05, "loss": 0.0635, "step": 23245 }, { "epoch": 2.7565516423574055, "grad_norm": 0.6613964189324458, "learning_rate": 1.1635961534391513e-05, "loss": 0.0978, "step": 23246 }, { "epoch": 2.7566702241195307, "grad_norm": 0.6249804084678224, "learning_rate": 1.1633932979861425e-05, "loss": 0.0781, "step": 23247 }, { "epoch": 2.7567888058816554, "grad_norm": 0.5188163252537769, "learning_rate": 1.1631904548550086e-05, "loss": 0.0649, "step": 23248 }, { "epoch": 2.75690738764378, "grad_norm": 0.6278208479737726, "learning_rate": 1.1629876240476192e-05, "loss": 0.083, "step": 23249 }, { "epoch": 2.7570259694059054, "grad_norm": 0.6053085938702235, "learning_rate": 1.1627848055658456e-05, "loss": 0.0788, "step": 23250 }, { "epoch": 2.7571445511680306, "grad_norm": 0.6059622991678569, "learning_rate": 1.1625819994115541e-05, "loss": 0.0735, "step": 23251 }, { "epoch": 2.7572631329301553, "grad_norm": 0.5126108803591934, "learning_rate": 1.1623792055866184e-05, "loss": 0.0668, "step": 23252 }, { "epoch": 2.75738171469228, "grad_norm": 0.9577015470226847, "learning_rate": 1.1621764240929053e-05, "loss": 0.1346, "step": 23253 }, { "epoch": 2.7575002964544053, "grad_norm": 0.7054508274891346, "learning_rate": 1.161973654932286e-05, "loss": 0.1132, "step": 23254 }, { "epoch": 2.7576188782165305, "grad_norm": 0.9454499525837586, "learning_rate": 1.161770898106627e-05, "loss": 0.1141, "step": 23255 }, { "epoch": 2.7577374599786553, "grad_norm": 0.8639526446855226, "learning_rate": 1.1615681536178011e-05, "loss": 0.0979, "step": 23256 }, { "epoch": 2.75785604174078, "grad_norm": 0.6646357053917555, "learning_rate": 1.1613654214676744e-05, "loss": 0.108, "step": 23257 }, { "epoch": 2.7579746235029052, "grad_norm": 0.6994130065185173, "learning_rate": 1.1611627016581173e-05, "loss": 0.0949, "step": 23258 }, { "epoch": 2.7580932052650304, "grad_norm": 0.7370143765838, "learning_rate": 1.1609599941909979e-05, "loss": 0.0913, "step": 23259 }, { "epoch": 2.758211787027155, "grad_norm": 0.8145597353659629, "learning_rate": 1.1607572990681857e-05, "loss": 0.0884, "step": 23260 }, { "epoch": 2.75833036878928, "grad_norm": 0.755683606584451, "learning_rate": 1.1605546162915495e-05, "loss": 0.0914, "step": 23261 }, { "epoch": 2.758448950551405, "grad_norm": 0.7982427033390126, "learning_rate": 1.1603519458629564e-05, "loss": 0.1218, "step": 23262 }, { "epoch": 2.7585675323135304, "grad_norm": 0.5197582612355064, "learning_rate": 1.1601492877842756e-05, "loss": 0.0739, "step": 23263 }, { "epoch": 2.758686114075655, "grad_norm": 0.7584675540523226, "learning_rate": 1.1599466420573754e-05, "loss": 0.0935, "step": 23264 }, { "epoch": 2.75880469583778, "grad_norm": 0.5798880276573742, "learning_rate": 1.159744008684125e-05, "loss": 0.0784, "step": 23265 }, { "epoch": 2.758923277599905, "grad_norm": 0.7099682157881472, "learning_rate": 1.159541387666389e-05, "loss": 0.1022, "step": 23266 }, { "epoch": 2.7590418593620303, "grad_norm": 0.5297049571085045, "learning_rate": 1.1593387790060397e-05, "loss": 0.0689, "step": 23267 }, { "epoch": 2.759160441124155, "grad_norm": 0.7394542871229782, "learning_rate": 1.1591361827049418e-05, "loss": 0.0786, "step": 23268 }, { "epoch": 2.7592790228862802, "grad_norm": 0.9031389693201954, "learning_rate": 1.158933598764964e-05, "loss": 0.1152, "step": 23269 }, { "epoch": 2.759397604648405, "grad_norm": 0.6602983535695344, "learning_rate": 1.1587310271879742e-05, "loss": 0.0675, "step": 23270 }, { "epoch": 2.75951618641053, "grad_norm": 0.7099046392761678, "learning_rate": 1.1585284679758393e-05, "loss": 0.0852, "step": 23271 }, { "epoch": 2.759634768172655, "grad_norm": 0.7838198491590664, "learning_rate": 1.1583259211304279e-05, "loss": 0.0998, "step": 23272 }, { "epoch": 2.75975334993478, "grad_norm": 0.9412534715529464, "learning_rate": 1.1581233866536054e-05, "loss": 0.1364, "step": 23273 }, { "epoch": 2.759871931696905, "grad_norm": 0.4234409832166605, "learning_rate": 1.1579208645472397e-05, "loss": 0.0547, "step": 23274 }, { "epoch": 2.75999051345903, "grad_norm": 0.7878448620175719, "learning_rate": 1.1577183548131978e-05, "loss": 0.1055, "step": 23275 }, { "epoch": 2.760109095221155, "grad_norm": 0.42337009067927306, "learning_rate": 1.1575158574533475e-05, "loss": 0.0446, "step": 23276 }, { "epoch": 2.76022767698328, "grad_norm": 1.0373711734965738, "learning_rate": 1.157313372469554e-05, "loss": 0.143, "step": 23277 }, { "epoch": 2.760346258745405, "grad_norm": 0.5745957793321691, "learning_rate": 1.1571108998636846e-05, "loss": 0.0776, "step": 23278 }, { "epoch": 2.76046484050753, "grad_norm": 0.9687250682583013, "learning_rate": 1.156908439637606e-05, "loss": 0.1109, "step": 23279 }, { "epoch": 2.760583422269655, "grad_norm": 0.7308304182036192, "learning_rate": 1.1567059917931844e-05, "loss": 0.0984, "step": 23280 }, { "epoch": 2.76070200403178, "grad_norm": 0.5997574067741324, "learning_rate": 1.1565035563322866e-05, "loss": 0.0737, "step": 23281 }, { "epoch": 2.7608205857939048, "grad_norm": 0.5432995121155878, "learning_rate": 1.1563011332567782e-05, "loss": 0.0736, "step": 23282 }, { "epoch": 2.76093916755603, "grad_norm": 0.5460655131407403, "learning_rate": 1.1560987225685265e-05, "loss": 0.0663, "step": 23283 }, { "epoch": 2.7610577493181547, "grad_norm": 0.5020434905573673, "learning_rate": 1.155896324269396e-05, "loss": 0.0688, "step": 23284 }, { "epoch": 2.76117633108028, "grad_norm": 0.6367110578217066, "learning_rate": 1.155693938361253e-05, "loss": 0.0774, "step": 23285 }, { "epoch": 2.7612949128424047, "grad_norm": 0.9362888084039903, "learning_rate": 1.1554915648459633e-05, "loss": 0.1176, "step": 23286 }, { "epoch": 2.76141349460453, "grad_norm": 0.6931854928125081, "learning_rate": 1.1552892037253934e-05, "loss": 0.1019, "step": 23287 }, { "epoch": 2.7615320763666547, "grad_norm": 0.6396179856485513, "learning_rate": 1.1550868550014074e-05, "loss": 0.0835, "step": 23288 }, { "epoch": 2.76165065812878, "grad_norm": 0.5402306976064243, "learning_rate": 1.1548845186758713e-05, "loss": 0.072, "step": 23289 }, { "epoch": 2.7617692398909046, "grad_norm": 1.0526982260744158, "learning_rate": 1.1546821947506506e-05, "loss": 0.1412, "step": 23290 }, { "epoch": 2.76188782165303, "grad_norm": 0.5665635218866482, "learning_rate": 1.15447988322761e-05, "loss": 0.0777, "step": 23291 }, { "epoch": 2.762006403415155, "grad_norm": 0.7918626368213728, "learning_rate": 1.154277584108616e-05, "loss": 0.1171, "step": 23292 }, { "epoch": 2.76212498517728, "grad_norm": 0.46444171727530853, "learning_rate": 1.1540752973955305e-05, "loss": 0.064, "step": 23293 }, { "epoch": 2.7622435669394045, "grad_norm": 0.8922312189161457, "learning_rate": 1.1538730230902222e-05, "loss": 0.0995, "step": 23294 }, { "epoch": 2.7623621487015297, "grad_norm": 0.8093222293804998, "learning_rate": 1.1536707611945529e-05, "loss": 0.1219, "step": 23295 }, { "epoch": 2.762480730463655, "grad_norm": 0.7851358563393909, "learning_rate": 1.1534685117103883e-05, "loss": 0.105, "step": 23296 }, { "epoch": 2.7625993122257797, "grad_norm": 0.4576093595737123, "learning_rate": 1.1532662746395928e-05, "loss": 0.0576, "step": 23297 }, { "epoch": 2.7627178939879045, "grad_norm": 0.5444537439459317, "learning_rate": 1.1530640499840317e-05, "loss": 0.0656, "step": 23298 }, { "epoch": 2.7628364757500297, "grad_norm": 0.6582817680778555, "learning_rate": 1.1528618377455675e-05, "loss": 0.0821, "step": 23299 }, { "epoch": 2.762955057512155, "grad_norm": 0.505184720603796, "learning_rate": 1.1526596379260649e-05, "loss": 0.0612, "step": 23300 }, { "epoch": 2.7630736392742796, "grad_norm": 0.794154853980335, "learning_rate": 1.1524574505273883e-05, "loss": 0.1029, "step": 23301 }, { "epoch": 2.7631922210364044, "grad_norm": 0.539451222815244, "learning_rate": 1.1522552755514016e-05, "loss": 0.0764, "step": 23302 }, { "epoch": 2.7633108027985296, "grad_norm": 0.6895413125412634, "learning_rate": 1.1520531129999695e-05, "loss": 0.0894, "step": 23303 }, { "epoch": 2.763429384560655, "grad_norm": 0.4813650381435795, "learning_rate": 1.1518509628749529e-05, "loss": 0.0669, "step": 23304 }, { "epoch": 2.7635479663227795, "grad_norm": 0.4856189387824942, "learning_rate": 1.151648825178219e-05, "loss": 0.0589, "step": 23305 }, { "epoch": 2.7636665480849043, "grad_norm": 0.6221462204652989, "learning_rate": 1.1514466999116286e-05, "loss": 0.0815, "step": 23306 }, { "epoch": 2.7637851298470295, "grad_norm": 0.5302939001522381, "learning_rate": 1.1512445870770458e-05, "loss": 0.0573, "step": 23307 }, { "epoch": 2.7639037116091547, "grad_norm": 0.6888417807278513, "learning_rate": 1.1510424866763341e-05, "loss": 0.0871, "step": 23308 }, { "epoch": 2.7640222933712795, "grad_norm": 0.6251860628045091, "learning_rate": 1.1508403987113563e-05, "loss": 0.0801, "step": 23309 }, { "epoch": 2.7641408751334042, "grad_norm": 0.5388388846968905, "learning_rate": 1.1506383231839767e-05, "loss": 0.0798, "step": 23310 }, { "epoch": 2.7642594568955294, "grad_norm": 0.7559227987560897, "learning_rate": 1.1504362600960552e-05, "loss": 0.1076, "step": 23311 }, { "epoch": 2.7643780386576546, "grad_norm": 0.548984212307179, "learning_rate": 1.1502342094494584e-05, "loss": 0.0776, "step": 23312 }, { "epoch": 2.7644966204197794, "grad_norm": 0.653059990482883, "learning_rate": 1.1500321712460457e-05, "loss": 0.0854, "step": 23313 }, { "epoch": 2.764615202181904, "grad_norm": 0.7876343906844039, "learning_rate": 1.1498301454876822e-05, "loss": 0.1079, "step": 23314 }, { "epoch": 2.7647337839440294, "grad_norm": 0.7448892793079768, "learning_rate": 1.149628132176227e-05, "loss": 0.0708, "step": 23315 }, { "epoch": 2.7648523657061546, "grad_norm": 0.6658886928423825, "learning_rate": 1.1494261313135466e-05, "loss": 0.08, "step": 23316 }, { "epoch": 2.7649709474682793, "grad_norm": 0.7739774515176077, "learning_rate": 1.1492241429014999e-05, "loss": 0.0762, "step": 23317 }, { "epoch": 2.7650895292304045, "grad_norm": 0.5776154862076927, "learning_rate": 1.1490221669419501e-05, "loss": 0.0767, "step": 23318 }, { "epoch": 2.7652081109925293, "grad_norm": 0.46537944833319633, "learning_rate": 1.1488202034367596e-05, "loss": 0.0506, "step": 23319 }, { "epoch": 2.7653266927546545, "grad_norm": 0.5448760824536825, "learning_rate": 1.1486182523877895e-05, "loss": 0.0792, "step": 23320 }, { "epoch": 2.7654452745167792, "grad_norm": 0.4462326028866552, "learning_rate": 1.148416313796903e-05, "loss": 0.0537, "step": 23321 }, { "epoch": 2.7655638562789044, "grad_norm": 0.5705133966073183, "learning_rate": 1.148214387665959e-05, "loss": 0.0779, "step": 23322 }, { "epoch": 2.765682438041029, "grad_norm": 0.7417066776767419, "learning_rate": 1.1480124739968223e-05, "loss": 0.1124, "step": 23323 }, { "epoch": 2.7658010198031544, "grad_norm": 0.6634549195528942, "learning_rate": 1.1478105727913518e-05, "loss": 0.0726, "step": 23324 }, { "epoch": 2.765919601565279, "grad_norm": 0.6394946677835075, "learning_rate": 1.1476086840514107e-05, "loss": 0.0792, "step": 23325 }, { "epoch": 2.7660381833274044, "grad_norm": 0.8017176771985788, "learning_rate": 1.1474068077788572e-05, "loss": 0.0872, "step": 23326 }, { "epoch": 2.766156765089529, "grad_norm": 0.7098987646759262, "learning_rate": 1.1472049439755561e-05, "loss": 0.1023, "step": 23327 }, { "epoch": 2.7662753468516543, "grad_norm": 0.9067473373385154, "learning_rate": 1.1470030926433659e-05, "loss": 0.0843, "step": 23328 }, { "epoch": 2.766393928613779, "grad_norm": 0.46563506577301267, "learning_rate": 1.1468012537841477e-05, "loss": 0.0719, "step": 23329 }, { "epoch": 2.7665125103759043, "grad_norm": 0.37837519169115247, "learning_rate": 1.1465994273997623e-05, "loss": 0.0541, "step": 23330 }, { "epoch": 2.766631092138029, "grad_norm": 0.5709801762622949, "learning_rate": 1.1463976134920709e-05, "loss": 0.0687, "step": 23331 }, { "epoch": 2.7667496739001542, "grad_norm": 0.7055184252950649, "learning_rate": 1.1461958120629347e-05, "loss": 0.0903, "step": 23332 }, { "epoch": 2.766868255662279, "grad_norm": 0.4641426089532821, "learning_rate": 1.1459940231142116e-05, "loss": 0.0603, "step": 23333 }, { "epoch": 2.766986837424404, "grad_norm": 0.6595292511284725, "learning_rate": 1.1457922466477633e-05, "loss": 0.1007, "step": 23334 }, { "epoch": 2.767105419186529, "grad_norm": 0.47117515132971005, "learning_rate": 1.1455904826654501e-05, "loss": 0.064, "step": 23335 }, { "epoch": 2.767224000948654, "grad_norm": 0.9393848472851971, "learning_rate": 1.1453887311691325e-05, "loss": 0.123, "step": 23336 }, { "epoch": 2.767342582710779, "grad_norm": 0.737654421065937, "learning_rate": 1.1451869921606678e-05, "loss": 0.094, "step": 23337 }, { "epoch": 2.767461164472904, "grad_norm": 0.5741057997960162, "learning_rate": 1.1449852656419196e-05, "loss": 0.0717, "step": 23338 }, { "epoch": 2.767579746235029, "grad_norm": 0.8468171731757752, "learning_rate": 1.1447835516147446e-05, "loss": 0.1237, "step": 23339 }, { "epoch": 2.767698327997154, "grad_norm": 0.5350642536662792, "learning_rate": 1.1445818500810035e-05, "loss": 0.0825, "step": 23340 }, { "epoch": 2.7678169097592793, "grad_norm": 0.8982257950633143, "learning_rate": 1.1443801610425555e-05, "loss": 0.1249, "step": 23341 }, { "epoch": 2.767935491521404, "grad_norm": 0.5855653357882245, "learning_rate": 1.1441784845012602e-05, "loss": 0.0889, "step": 23342 }, { "epoch": 2.768054073283529, "grad_norm": 0.7692341243865607, "learning_rate": 1.1439768204589774e-05, "loss": 0.1054, "step": 23343 }, { "epoch": 2.768172655045654, "grad_norm": 0.5728546993457404, "learning_rate": 1.1437751689175647e-05, "loss": 0.0778, "step": 23344 }, { "epoch": 2.768291236807779, "grad_norm": 0.6192168893400326, "learning_rate": 1.1435735298788816e-05, "loss": 0.0788, "step": 23345 }, { "epoch": 2.768409818569904, "grad_norm": 0.7936938617041035, "learning_rate": 1.1433719033447873e-05, "loss": 0.1044, "step": 23346 }, { "epoch": 2.7685284003320287, "grad_norm": 0.547924720873421, "learning_rate": 1.1431702893171414e-05, "loss": 0.088, "step": 23347 }, { "epoch": 2.768646982094154, "grad_norm": 0.5610784004624985, "learning_rate": 1.1429686877978005e-05, "loss": 0.0779, "step": 23348 }, { "epoch": 2.768765563856279, "grad_norm": 0.7195750973540213, "learning_rate": 1.1427670987886244e-05, "loss": 0.0957, "step": 23349 }, { "epoch": 2.768884145618404, "grad_norm": 0.7515808115424233, "learning_rate": 1.1425655222914713e-05, "loss": 0.1134, "step": 23350 }, { "epoch": 2.7690027273805287, "grad_norm": 0.48379055752324496, "learning_rate": 1.1423639583081994e-05, "loss": 0.0604, "step": 23351 }, { "epoch": 2.769121309142654, "grad_norm": 0.5486073344076978, "learning_rate": 1.1421624068406669e-05, "loss": 0.0827, "step": 23352 }, { "epoch": 2.769239890904779, "grad_norm": 0.7772881082960681, "learning_rate": 1.141960867890732e-05, "loss": 0.0937, "step": 23353 }, { "epoch": 2.769358472666904, "grad_norm": 0.6981563216572481, "learning_rate": 1.1417593414602534e-05, "loss": 0.1012, "step": 23354 }, { "epoch": 2.7694770544290286, "grad_norm": 0.49997338205292047, "learning_rate": 1.1415578275510874e-05, "loss": 0.0743, "step": 23355 }, { "epoch": 2.769595636191154, "grad_norm": 0.5883682065953693, "learning_rate": 1.1413563261650923e-05, "loss": 0.0716, "step": 23356 }, { "epoch": 2.769714217953279, "grad_norm": 0.5370651452180656, "learning_rate": 1.1411548373041256e-05, "loss": 0.0814, "step": 23357 }, { "epoch": 2.7698327997154037, "grad_norm": 0.6621706147524129, "learning_rate": 1.1409533609700451e-05, "loss": 0.0871, "step": 23358 }, { "epoch": 2.7699513814775285, "grad_norm": 0.6409495134611329, "learning_rate": 1.140751897164709e-05, "loss": 0.0904, "step": 23359 }, { "epoch": 2.7700699632396537, "grad_norm": 0.53667660135656, "learning_rate": 1.1405504458899719e-05, "loss": 0.0662, "step": 23360 }, { "epoch": 2.770188545001779, "grad_norm": 0.7542789881243879, "learning_rate": 1.140349007147694e-05, "loss": 0.1142, "step": 23361 }, { "epoch": 2.7703071267639037, "grad_norm": 0.7099627898056768, "learning_rate": 1.1401475809397306e-05, "loss": 0.0799, "step": 23362 }, { "epoch": 2.7704257085260284, "grad_norm": 0.7916353591707399, "learning_rate": 1.1399461672679396e-05, "loss": 0.1121, "step": 23363 }, { "epoch": 2.7705442902881536, "grad_norm": 0.45507026604928325, "learning_rate": 1.139744766134175e-05, "loss": 0.0651, "step": 23364 }, { "epoch": 2.770662872050279, "grad_norm": 0.6668814071840288, "learning_rate": 1.1395433775402977e-05, "loss": 0.1073, "step": 23365 }, { "epoch": 2.7707814538124036, "grad_norm": 0.8745505513566292, "learning_rate": 1.1393420014881615e-05, "loss": 0.1252, "step": 23366 }, { "epoch": 2.770900035574529, "grad_norm": 0.7363926793999217, "learning_rate": 1.1391406379796232e-05, "loss": 0.0992, "step": 23367 }, { "epoch": 2.7710186173366536, "grad_norm": 0.4161016289198815, "learning_rate": 1.1389392870165395e-05, "loss": 0.055, "step": 23368 }, { "epoch": 2.7711371990987788, "grad_norm": 0.6919787471634709, "learning_rate": 1.1387379486007662e-05, "loss": 0.0868, "step": 23369 }, { "epoch": 2.7712557808609035, "grad_norm": 0.5464815170727892, "learning_rate": 1.138536622734161e-05, "loss": 0.0948, "step": 23370 }, { "epoch": 2.7713743626230287, "grad_norm": 0.7340054691771282, "learning_rate": 1.1383353094185767e-05, "loss": 0.0903, "step": 23371 }, { "epoch": 2.7714929443851535, "grad_norm": 0.5296227315584351, "learning_rate": 1.1381340086558729e-05, "loss": 0.0773, "step": 23372 }, { "epoch": 2.7716115261472787, "grad_norm": 0.945135670142948, "learning_rate": 1.1379327204479026e-05, "loss": 0.0852, "step": 23373 }, { "epoch": 2.7717301079094034, "grad_norm": 0.7888145318470273, "learning_rate": 1.1377314447965231e-05, "loss": 0.0974, "step": 23374 }, { "epoch": 2.7718486896715286, "grad_norm": 0.5861396644734556, "learning_rate": 1.1375301817035877e-05, "loss": 0.0727, "step": 23375 }, { "epoch": 2.7719672714336534, "grad_norm": 0.6379101022446391, "learning_rate": 1.1373289311709548e-05, "loss": 0.0964, "step": 23376 }, { "epoch": 2.7720858531957786, "grad_norm": 0.5497181388919203, "learning_rate": 1.1371276932004774e-05, "loss": 0.0575, "step": 23377 }, { "epoch": 2.7722044349579034, "grad_norm": 0.6854865097937454, "learning_rate": 1.1369264677940112e-05, "loss": 0.0988, "step": 23378 }, { "epoch": 2.7723230167200286, "grad_norm": 0.44659065601487286, "learning_rate": 1.1367252549534116e-05, "loss": 0.054, "step": 23379 }, { "epoch": 2.7724415984821533, "grad_norm": 0.7234591094438547, "learning_rate": 1.1365240546805331e-05, "loss": 0.0996, "step": 23380 }, { "epoch": 2.7725601802442785, "grad_norm": 0.6941569146083866, "learning_rate": 1.1363228669772321e-05, "loss": 0.0829, "step": 23381 }, { "epoch": 2.7726787620064033, "grad_norm": 0.742955521816391, "learning_rate": 1.13612169184536e-05, "loss": 0.1014, "step": 23382 }, { "epoch": 2.7727973437685285, "grad_norm": 0.6934556287992936, "learning_rate": 1.1359205292867754e-05, "loss": 0.0893, "step": 23383 }, { "epoch": 2.7729159255306532, "grad_norm": 0.5126996984529428, "learning_rate": 1.1357193793033297e-05, "loss": 0.0689, "step": 23384 }, { "epoch": 2.7730345072927784, "grad_norm": 0.49324541001856353, "learning_rate": 1.1355182418968791e-05, "loss": 0.0656, "step": 23385 }, { "epoch": 2.773153089054903, "grad_norm": 0.5782235802757719, "learning_rate": 1.1353171170692755e-05, "loss": 0.0779, "step": 23386 }, { "epoch": 2.7732716708170284, "grad_norm": 0.7313779843211881, "learning_rate": 1.1351160048223764e-05, "loss": 0.1065, "step": 23387 }, { "epoch": 2.773390252579153, "grad_norm": 0.6747690931764576, "learning_rate": 1.1349149051580329e-05, "loss": 0.0718, "step": 23388 }, { "epoch": 2.7735088343412784, "grad_norm": 0.7624767170465404, "learning_rate": 1.1347138180781003e-05, "loss": 0.114, "step": 23389 }, { "epoch": 2.773627416103403, "grad_norm": 0.6396540037061718, "learning_rate": 1.134512743584432e-05, "loss": 0.0806, "step": 23390 }, { "epoch": 2.7737459978655283, "grad_norm": 0.6363896486785939, "learning_rate": 1.1343116816788815e-05, "loss": 0.0821, "step": 23391 }, { "epoch": 2.773864579627653, "grad_norm": 0.6699315860091096, "learning_rate": 1.1341106323633036e-05, "loss": 0.0678, "step": 23392 }, { "epoch": 2.7739831613897783, "grad_norm": 0.5829622522369377, "learning_rate": 1.1339095956395488e-05, "loss": 0.0722, "step": 23393 }, { "epoch": 2.7741017431519035, "grad_norm": 0.5885598194121422, "learning_rate": 1.1337085715094743e-05, "loss": 0.0897, "step": 23394 }, { "epoch": 2.7742203249140283, "grad_norm": 0.6903262979015492, "learning_rate": 1.1335075599749304e-05, "loss": 0.1006, "step": 23395 }, { "epoch": 2.774338906676153, "grad_norm": 0.9352921979957686, "learning_rate": 1.133306561037772e-05, "loss": 0.1449, "step": 23396 }, { "epoch": 2.774457488438278, "grad_norm": 0.6504432537003031, "learning_rate": 1.1331055746998493e-05, "loss": 0.0712, "step": 23397 }, { "epoch": 2.7745760702004034, "grad_norm": 0.5477667416647569, "learning_rate": 1.1329046009630187e-05, "loss": 0.0576, "step": 23398 }, { "epoch": 2.774694651962528, "grad_norm": 0.6496991031399796, "learning_rate": 1.1327036398291304e-05, "loss": 0.1025, "step": 23399 }, { "epoch": 2.774813233724653, "grad_norm": 0.4666977099580574, "learning_rate": 1.1325026913000378e-05, "loss": 0.0729, "step": 23400 }, { "epoch": 2.774931815486778, "grad_norm": 0.5293185400135155, "learning_rate": 1.1323017553775936e-05, "loss": 0.0857, "step": 23401 }, { "epoch": 2.7750503972489033, "grad_norm": 0.48627569457089326, "learning_rate": 1.13210083206365e-05, "loss": 0.0737, "step": 23402 }, { "epoch": 2.775168979011028, "grad_norm": 0.45805963640638914, "learning_rate": 1.1318999213600603e-05, "loss": 0.0604, "step": 23403 }, { "epoch": 2.775287560773153, "grad_norm": 0.5417810624275166, "learning_rate": 1.1316990232686745e-05, "loss": 0.0704, "step": 23404 }, { "epoch": 2.775406142535278, "grad_norm": 0.6853309247786881, "learning_rate": 1.1314981377913459e-05, "loss": 0.1141, "step": 23405 }, { "epoch": 2.7755247242974033, "grad_norm": 0.5338295154476604, "learning_rate": 1.1312972649299264e-05, "loss": 0.0614, "step": 23406 }, { "epoch": 2.775643306059528, "grad_norm": 0.6712073322349924, "learning_rate": 1.1310964046862676e-05, "loss": 0.0668, "step": 23407 }, { "epoch": 2.775761887821653, "grad_norm": 0.7405397344719932, "learning_rate": 1.1308955570622212e-05, "loss": 0.0939, "step": 23408 }, { "epoch": 2.775880469583778, "grad_norm": 0.8364662860923527, "learning_rate": 1.1306947220596389e-05, "loss": 0.123, "step": 23409 }, { "epoch": 2.775999051345903, "grad_norm": 0.7164374863204429, "learning_rate": 1.1304938996803732e-05, "loss": 0.0898, "step": 23410 }, { "epoch": 2.776117633108028, "grad_norm": 0.4272130960707243, "learning_rate": 1.1302930899262734e-05, "loss": 0.048, "step": 23411 }, { "epoch": 2.7762362148701527, "grad_norm": 0.6537685553592821, "learning_rate": 1.1300922927991913e-05, "loss": 0.0911, "step": 23412 }, { "epoch": 2.776354796632278, "grad_norm": 0.7987374489929583, "learning_rate": 1.1298915083009787e-05, "loss": 0.0878, "step": 23413 }, { "epoch": 2.776473378394403, "grad_norm": 0.6359388984102033, "learning_rate": 1.1296907364334872e-05, "loss": 0.0582, "step": 23414 }, { "epoch": 2.776591960156528, "grad_norm": 0.6306017588761372, "learning_rate": 1.1294899771985657e-05, "loss": 0.0839, "step": 23415 }, { "epoch": 2.776710541918653, "grad_norm": 0.5303771852182746, "learning_rate": 1.129289230598066e-05, "loss": 0.0851, "step": 23416 }, { "epoch": 2.776829123680778, "grad_norm": 0.5262721251144085, "learning_rate": 1.1290884966338388e-05, "loss": 0.0799, "step": 23417 }, { "epoch": 2.776947705442903, "grad_norm": 0.7592412015378753, "learning_rate": 1.1288877753077346e-05, "loss": 0.0819, "step": 23418 }, { "epoch": 2.777066287205028, "grad_norm": 0.6289334901514505, "learning_rate": 1.1286870666216043e-05, "loss": 0.0847, "step": 23419 }, { "epoch": 2.777184868967153, "grad_norm": 0.6161980919296172, "learning_rate": 1.1284863705772961e-05, "loss": 0.0774, "step": 23420 }, { "epoch": 2.7773034507292778, "grad_norm": 0.4469107934819624, "learning_rate": 1.1282856871766634e-05, "loss": 0.0668, "step": 23421 }, { "epoch": 2.777422032491403, "grad_norm": 0.7742595820781518, "learning_rate": 1.1280850164215537e-05, "loss": 0.0972, "step": 23422 }, { "epoch": 2.7775406142535277, "grad_norm": 0.8023681302508702, "learning_rate": 1.127884358313818e-05, "loss": 0.1154, "step": 23423 }, { "epoch": 2.777659196015653, "grad_norm": 0.4821847835482715, "learning_rate": 1.1276837128553055e-05, "loss": 0.0566, "step": 23424 }, { "epoch": 2.7777777777777777, "grad_norm": 0.532604842604301, "learning_rate": 1.1274830800478674e-05, "loss": 0.079, "step": 23425 }, { "epoch": 2.777896359539903, "grad_norm": 0.6810571639025529, "learning_rate": 1.1272824598933515e-05, "loss": 0.0976, "step": 23426 }, { "epoch": 2.7780149413020276, "grad_norm": 0.470918066238956, "learning_rate": 1.1270818523936078e-05, "loss": 0.0541, "step": 23427 }, { "epoch": 2.778133523064153, "grad_norm": 0.8312389584111695, "learning_rate": 1.1268812575504858e-05, "loss": 0.0964, "step": 23428 }, { "epoch": 2.7782521048262776, "grad_norm": 0.570237066460678, "learning_rate": 1.126680675365835e-05, "loss": 0.0843, "step": 23429 }, { "epoch": 2.778370686588403, "grad_norm": 0.6971174895239831, "learning_rate": 1.1264801058415048e-05, "loss": 0.1085, "step": 23430 }, { "epoch": 2.7784892683505276, "grad_norm": 1.0652720386557615, "learning_rate": 1.126279548979342e-05, "loss": 0.118, "step": 23431 }, { "epoch": 2.7786078501126528, "grad_norm": 0.6056262957213158, "learning_rate": 1.1260790047811993e-05, "loss": 0.0859, "step": 23432 }, { "epoch": 2.7787264318747775, "grad_norm": 0.6108381834806963, "learning_rate": 1.1258784732489221e-05, "loss": 0.0787, "step": 23433 }, { "epoch": 2.7788450136369027, "grad_norm": 0.6663302154018853, "learning_rate": 1.1256779543843616e-05, "loss": 0.0931, "step": 23434 }, { "epoch": 2.7789635953990275, "grad_norm": 0.5494370359363738, "learning_rate": 1.1254774481893632e-05, "loss": 0.0886, "step": 23435 }, { "epoch": 2.7790821771611527, "grad_norm": 0.6283477671677368, "learning_rate": 1.1252769546657788e-05, "loss": 0.0751, "step": 23436 }, { "epoch": 2.7792007589232774, "grad_norm": 0.7279076356755718, "learning_rate": 1.1250764738154543e-05, "loss": 0.0639, "step": 23437 }, { "epoch": 2.7793193406854027, "grad_norm": 0.845724506704306, "learning_rate": 1.1248760056402386e-05, "loss": 0.1077, "step": 23438 }, { "epoch": 2.7794379224475274, "grad_norm": 0.603552704210284, "learning_rate": 1.1246755501419797e-05, "loss": 0.075, "step": 23439 }, { "epoch": 2.7795565042096526, "grad_norm": 0.610258779230439, "learning_rate": 1.1244751073225257e-05, "loss": 0.0915, "step": 23440 }, { "epoch": 2.7796750859717774, "grad_norm": 0.6081004399159734, "learning_rate": 1.1242746771837256e-05, "loss": 0.067, "step": 23441 }, { "epoch": 2.7797936677339026, "grad_norm": 0.6791452516700297, "learning_rate": 1.124074259727424e-05, "loss": 0.0981, "step": 23442 }, { "epoch": 2.7799122494960278, "grad_norm": 0.8201242855706209, "learning_rate": 1.1238738549554722e-05, "loss": 0.102, "step": 23443 }, { "epoch": 2.7800308312581525, "grad_norm": 0.588990882440281, "learning_rate": 1.123673462869715e-05, "loss": 0.0841, "step": 23444 }, { "epoch": 2.7801494130202773, "grad_norm": 0.556993754250878, "learning_rate": 1.123473083472002e-05, "loss": 0.0659, "step": 23445 }, { "epoch": 2.7802679947824025, "grad_norm": 0.5045315666076647, "learning_rate": 1.1232727167641768e-05, "loss": 0.0558, "step": 23446 }, { "epoch": 2.7803865765445277, "grad_norm": 0.6534574266477436, "learning_rate": 1.123072362748091e-05, "loss": 0.0711, "step": 23447 }, { "epoch": 2.7805051583066525, "grad_norm": 0.7167052592650225, "learning_rate": 1.1228720214255886e-05, "loss": 0.085, "step": 23448 }, { "epoch": 2.780623740068777, "grad_norm": 0.612888435768226, "learning_rate": 1.1226716927985176e-05, "loss": 0.0753, "step": 23449 }, { "epoch": 2.7807423218309024, "grad_norm": 0.7784823921815903, "learning_rate": 1.1224713768687243e-05, "loss": 0.0748, "step": 23450 }, { "epoch": 2.7808609035930276, "grad_norm": 0.5162271459894991, "learning_rate": 1.1222710736380557e-05, "loss": 0.0598, "step": 23451 }, { "epoch": 2.7809794853551524, "grad_norm": 0.7549309166560052, "learning_rate": 1.1220707831083593e-05, "loss": 0.0727, "step": 23452 }, { "epoch": 2.781098067117277, "grad_norm": 0.6835079449399556, "learning_rate": 1.1218705052814788e-05, "loss": 0.0864, "step": 23453 }, { "epoch": 2.7812166488794023, "grad_norm": 0.9241988238358865, "learning_rate": 1.1216702401592638e-05, "loss": 0.0947, "step": 23454 }, { "epoch": 2.7813352306415275, "grad_norm": 0.8975612506561685, "learning_rate": 1.1214699877435584e-05, "loss": 0.1087, "step": 23455 }, { "epoch": 2.7814538124036523, "grad_norm": 0.49290726169435695, "learning_rate": 1.1212697480362092e-05, "loss": 0.0576, "step": 23456 }, { "epoch": 2.781572394165777, "grad_norm": 0.5147423779811976, "learning_rate": 1.121069521039062e-05, "loss": 0.0726, "step": 23457 }, { "epoch": 2.7816909759279023, "grad_norm": 0.4940548261934171, "learning_rate": 1.120869306753963e-05, "loss": 0.0632, "step": 23458 }, { "epoch": 2.7818095576900275, "grad_norm": 0.9873955003405185, "learning_rate": 1.1206691051827587e-05, "loss": 0.1027, "step": 23459 }, { "epoch": 2.7819281394521522, "grad_norm": 0.8376053718581348, "learning_rate": 1.120468916327293e-05, "loss": 0.1098, "step": 23460 }, { "epoch": 2.782046721214277, "grad_norm": 0.6619117672145138, "learning_rate": 1.120268740189412e-05, "loss": 0.0973, "step": 23461 }, { "epoch": 2.782165302976402, "grad_norm": 0.8134380274847055, "learning_rate": 1.1200685767709613e-05, "loss": 0.0945, "step": 23462 }, { "epoch": 2.7822838847385274, "grad_norm": 0.5811536555515214, "learning_rate": 1.119868426073787e-05, "loss": 0.0682, "step": 23463 }, { "epoch": 2.782402466500652, "grad_norm": 0.6035045089928263, "learning_rate": 1.1196682880997328e-05, "loss": 0.0597, "step": 23464 }, { "epoch": 2.782521048262777, "grad_norm": 0.626328831919556, "learning_rate": 1.119468162850644e-05, "loss": 0.0879, "step": 23465 }, { "epoch": 2.782639630024902, "grad_norm": 0.6240672075564561, "learning_rate": 1.119268050328366e-05, "loss": 0.074, "step": 23466 }, { "epoch": 2.7827582117870273, "grad_norm": 0.803402551626901, "learning_rate": 1.1190679505347433e-05, "loss": 0.0961, "step": 23467 }, { "epoch": 2.782876793549152, "grad_norm": 0.5287979135709724, "learning_rate": 1.1188678634716207e-05, "loss": 0.0815, "step": 23468 }, { "epoch": 2.7829953753112773, "grad_norm": 0.568364163332609, "learning_rate": 1.1186677891408425e-05, "loss": 0.0811, "step": 23469 }, { "epoch": 2.783113957073402, "grad_norm": 0.6304454981956065, "learning_rate": 1.1184677275442548e-05, "loss": 0.0709, "step": 23470 }, { "epoch": 2.7832325388355272, "grad_norm": 0.5699722596827271, "learning_rate": 1.1182676786836994e-05, "loss": 0.0723, "step": 23471 }, { "epoch": 2.783351120597652, "grad_norm": 0.5971293391273854, "learning_rate": 1.1180676425610214e-05, "loss": 0.0766, "step": 23472 }, { "epoch": 2.783469702359777, "grad_norm": 0.6066639618925744, "learning_rate": 1.1178676191780651e-05, "loss": 0.0962, "step": 23473 }, { "epoch": 2.783588284121902, "grad_norm": 0.9015036971421962, "learning_rate": 1.1176676085366757e-05, "loss": 0.1021, "step": 23474 }, { "epoch": 2.783706865884027, "grad_norm": 0.6717263611140653, "learning_rate": 1.1174676106386948e-05, "loss": 0.0885, "step": 23475 }, { "epoch": 2.783825447646152, "grad_norm": 0.9319969793067343, "learning_rate": 1.1172676254859671e-05, "loss": 0.1398, "step": 23476 }, { "epoch": 2.783944029408277, "grad_norm": 0.5164587545153685, "learning_rate": 1.1170676530803361e-05, "loss": 0.0651, "step": 23477 }, { "epoch": 2.784062611170402, "grad_norm": 1.011169895731756, "learning_rate": 1.1168676934236458e-05, "loss": 0.1251, "step": 23478 }, { "epoch": 2.784181192932527, "grad_norm": 0.7119900996439639, "learning_rate": 1.1166677465177389e-05, "loss": 0.1025, "step": 23479 }, { "epoch": 2.784299774694652, "grad_norm": 0.5501909472383811, "learning_rate": 1.1164678123644592e-05, "loss": 0.0741, "step": 23480 }, { "epoch": 2.784418356456777, "grad_norm": 0.631576968311274, "learning_rate": 1.1162678909656508e-05, "loss": 0.0676, "step": 23481 }, { "epoch": 2.784536938218902, "grad_norm": 0.6539996602266178, "learning_rate": 1.1160679823231542e-05, "loss": 0.0965, "step": 23482 }, { "epoch": 2.784655519981027, "grad_norm": 0.740754414704883, "learning_rate": 1.1158680864388141e-05, "loss": 0.0989, "step": 23483 }, { "epoch": 2.7847741017431518, "grad_norm": 0.7634668640623319, "learning_rate": 1.115668203314473e-05, "loss": 0.091, "step": 23484 }, { "epoch": 2.784892683505277, "grad_norm": 0.706710783243423, "learning_rate": 1.115468332951974e-05, "loss": 0.1147, "step": 23485 }, { "epoch": 2.7850112652674017, "grad_norm": 0.7672791685751229, "learning_rate": 1.1152684753531586e-05, "loss": 0.1093, "step": 23486 }, { "epoch": 2.785129847029527, "grad_norm": 0.6207090623249102, "learning_rate": 1.1150686305198697e-05, "loss": 0.0814, "step": 23487 }, { "epoch": 2.7852484287916517, "grad_norm": 0.9007226900579376, "learning_rate": 1.1148687984539497e-05, "loss": 0.1005, "step": 23488 }, { "epoch": 2.785367010553777, "grad_norm": 0.6141684648072834, "learning_rate": 1.1146689791572407e-05, "loss": 0.062, "step": 23489 }, { "epoch": 2.7854855923159016, "grad_norm": 0.6623831922357807, "learning_rate": 1.1144691726315857e-05, "loss": 0.0857, "step": 23490 }, { "epoch": 2.785604174078027, "grad_norm": 0.5284856008294956, "learning_rate": 1.1142693788788241e-05, "loss": 0.0677, "step": 23491 }, { "epoch": 2.785722755840152, "grad_norm": 0.7588908160261999, "learning_rate": 1.1140695979008017e-05, "loss": 0.1018, "step": 23492 }, { "epoch": 2.785841337602277, "grad_norm": 0.8162243606879179, "learning_rate": 1.1138698296993568e-05, "loss": 0.0994, "step": 23493 }, { "epoch": 2.7859599193644016, "grad_norm": 0.6466143413237161, "learning_rate": 1.1136700742763327e-05, "loss": 0.0723, "step": 23494 }, { "epoch": 2.7860785011265268, "grad_norm": 0.8059849747205549, "learning_rate": 1.1134703316335701e-05, "loss": 0.103, "step": 23495 }, { "epoch": 2.786197082888652, "grad_norm": 0.8718699518958477, "learning_rate": 1.1132706017729117e-05, "loss": 0.0979, "step": 23496 }, { "epoch": 2.7863156646507767, "grad_norm": 0.6083779735028215, "learning_rate": 1.1130708846961971e-05, "loss": 0.0705, "step": 23497 }, { "epoch": 2.7864342464129015, "grad_norm": 0.4795914345781358, "learning_rate": 1.112871180405268e-05, "loss": 0.0576, "step": 23498 }, { "epoch": 2.7865528281750267, "grad_norm": 0.544732258130227, "learning_rate": 1.1126714889019657e-05, "loss": 0.0658, "step": 23499 }, { "epoch": 2.786671409937152, "grad_norm": 0.5130214911146745, "learning_rate": 1.1124718101881309e-05, "loss": 0.0745, "step": 23500 }, { "epoch": 2.7867899916992767, "grad_norm": 0.7444856011993779, "learning_rate": 1.1122721442656056e-05, "loss": 0.0981, "step": 23501 }, { "epoch": 2.7869085734614014, "grad_norm": 0.7026096382957676, "learning_rate": 1.1120724911362276e-05, "loss": 0.0997, "step": 23502 }, { "epoch": 2.7870271552235266, "grad_norm": 0.759724003948782, "learning_rate": 1.1118728508018409e-05, "loss": 0.1022, "step": 23503 }, { "epoch": 2.787145736985652, "grad_norm": 0.6056908082648312, "learning_rate": 1.1116732232642834e-05, "loss": 0.0926, "step": 23504 }, { "epoch": 2.7872643187477766, "grad_norm": 0.6478154617932798, "learning_rate": 1.1114736085253974e-05, "loss": 0.0985, "step": 23505 }, { "epoch": 2.7873829005099013, "grad_norm": 0.8882022131112246, "learning_rate": 1.11127400658702e-05, "loss": 0.1038, "step": 23506 }, { "epoch": 2.7875014822720265, "grad_norm": 0.6982842121914995, "learning_rate": 1.1110744174509952e-05, "loss": 0.0866, "step": 23507 }, { "epoch": 2.7876200640341517, "grad_norm": 0.6068082672123419, "learning_rate": 1.11087484111916e-05, "loss": 0.076, "step": 23508 }, { "epoch": 2.7877386457962765, "grad_norm": 0.6080240347483469, "learning_rate": 1.1106752775933546e-05, "loss": 0.0774, "step": 23509 }, { "epoch": 2.7878572275584013, "grad_norm": 0.5441174528217985, "learning_rate": 1.110475726875421e-05, "loss": 0.0698, "step": 23510 }, { "epoch": 2.7879758093205265, "grad_norm": 0.7805890240472266, "learning_rate": 1.1102761889671964e-05, "loss": 0.1019, "step": 23511 }, { "epoch": 2.7880943910826517, "grad_norm": 0.8389724546742463, "learning_rate": 1.1100766638705224e-05, "loss": 0.1116, "step": 23512 }, { "epoch": 2.7882129728447764, "grad_norm": 0.5910590015139133, "learning_rate": 1.109877151587235e-05, "loss": 0.0833, "step": 23513 }, { "epoch": 2.788331554606901, "grad_norm": 0.4197031037741578, "learning_rate": 1.1096776521191774e-05, "loss": 0.0558, "step": 23514 }, { "epoch": 2.7884501363690264, "grad_norm": 0.9747193304602154, "learning_rate": 1.109478165468186e-05, "loss": 0.1412, "step": 23515 }, { "epoch": 2.7885687181311516, "grad_norm": 0.4045914668731407, "learning_rate": 1.1092786916361008e-05, "loss": 0.0554, "step": 23516 }, { "epoch": 2.7886872998932764, "grad_norm": 0.48530372395231597, "learning_rate": 1.1090792306247608e-05, "loss": 0.0719, "step": 23517 }, { "epoch": 2.7888058816554016, "grad_norm": 0.41533769820696853, "learning_rate": 1.1088797824360045e-05, "loss": 0.0543, "step": 23518 }, { "epoch": 2.7889244634175263, "grad_norm": 0.6039825110559885, "learning_rate": 1.1086803470716714e-05, "loss": 0.0822, "step": 23519 }, { "epoch": 2.7890430451796515, "grad_norm": 0.7124490149936346, "learning_rate": 1.1084809245335987e-05, "loss": 0.0816, "step": 23520 }, { "epoch": 2.7891616269417763, "grad_norm": 0.604238080209942, "learning_rate": 1.1082815148236256e-05, "loss": 0.0844, "step": 23521 }, { "epoch": 2.7892802087039015, "grad_norm": 0.8323349019145171, "learning_rate": 1.10808211794359e-05, "loss": 0.1195, "step": 23522 }, { "epoch": 2.7893987904660262, "grad_norm": 0.7277775408674639, "learning_rate": 1.1078827338953312e-05, "loss": 0.1068, "step": 23523 }, { "epoch": 2.7895173722281514, "grad_norm": 0.7522493043752245, "learning_rate": 1.1076833626806852e-05, "loss": 0.1025, "step": 23524 }, { "epoch": 2.789635953990276, "grad_norm": 0.6877997757117645, "learning_rate": 1.1074840043014925e-05, "loss": 0.0736, "step": 23525 }, { "epoch": 2.7897545357524014, "grad_norm": 1.0432445555293344, "learning_rate": 1.107284658759589e-05, "loss": 0.1326, "step": 23526 }, { "epoch": 2.789873117514526, "grad_norm": 0.7764411556315272, "learning_rate": 1.1070853260568132e-05, "loss": 0.1168, "step": 23527 }, { "epoch": 2.7899916992766514, "grad_norm": 0.5950715740178205, "learning_rate": 1.1068860061950024e-05, "loss": 0.0946, "step": 23528 }, { "epoch": 2.790110281038776, "grad_norm": 0.685383707559297, "learning_rate": 1.1066866991759942e-05, "loss": 0.087, "step": 23529 }, { "epoch": 2.7902288628009013, "grad_norm": 0.5991045350442441, "learning_rate": 1.1064874050016273e-05, "loss": 0.0813, "step": 23530 }, { "epoch": 2.790347444563026, "grad_norm": 0.5754770989093753, "learning_rate": 1.1062881236737362e-05, "loss": 0.0699, "step": 23531 }, { "epoch": 2.7904660263251513, "grad_norm": 0.48396684103025783, "learning_rate": 1.10608885519416e-05, "loss": 0.0707, "step": 23532 }, { "epoch": 2.790584608087276, "grad_norm": 0.8696506287986616, "learning_rate": 1.1058895995647351e-05, "loss": 0.1413, "step": 23533 }, { "epoch": 2.7907031898494012, "grad_norm": 0.6833433879985161, "learning_rate": 1.1056903567872994e-05, "loss": 0.1126, "step": 23534 }, { "epoch": 2.790821771611526, "grad_norm": 0.7613598321371872, "learning_rate": 1.1054911268636879e-05, "loss": 0.0952, "step": 23535 }, { "epoch": 2.790940353373651, "grad_norm": 0.45074584691929903, "learning_rate": 1.105291909795738e-05, "loss": 0.0641, "step": 23536 }, { "epoch": 2.791058935135776, "grad_norm": 0.6696657520349137, "learning_rate": 1.1050927055852866e-05, "loss": 0.0872, "step": 23537 }, { "epoch": 2.791177516897901, "grad_norm": 0.5449393193319935, "learning_rate": 1.1048935142341699e-05, "loss": 0.0668, "step": 23538 }, { "epoch": 2.791296098660026, "grad_norm": 0.5756803640747002, "learning_rate": 1.104694335744224e-05, "loss": 0.0761, "step": 23539 }, { "epoch": 2.791414680422151, "grad_norm": 0.6634937730556476, "learning_rate": 1.1044951701172857e-05, "loss": 0.0941, "step": 23540 }, { "epoch": 2.7915332621842763, "grad_norm": 1.1593905083911784, "learning_rate": 1.104296017355191e-05, "loss": 0.1585, "step": 23541 }, { "epoch": 2.791651843946401, "grad_norm": 0.6893990941596838, "learning_rate": 1.1040968774597751e-05, "loss": 0.0986, "step": 23542 }, { "epoch": 2.791770425708526, "grad_norm": 0.7681533497539526, "learning_rate": 1.103897750432874e-05, "loss": 0.0442, "step": 23543 }, { "epoch": 2.791889007470651, "grad_norm": 0.6622474756981004, "learning_rate": 1.1036986362763235e-05, "loss": 0.093, "step": 23544 }, { "epoch": 2.7920075892327763, "grad_norm": 0.47892105116481704, "learning_rate": 1.1034995349919605e-05, "loss": 0.0694, "step": 23545 }, { "epoch": 2.792126170994901, "grad_norm": 0.9685702425222598, "learning_rate": 1.1033004465816183e-05, "loss": 0.0817, "step": 23546 }, { "epoch": 2.7922447527570258, "grad_norm": 0.4998250766863483, "learning_rate": 1.103101371047133e-05, "loss": 0.0756, "step": 23547 }, { "epoch": 2.792363334519151, "grad_norm": 0.7568335489336373, "learning_rate": 1.1029023083903403e-05, "loss": 0.0867, "step": 23548 }, { "epoch": 2.792481916281276, "grad_norm": 1.030694847835742, "learning_rate": 1.102703258613075e-05, "loss": 0.0885, "step": 23549 }, { "epoch": 2.792600498043401, "grad_norm": 0.49283598608618645, "learning_rate": 1.1025042217171721e-05, "loss": 0.063, "step": 23550 }, { "epoch": 2.7927190798055257, "grad_norm": 0.6788894010468353, "learning_rate": 1.1023051977044668e-05, "loss": 0.0856, "step": 23551 }, { "epoch": 2.792837661567651, "grad_norm": 0.5259770853323518, "learning_rate": 1.1021061865767942e-05, "loss": 0.0651, "step": 23552 }, { "epoch": 2.792956243329776, "grad_norm": 0.5938451334235852, "learning_rate": 1.1019071883359878e-05, "loss": 0.093, "step": 23553 }, { "epoch": 2.793074825091901, "grad_norm": 0.7103605172423068, "learning_rate": 1.1017082029838823e-05, "loss": 0.0728, "step": 23554 }, { "epoch": 2.7931934068540256, "grad_norm": 0.6241433449296186, "learning_rate": 1.1015092305223126e-05, "loss": 0.0821, "step": 23555 }, { "epoch": 2.793311988616151, "grad_norm": 0.453685268953769, "learning_rate": 1.101310270953114e-05, "loss": 0.058, "step": 23556 }, { "epoch": 2.793430570378276, "grad_norm": 0.47895426416608106, "learning_rate": 1.1011113242781185e-05, "loss": 0.0606, "step": 23557 }, { "epoch": 2.793549152140401, "grad_norm": 0.7398800661829196, "learning_rate": 1.1009123904991608e-05, "loss": 0.0674, "step": 23558 }, { "epoch": 2.7936677339025255, "grad_norm": 0.5174059228752153, "learning_rate": 1.1007134696180757e-05, "loss": 0.0679, "step": 23559 }, { "epoch": 2.7937863156646507, "grad_norm": 0.45226774767713107, "learning_rate": 1.100514561636696e-05, "loss": 0.0548, "step": 23560 }, { "epoch": 2.793904897426776, "grad_norm": 0.5764169491599761, "learning_rate": 1.1003156665568572e-05, "loss": 0.0664, "step": 23561 }, { "epoch": 2.7940234791889007, "grad_norm": 0.6634990468146553, "learning_rate": 1.1001167843803894e-05, "loss": 0.0817, "step": 23562 }, { "epoch": 2.7941420609510255, "grad_norm": 0.5752715199390365, "learning_rate": 1.0999179151091305e-05, "loss": 0.0835, "step": 23563 }, { "epoch": 2.7942606427131507, "grad_norm": 0.834645710220044, "learning_rate": 1.0997190587449102e-05, "loss": 0.1037, "step": 23564 }, { "epoch": 2.794379224475276, "grad_norm": 0.7632071919303824, "learning_rate": 1.0995202152895632e-05, "loss": 0.1083, "step": 23565 }, { "epoch": 2.7944978062374006, "grad_norm": 0.6047487044114402, "learning_rate": 1.0993213847449224e-05, "loss": 0.0845, "step": 23566 }, { "epoch": 2.794616387999526, "grad_norm": 0.6353127815825889, "learning_rate": 1.0991225671128208e-05, "loss": 0.0716, "step": 23567 }, { "epoch": 2.7947349697616506, "grad_norm": 0.6840139581753084, "learning_rate": 1.0989237623950921e-05, "loss": 0.0928, "step": 23568 }, { "epoch": 2.794853551523776, "grad_norm": 0.6532244770047617, "learning_rate": 1.0987249705935662e-05, "loss": 0.0937, "step": 23569 }, { "epoch": 2.7949721332859006, "grad_norm": 0.4526396342341503, "learning_rate": 1.09852619171008e-05, "loss": 0.0471, "step": 23570 }, { "epoch": 2.7950907150480258, "grad_norm": 0.600143701939048, "learning_rate": 1.0983274257464627e-05, "loss": 0.0857, "step": 23571 }, { "epoch": 2.7952092968101505, "grad_norm": 0.5626780487701015, "learning_rate": 1.0981286727045483e-05, "loss": 0.0713, "step": 23572 }, { "epoch": 2.7953278785722757, "grad_norm": 0.5735390034668544, "learning_rate": 1.0979299325861666e-05, "loss": 0.101, "step": 23573 }, { "epoch": 2.7954464603344005, "grad_norm": 0.8654833869667998, "learning_rate": 1.0977312053931538e-05, "loss": 0.0967, "step": 23574 }, { "epoch": 2.7955650420965257, "grad_norm": 0.60700024543871, "learning_rate": 1.0975324911273385e-05, "loss": 0.0883, "step": 23575 }, { "epoch": 2.7956836238586504, "grad_norm": 0.664407111953768, "learning_rate": 1.097333789790554e-05, "loss": 0.073, "step": 23576 }, { "epoch": 2.7958022056207756, "grad_norm": 0.8148204547437383, "learning_rate": 1.0971351013846318e-05, "loss": 0.1005, "step": 23577 }, { "epoch": 2.7959207873829004, "grad_norm": 0.5630361685147656, "learning_rate": 1.0969364259114036e-05, "loss": 0.0685, "step": 23578 }, { "epoch": 2.7960393691450256, "grad_norm": 0.6000887371011749, "learning_rate": 1.0967377633727022e-05, "loss": 0.093, "step": 23579 }, { "epoch": 2.7961579509071504, "grad_norm": 0.6316920483191582, "learning_rate": 1.0965391137703556e-05, "loss": 0.0887, "step": 23580 }, { "epoch": 2.7962765326692756, "grad_norm": 0.7489408322441435, "learning_rate": 1.0963404771061995e-05, "loss": 0.0645, "step": 23581 }, { "epoch": 2.7963951144314003, "grad_norm": 0.6167259089654593, "learning_rate": 1.096141853382062e-05, "loss": 0.0835, "step": 23582 }, { "epoch": 2.7965136961935255, "grad_norm": 0.4634641862568996, "learning_rate": 1.0959432425997756e-05, "loss": 0.0567, "step": 23583 }, { "epoch": 2.7966322779556503, "grad_norm": 0.6331279864321742, "learning_rate": 1.0957446447611692e-05, "loss": 0.098, "step": 23584 }, { "epoch": 2.7967508597177755, "grad_norm": 0.7926022811994479, "learning_rate": 1.0955460598680769e-05, "loss": 0.0702, "step": 23585 }, { "epoch": 2.7968694414799002, "grad_norm": 0.7439577962878062, "learning_rate": 1.0953474879223269e-05, "loss": 0.1055, "step": 23586 }, { "epoch": 2.7969880232420254, "grad_norm": 0.6557615754516626, "learning_rate": 1.0951489289257504e-05, "loss": 0.0972, "step": 23587 }, { "epoch": 2.79710660500415, "grad_norm": 0.5916170958664307, "learning_rate": 1.0949503828801782e-05, "loss": 0.0682, "step": 23588 }, { "epoch": 2.7972251867662754, "grad_norm": 0.628392664328486, "learning_rate": 1.0947518497874403e-05, "loss": 0.0914, "step": 23589 }, { "epoch": 2.7973437685284006, "grad_norm": 0.6861520989128417, "learning_rate": 1.0945533296493682e-05, "loss": 0.0993, "step": 23590 }, { "epoch": 2.7974623502905254, "grad_norm": 1.0573612459060657, "learning_rate": 1.0943548224677899e-05, "loss": 0.1076, "step": 23591 }, { "epoch": 2.79758093205265, "grad_norm": 0.5608808816785612, "learning_rate": 1.0941563282445367e-05, "loss": 0.0707, "step": 23592 }, { "epoch": 2.7976995138147753, "grad_norm": 0.9196731104998727, "learning_rate": 1.0939578469814379e-05, "loss": 0.0999, "step": 23593 }, { "epoch": 2.7978180955769005, "grad_norm": 0.6340230909119936, "learning_rate": 1.0937593786803244e-05, "loss": 0.0763, "step": 23594 }, { "epoch": 2.7979366773390253, "grad_norm": 0.6263445126227181, "learning_rate": 1.0935609233430232e-05, "loss": 0.0761, "step": 23595 }, { "epoch": 2.79805525910115, "grad_norm": 0.5518522071872508, "learning_rate": 1.0933624809713677e-05, "loss": 0.06, "step": 23596 }, { "epoch": 2.7981738408632753, "grad_norm": 0.6174772632976541, "learning_rate": 1.093164051567184e-05, "loss": 0.074, "step": 23597 }, { "epoch": 2.7982924226254005, "grad_norm": 0.686974972683224, "learning_rate": 1.0929656351323025e-05, "loss": 0.099, "step": 23598 }, { "epoch": 2.798411004387525, "grad_norm": 0.6473302143599269, "learning_rate": 1.0927672316685528e-05, "loss": 0.0915, "step": 23599 }, { "epoch": 2.79852958614965, "grad_norm": 0.9277406707585905, "learning_rate": 1.0925688411777632e-05, "loss": 0.1291, "step": 23600 }, { "epoch": 2.798648167911775, "grad_norm": 0.6156580063429085, "learning_rate": 1.092370463661764e-05, "loss": 0.0868, "step": 23601 }, { "epoch": 2.7987667496739004, "grad_norm": 0.60269906904038, "learning_rate": 1.0921720991223822e-05, "loss": 0.0904, "step": 23602 }, { "epoch": 2.798885331436025, "grad_norm": 0.5436340204633221, "learning_rate": 1.0919737475614473e-05, "loss": 0.0575, "step": 23603 }, { "epoch": 2.79900391319815, "grad_norm": 0.8232487731096371, "learning_rate": 1.0917754089807874e-05, "loss": 0.0897, "step": 23604 }, { "epoch": 2.799122494960275, "grad_norm": 0.6605622063574442, "learning_rate": 1.0915770833822326e-05, "loss": 0.0766, "step": 23605 }, { "epoch": 2.7992410767224003, "grad_norm": 0.646541173752105, "learning_rate": 1.091378770767609e-05, "loss": 0.0858, "step": 23606 }, { "epoch": 2.799359658484525, "grad_norm": 0.840584740131326, "learning_rate": 1.0911804711387458e-05, "loss": 0.1008, "step": 23607 }, { "epoch": 2.79947824024665, "grad_norm": 0.8095230039828296, "learning_rate": 1.0909821844974707e-05, "loss": 0.0764, "step": 23608 }, { "epoch": 2.799596822008775, "grad_norm": 0.8178376840722514, "learning_rate": 1.0907839108456126e-05, "loss": 0.1029, "step": 23609 }, { "epoch": 2.7997154037709002, "grad_norm": 0.5239948635349123, "learning_rate": 1.0905856501849982e-05, "loss": 0.0701, "step": 23610 }, { "epoch": 2.799833985533025, "grad_norm": 0.7574676074051285, "learning_rate": 1.0903874025174559e-05, "loss": 0.0941, "step": 23611 }, { "epoch": 2.7999525672951497, "grad_norm": 0.7261333076311026, "learning_rate": 1.0901891678448144e-05, "loss": 0.0873, "step": 23612 }, { "epoch": 2.800071149057275, "grad_norm": 0.8540561736683205, "learning_rate": 1.0899909461688988e-05, "loss": 0.1128, "step": 23613 }, { "epoch": 2.8001897308194, "grad_norm": 0.5872833913699418, "learning_rate": 1.0897927374915377e-05, "loss": 0.0697, "step": 23614 }, { "epoch": 2.800308312581525, "grad_norm": 0.8646428183632873, "learning_rate": 1.0895945418145582e-05, "loss": 0.0831, "step": 23615 }, { "epoch": 2.80042689434365, "grad_norm": 0.8210148451816569, "learning_rate": 1.0893963591397873e-05, "loss": 0.1126, "step": 23616 }, { "epoch": 2.800545476105775, "grad_norm": 0.5910364236606048, "learning_rate": 1.0891981894690534e-05, "loss": 0.0764, "step": 23617 }, { "epoch": 2.8006640578679, "grad_norm": 0.5781147982745879, "learning_rate": 1.0890000328041799e-05, "loss": 0.0674, "step": 23618 }, { "epoch": 2.800782639630025, "grad_norm": 1.0973523377642078, "learning_rate": 1.088801889146998e-05, "loss": 0.1484, "step": 23619 }, { "epoch": 2.80090122139215, "grad_norm": 0.7213777531717688, "learning_rate": 1.0886037584993312e-05, "loss": 0.1115, "step": 23620 }, { "epoch": 2.801019803154275, "grad_norm": 0.4645794690112459, "learning_rate": 1.0884056408630077e-05, "loss": 0.0691, "step": 23621 }, { "epoch": 2.8011383849164, "grad_norm": 0.6373183183657428, "learning_rate": 1.0882075362398517e-05, "loss": 0.1019, "step": 23622 }, { "epoch": 2.8012569666785248, "grad_norm": 0.7552400103226458, "learning_rate": 1.0880094446316926e-05, "loss": 0.117, "step": 23623 }, { "epoch": 2.80137554844065, "grad_norm": 0.6650974731140802, "learning_rate": 1.0878113660403538e-05, "loss": 0.0886, "step": 23624 }, { "epoch": 2.8014941302027747, "grad_norm": 0.5845377845118885, "learning_rate": 1.0876133004676628e-05, "loss": 0.068, "step": 23625 }, { "epoch": 2.8016127119649, "grad_norm": 0.5533974719294145, "learning_rate": 1.0874152479154451e-05, "loss": 0.0555, "step": 23626 }, { "epoch": 2.8017312937270247, "grad_norm": 0.6863687411654897, "learning_rate": 1.0872172083855265e-05, "loss": 0.1093, "step": 23627 }, { "epoch": 2.80184987548915, "grad_norm": 0.7972273560809497, "learning_rate": 1.0870191818797338e-05, "loss": 0.0983, "step": 23628 }, { "epoch": 2.8019684572512746, "grad_norm": 0.5375576909235069, "learning_rate": 1.0868211683998894e-05, "loss": 0.0723, "step": 23629 }, { "epoch": 2.8020870390134, "grad_norm": 0.958711652080157, "learning_rate": 1.086623167947823e-05, "loss": 0.1349, "step": 23630 }, { "epoch": 2.8022056207755246, "grad_norm": 0.6656647183436327, "learning_rate": 1.086425180525357e-05, "loss": 0.0861, "step": 23631 }, { "epoch": 2.80232420253765, "grad_norm": 0.5669532564067225, "learning_rate": 1.0862272061343179e-05, "loss": 0.0888, "step": 23632 }, { "epoch": 2.8024427842997746, "grad_norm": 0.7488855712734561, "learning_rate": 1.0860292447765289e-05, "loss": 0.1063, "step": 23633 }, { "epoch": 2.8025613660618998, "grad_norm": 0.9721107395624091, "learning_rate": 1.0858312964538178e-05, "loss": 0.1284, "step": 23634 }, { "epoch": 2.8026799478240245, "grad_norm": 0.5707992436210468, "learning_rate": 1.0856333611680072e-05, "loss": 0.1, "step": 23635 }, { "epoch": 2.8027985295861497, "grad_norm": 0.6267984360587457, "learning_rate": 1.0854354389209227e-05, "loss": 0.0828, "step": 23636 }, { "epoch": 2.8029171113482745, "grad_norm": 0.520678087724199, "learning_rate": 1.0852375297143886e-05, "loss": 0.0754, "step": 23637 }, { "epoch": 2.8030356931103997, "grad_norm": 0.7343831221722324, "learning_rate": 1.0850396335502298e-05, "loss": 0.1014, "step": 23638 }, { "epoch": 2.8031542748725244, "grad_norm": 0.8139350718119551, "learning_rate": 1.0848417504302711e-05, "loss": 0.1265, "step": 23639 }, { "epoch": 2.8032728566346496, "grad_norm": 0.5949760801230392, "learning_rate": 1.0846438803563344e-05, "loss": 0.0992, "step": 23640 }, { "epoch": 2.8033914383967744, "grad_norm": 0.5471308642825482, "learning_rate": 1.0844460233302472e-05, "loss": 0.0689, "step": 23641 }, { "epoch": 2.8035100201588996, "grad_norm": 0.6449651449761193, "learning_rate": 1.0842481793538308e-05, "loss": 0.0956, "step": 23642 }, { "epoch": 2.803628601921025, "grad_norm": 1.0445468472449098, "learning_rate": 1.0840503484289113e-05, "loss": 0.1158, "step": 23643 }, { "epoch": 2.8037471836831496, "grad_norm": 0.4654745456286987, "learning_rate": 1.0838525305573093e-05, "loss": 0.0746, "step": 23644 }, { "epoch": 2.8038657654452743, "grad_norm": 0.774971584590526, "learning_rate": 1.0836547257408522e-05, "loss": 0.1064, "step": 23645 }, { "epoch": 2.8039843472073995, "grad_norm": 0.527577052282642, "learning_rate": 1.0834569339813608e-05, "loss": 0.0703, "step": 23646 }, { "epoch": 2.8041029289695247, "grad_norm": 0.8828495131035237, "learning_rate": 1.0832591552806592e-05, "loss": 0.1296, "step": 23647 }, { "epoch": 2.8042215107316495, "grad_norm": 0.72114630096771, "learning_rate": 1.0830613896405712e-05, "loss": 0.0959, "step": 23648 }, { "epoch": 2.8043400924937743, "grad_norm": 0.8251797441412064, "learning_rate": 1.0828636370629195e-05, "loss": 0.1173, "step": 23649 }, { "epoch": 2.8044586742558995, "grad_norm": 0.5989928091688499, "learning_rate": 1.082665897549528e-05, "loss": 0.0944, "step": 23650 }, { "epoch": 2.8045772560180247, "grad_norm": 0.718926104974931, "learning_rate": 1.0824681711022173e-05, "loss": 0.1131, "step": 23651 }, { "epoch": 2.8046958377801494, "grad_norm": 0.8010455910014552, "learning_rate": 1.0822704577228132e-05, "loss": 0.1136, "step": 23652 }, { "epoch": 2.804814419542274, "grad_norm": 0.6147366708652549, "learning_rate": 1.0820727574131365e-05, "loss": 0.0977, "step": 23653 }, { "epoch": 2.8049330013043994, "grad_norm": 0.5099098582494804, "learning_rate": 1.0818750701750107e-05, "loss": 0.0518, "step": 23654 }, { "epoch": 2.8050515830665246, "grad_norm": 0.8255545181186152, "learning_rate": 1.0816773960102561e-05, "loss": 0.1248, "step": 23655 }, { "epoch": 2.8051701648286493, "grad_norm": 0.7403607768533896, "learning_rate": 1.0814797349206987e-05, "loss": 0.1004, "step": 23656 }, { "epoch": 2.805288746590774, "grad_norm": 1.3397210573473675, "learning_rate": 1.0812820869081575e-05, "loss": 0.193, "step": 23657 }, { "epoch": 2.8054073283528993, "grad_norm": 0.5807243093037502, "learning_rate": 1.0810844519744556e-05, "loss": 0.0764, "step": 23658 }, { "epoch": 2.8055259101150245, "grad_norm": 0.5527536720039181, "learning_rate": 1.080886830121415e-05, "loss": 0.0803, "step": 23659 }, { "epoch": 2.8056444918771493, "grad_norm": 1.0568281757681117, "learning_rate": 1.080689221350858e-05, "loss": 0.1413, "step": 23660 }, { "epoch": 2.805763073639274, "grad_norm": 0.5718982701524848, "learning_rate": 1.0804916256646066e-05, "loss": 0.0812, "step": 23661 }, { "epoch": 2.805881655401399, "grad_norm": 0.6653637133842821, "learning_rate": 1.0802940430644805e-05, "loss": 0.0958, "step": 23662 }, { "epoch": 2.8060002371635244, "grad_norm": 0.5082234330637397, "learning_rate": 1.0800964735523029e-05, "loss": 0.0751, "step": 23663 }, { "epoch": 2.806118818925649, "grad_norm": 0.6055972845365272, "learning_rate": 1.0798989171298943e-05, "loss": 0.0735, "step": 23664 }, { "epoch": 2.8062374006877744, "grad_norm": 0.8371353562794941, "learning_rate": 1.0797013737990763e-05, "loss": 0.0996, "step": 23665 }, { "epoch": 2.806355982449899, "grad_norm": 0.5262735412047529, "learning_rate": 1.07950384356167e-05, "loss": 0.0728, "step": 23666 }, { "epoch": 2.8064745642120243, "grad_norm": 0.6005316053736811, "learning_rate": 1.0793063264194964e-05, "loss": 0.1016, "step": 23667 }, { "epoch": 2.806593145974149, "grad_norm": 0.7639400924158317, "learning_rate": 1.079108822374377e-05, "loss": 0.0925, "step": 23668 }, { "epoch": 2.8067117277362743, "grad_norm": 0.527271588928279, "learning_rate": 1.078911331428131e-05, "loss": 0.0694, "step": 23669 }, { "epoch": 2.806830309498399, "grad_norm": 0.5480928865730844, "learning_rate": 1.07871385358258e-05, "loss": 0.071, "step": 23670 }, { "epoch": 2.8069488912605243, "grad_norm": 0.6586045119824193, "learning_rate": 1.0785163888395442e-05, "loss": 0.0891, "step": 23671 }, { "epoch": 2.807067473022649, "grad_norm": 0.7215217175697418, "learning_rate": 1.0783189372008451e-05, "loss": 0.1007, "step": 23672 }, { "epoch": 2.8071860547847742, "grad_norm": 0.782133800898967, "learning_rate": 1.0781214986683011e-05, "loss": 0.0987, "step": 23673 }, { "epoch": 2.807304636546899, "grad_norm": 0.8002464605416594, "learning_rate": 1.0779240732437334e-05, "loss": 0.1017, "step": 23674 }, { "epoch": 2.807423218309024, "grad_norm": 0.532112045824138, "learning_rate": 1.0777266609289618e-05, "loss": 0.0605, "step": 23675 }, { "epoch": 2.807541800071149, "grad_norm": 0.6816275583863695, "learning_rate": 1.0775292617258062e-05, "loss": 0.0827, "step": 23676 }, { "epoch": 2.807660381833274, "grad_norm": 0.47870374116648556, "learning_rate": 1.0773318756360872e-05, "loss": 0.0599, "step": 23677 }, { "epoch": 2.807778963595399, "grad_norm": 0.8903808901058717, "learning_rate": 1.0771345026616223e-05, "loss": 0.1193, "step": 23678 }, { "epoch": 2.807897545357524, "grad_norm": 0.3533418788836065, "learning_rate": 1.0769371428042341e-05, "loss": 0.0434, "step": 23679 }, { "epoch": 2.808016127119649, "grad_norm": 0.5469935267712775, "learning_rate": 1.0767397960657394e-05, "loss": 0.0646, "step": 23680 }, { "epoch": 2.808134708881774, "grad_norm": 0.49994491125418605, "learning_rate": 1.0765424624479587e-05, "loss": 0.0572, "step": 23681 }, { "epoch": 2.808253290643899, "grad_norm": 0.5009666179191757, "learning_rate": 1.0763451419527106e-05, "loss": 0.076, "step": 23682 }, { "epoch": 2.808371872406024, "grad_norm": 0.5893973054839963, "learning_rate": 1.0761478345818157e-05, "loss": 0.0632, "step": 23683 }, { "epoch": 2.808490454168149, "grad_norm": 0.8267550007647957, "learning_rate": 1.0759505403370906e-05, "loss": 0.0931, "step": 23684 }, { "epoch": 2.808609035930274, "grad_norm": 0.6121810270319018, "learning_rate": 1.0757532592203553e-05, "loss": 0.0746, "step": 23685 }, { "epoch": 2.8087276176923988, "grad_norm": 0.44152799289364647, "learning_rate": 1.0755559912334284e-05, "loss": 0.0578, "step": 23686 }, { "epoch": 2.808846199454524, "grad_norm": 0.5083699151601409, "learning_rate": 1.0753587363781287e-05, "loss": 0.0784, "step": 23687 }, { "epoch": 2.8089647812166487, "grad_norm": 0.9296603632191489, "learning_rate": 1.075161494656275e-05, "loss": 0.1084, "step": 23688 }, { "epoch": 2.809083362978774, "grad_norm": 0.545922447355714, "learning_rate": 1.0749642660696835e-05, "loss": 0.0875, "step": 23689 }, { "epoch": 2.8092019447408987, "grad_norm": 0.4859196231138172, "learning_rate": 1.0747670506201756e-05, "loss": 0.071, "step": 23690 }, { "epoch": 2.809320526503024, "grad_norm": 0.5356678804310886, "learning_rate": 1.074569848309567e-05, "loss": 0.0761, "step": 23691 }, { "epoch": 2.809439108265149, "grad_norm": 0.4278726637330649, "learning_rate": 1.0743726591396774e-05, "loss": 0.0505, "step": 23692 }, { "epoch": 2.809557690027274, "grad_norm": 0.6961396451125965, "learning_rate": 1.0741754831123216e-05, "loss": 0.1013, "step": 23693 }, { "epoch": 2.8096762717893986, "grad_norm": 0.565999601370747, "learning_rate": 1.0739783202293213e-05, "loss": 0.0859, "step": 23694 }, { "epoch": 2.809794853551524, "grad_norm": 0.5365043177921548, "learning_rate": 1.0737811704924913e-05, "loss": 0.0926, "step": 23695 }, { "epoch": 2.809913435313649, "grad_norm": 0.6093967209390238, "learning_rate": 1.0735840339036502e-05, "loss": 0.0827, "step": 23696 }, { "epoch": 2.8100320170757738, "grad_norm": 0.35227049625576856, "learning_rate": 1.0733869104646146e-05, "loss": 0.0506, "step": 23697 }, { "epoch": 2.8101505988378985, "grad_norm": 0.6514808812989781, "learning_rate": 1.0731898001772028e-05, "loss": 0.0828, "step": 23698 }, { "epoch": 2.8102691806000237, "grad_norm": 0.6883742556187223, "learning_rate": 1.072992703043232e-05, "loss": 0.1041, "step": 23699 }, { "epoch": 2.810387762362149, "grad_norm": 0.6151782912030235, "learning_rate": 1.0727956190645166e-05, "loss": 0.0695, "step": 23700 }, { "epoch": 2.8105063441242737, "grad_norm": 0.5914069983155515, "learning_rate": 1.0725985482428776e-05, "loss": 0.085, "step": 23701 }, { "epoch": 2.8106249258863985, "grad_norm": 0.8725170034629668, "learning_rate": 1.0724014905801285e-05, "loss": 0.0879, "step": 23702 }, { "epoch": 2.8107435076485237, "grad_norm": 0.5385494694886028, "learning_rate": 1.0722044460780877e-05, "loss": 0.0776, "step": 23703 }, { "epoch": 2.810862089410649, "grad_norm": 0.5054365529942443, "learning_rate": 1.0720074147385698e-05, "loss": 0.0638, "step": 23704 }, { "epoch": 2.8109806711727736, "grad_norm": 0.5635664815923033, "learning_rate": 1.0718103965633938e-05, "loss": 0.068, "step": 23705 }, { "epoch": 2.8110992529348984, "grad_norm": 0.8202549329097416, "learning_rate": 1.0716133915543738e-05, "loss": 0.1034, "step": 23706 }, { "epoch": 2.8112178346970236, "grad_norm": 0.549204177260597, "learning_rate": 1.0714163997133268e-05, "loss": 0.0847, "step": 23707 }, { "epoch": 2.811336416459149, "grad_norm": 0.5479892463444132, "learning_rate": 1.0712194210420684e-05, "loss": 0.0835, "step": 23708 }, { "epoch": 2.8114549982212735, "grad_norm": 0.37380656411235713, "learning_rate": 1.071022455542415e-05, "loss": 0.0526, "step": 23709 }, { "epoch": 2.8115735799833983, "grad_norm": 0.5256768672382078, "learning_rate": 1.0708255032161835e-05, "loss": 0.0592, "step": 23710 }, { "epoch": 2.8116921617455235, "grad_norm": 0.6411771931255557, "learning_rate": 1.0706285640651862e-05, "loss": 0.1009, "step": 23711 }, { "epoch": 2.8118107435076487, "grad_norm": 0.5526306259615674, "learning_rate": 1.0704316380912425e-05, "loss": 0.075, "step": 23712 }, { "epoch": 2.8119293252697735, "grad_norm": 0.559063765625672, "learning_rate": 1.070234725296165e-05, "loss": 0.0782, "step": 23713 }, { "epoch": 2.812047907031898, "grad_norm": 0.5172482279314841, "learning_rate": 1.0700378256817703e-05, "loss": 0.0614, "step": 23714 }, { "epoch": 2.8121664887940234, "grad_norm": 0.6180587409171935, "learning_rate": 1.0698409392498732e-05, "loss": 0.077, "step": 23715 }, { "epoch": 2.8122850705561486, "grad_norm": 0.35900193770008004, "learning_rate": 1.0696440660022891e-05, "loss": 0.045, "step": 23716 }, { "epoch": 2.8124036523182734, "grad_norm": 0.5969804957578965, "learning_rate": 1.0694472059408334e-05, "loss": 0.0981, "step": 23717 }, { "epoch": 2.8125222340803986, "grad_norm": 0.5532537311891125, "learning_rate": 1.0692503590673192e-05, "loss": 0.0729, "step": 23718 }, { "epoch": 2.8126408158425233, "grad_norm": 0.47378444978458045, "learning_rate": 1.0690535253835623e-05, "loss": 0.0577, "step": 23719 }, { "epoch": 2.8127593976046485, "grad_norm": 0.9832786362975712, "learning_rate": 1.0688567048913772e-05, "loss": 0.1194, "step": 23720 }, { "epoch": 2.8128779793667733, "grad_norm": 0.39003626401532515, "learning_rate": 1.0686598975925791e-05, "loss": 0.0451, "step": 23721 }, { "epoch": 2.8129965611288985, "grad_norm": 0.6310116300166011, "learning_rate": 1.0684631034889798e-05, "loss": 0.0852, "step": 23722 }, { "epoch": 2.8131151428910233, "grad_norm": 0.5792917084545122, "learning_rate": 1.0682663225823968e-05, "loss": 0.0822, "step": 23723 }, { "epoch": 2.8132337246531485, "grad_norm": 0.8808333066704965, "learning_rate": 1.068069554874642e-05, "loss": 0.1282, "step": 23724 }, { "epoch": 2.8133523064152732, "grad_norm": 0.5500673379993618, "learning_rate": 1.0678728003675298e-05, "loss": 0.0643, "step": 23725 }, { "epoch": 2.8134708881773984, "grad_norm": 0.5986926879055103, "learning_rate": 1.0676760590628742e-05, "loss": 0.0808, "step": 23726 }, { "epoch": 2.813589469939523, "grad_norm": 0.6058344137615969, "learning_rate": 1.0674793309624888e-05, "loss": 0.0811, "step": 23727 }, { "epoch": 2.8137080517016484, "grad_norm": 0.4916683179863628, "learning_rate": 1.0672826160681884e-05, "loss": 0.0679, "step": 23728 }, { "epoch": 2.813826633463773, "grad_norm": 1.0005823995978569, "learning_rate": 1.0670859143817844e-05, "loss": 0.1334, "step": 23729 }, { "epoch": 2.8139452152258984, "grad_norm": 0.6988423241344452, "learning_rate": 1.066889225905091e-05, "loss": 0.0736, "step": 23730 }, { "epoch": 2.814063796988023, "grad_norm": 0.4136759900734558, "learning_rate": 1.0666925506399216e-05, "loss": 0.0652, "step": 23731 }, { "epoch": 2.8141823787501483, "grad_norm": 0.4236904524496318, "learning_rate": 1.0664958885880903e-05, "loss": 0.0536, "step": 23732 }, { "epoch": 2.814300960512273, "grad_norm": 0.6226513308721846, "learning_rate": 1.066299239751408e-05, "loss": 0.0711, "step": 23733 }, { "epoch": 2.8144195422743983, "grad_norm": 0.8388667841518095, "learning_rate": 1.0661026041316884e-05, "loss": 0.1094, "step": 23734 }, { "epoch": 2.814538124036523, "grad_norm": 0.7167530924560225, "learning_rate": 1.0659059817307448e-05, "loss": 0.0815, "step": 23735 }, { "epoch": 2.8146567057986482, "grad_norm": 0.6720485987187659, "learning_rate": 1.0657093725503895e-05, "loss": 0.0788, "step": 23736 }, { "epoch": 2.814775287560773, "grad_norm": 0.7593678462661502, "learning_rate": 1.0655127765924345e-05, "loss": 0.1075, "step": 23737 }, { "epoch": 2.814893869322898, "grad_norm": 0.5897087396301391, "learning_rate": 1.0653161938586927e-05, "loss": 0.0705, "step": 23738 }, { "epoch": 2.815012451085023, "grad_norm": 0.8341244532461679, "learning_rate": 1.0651196243509776e-05, "loss": 0.0901, "step": 23739 }, { "epoch": 2.815131032847148, "grad_norm": 0.5407186851701997, "learning_rate": 1.0649230680710986e-05, "loss": 0.0659, "step": 23740 }, { "epoch": 2.8152496146092734, "grad_norm": 0.8161043796340677, "learning_rate": 1.0647265250208694e-05, "loss": 0.0983, "step": 23741 }, { "epoch": 2.815368196371398, "grad_norm": 0.7140660059971243, "learning_rate": 1.0645299952021014e-05, "loss": 0.104, "step": 23742 }, { "epoch": 2.815486778133523, "grad_norm": 0.49087150512896993, "learning_rate": 1.0643334786166076e-05, "loss": 0.0681, "step": 23743 }, { "epoch": 2.815605359895648, "grad_norm": 0.8356188787850176, "learning_rate": 1.0641369752661976e-05, "loss": 0.115, "step": 23744 }, { "epoch": 2.8157239416577733, "grad_norm": 0.7876714712439317, "learning_rate": 1.0639404851526841e-05, "loss": 0.1027, "step": 23745 }, { "epoch": 2.815842523419898, "grad_norm": 0.9126757999756093, "learning_rate": 1.063744008277878e-05, "loss": 0.0735, "step": 23746 }, { "epoch": 2.815961105182023, "grad_norm": 0.6564123676919913, "learning_rate": 1.0635475446435911e-05, "loss": 0.0608, "step": 23747 }, { "epoch": 2.816079686944148, "grad_norm": 0.6920715466905459, "learning_rate": 1.0633510942516354e-05, "loss": 0.0727, "step": 23748 }, { "epoch": 2.816198268706273, "grad_norm": 0.9903694015906713, "learning_rate": 1.0631546571038187e-05, "loss": 0.1059, "step": 23749 }, { "epoch": 2.816316850468398, "grad_norm": 0.46701950955176713, "learning_rate": 1.0629582332019561e-05, "loss": 0.0689, "step": 23750 }, { "epoch": 2.8164354322305227, "grad_norm": 1.0189882738286231, "learning_rate": 1.0627618225478555e-05, "loss": 0.147, "step": 23751 }, { "epoch": 2.816554013992648, "grad_norm": 0.47254083289986, "learning_rate": 1.0625654251433284e-05, "loss": 0.0509, "step": 23752 }, { "epoch": 2.816672595754773, "grad_norm": 0.6479723132800653, "learning_rate": 1.0623690409901854e-05, "loss": 0.0662, "step": 23753 }, { "epoch": 2.816791177516898, "grad_norm": 0.5373318702632125, "learning_rate": 1.0621726700902376e-05, "loss": 0.0763, "step": 23754 }, { "epoch": 2.8169097592790227, "grad_norm": 0.610831593268313, "learning_rate": 1.0619763124452938e-05, "loss": 0.076, "step": 23755 }, { "epoch": 2.817028341041148, "grad_norm": 0.532981798318027, "learning_rate": 1.0617799680571653e-05, "loss": 0.0822, "step": 23756 }, { "epoch": 2.817146922803273, "grad_norm": 0.7629447644766998, "learning_rate": 1.0615836369276613e-05, "loss": 0.0761, "step": 23757 }, { "epoch": 2.817265504565398, "grad_norm": 1.2951250239924097, "learning_rate": 1.0613873190585926e-05, "loss": 0.1426, "step": 23758 }, { "epoch": 2.8173840863275226, "grad_norm": 0.8801474157955219, "learning_rate": 1.0611910144517695e-05, "loss": 0.1342, "step": 23759 }, { "epoch": 2.8175026680896478, "grad_norm": 0.6659869296050666, "learning_rate": 1.0609947231089992e-05, "loss": 0.0903, "step": 23760 }, { "epoch": 2.817621249851773, "grad_norm": 0.6315948346896855, "learning_rate": 1.0607984450320945e-05, "loss": 0.0718, "step": 23761 }, { "epoch": 2.8177398316138977, "grad_norm": 0.722767127922329, "learning_rate": 1.0606021802228625e-05, "loss": 0.0922, "step": 23762 }, { "epoch": 2.8178584133760225, "grad_norm": 0.46189811612105286, "learning_rate": 1.0604059286831141e-05, "loss": 0.065, "step": 23763 }, { "epoch": 2.8179769951381477, "grad_norm": 0.8960723411924778, "learning_rate": 1.060209690414656e-05, "loss": 0.1283, "step": 23764 }, { "epoch": 2.818095576900273, "grad_norm": 0.8298899237661, "learning_rate": 1.0600134654193008e-05, "loss": 0.1093, "step": 23765 }, { "epoch": 2.8182141586623977, "grad_norm": 0.5024519822168046, "learning_rate": 1.0598172536988546e-05, "loss": 0.0542, "step": 23766 }, { "epoch": 2.818332740424523, "grad_norm": 0.7122989078983483, "learning_rate": 1.0596210552551272e-05, "loss": 0.079, "step": 23767 }, { "epoch": 2.8184513221866476, "grad_norm": 0.6833068626755471, "learning_rate": 1.0594248700899276e-05, "loss": 0.0988, "step": 23768 }, { "epoch": 2.818569903948773, "grad_norm": 0.5497625494926425, "learning_rate": 1.059228698205064e-05, "loss": 0.0716, "step": 23769 }, { "epoch": 2.8186884857108976, "grad_norm": 0.8164413689888779, "learning_rate": 1.0590325396023459e-05, "loss": 0.1008, "step": 23770 }, { "epoch": 2.818807067473023, "grad_norm": 0.4995471207546836, "learning_rate": 1.0588363942835791e-05, "loss": 0.0662, "step": 23771 }, { "epoch": 2.8189256492351475, "grad_norm": 0.5841296649727478, "learning_rate": 1.0586402622505751e-05, "loss": 0.0754, "step": 23772 }, { "epoch": 2.8190442309972727, "grad_norm": 0.8284505782431335, "learning_rate": 1.0584441435051396e-05, "loss": 0.0979, "step": 23773 }, { "epoch": 2.8191628127593975, "grad_norm": 0.4201820683835193, "learning_rate": 1.0582480380490811e-05, "loss": 0.0457, "step": 23774 }, { "epoch": 2.8192813945215227, "grad_norm": 0.5995308780864204, "learning_rate": 1.0580519458842078e-05, "loss": 0.0782, "step": 23775 }, { "epoch": 2.8193999762836475, "grad_norm": 0.6404690073452419, "learning_rate": 1.0578558670123276e-05, "loss": 0.0842, "step": 23776 }, { "epoch": 2.8195185580457727, "grad_norm": 0.45887211212098394, "learning_rate": 1.0576598014352485e-05, "loss": 0.063, "step": 23777 }, { "epoch": 2.8196371398078974, "grad_norm": 0.69404967733088, "learning_rate": 1.0574637491547763e-05, "loss": 0.0889, "step": 23778 }, { "epoch": 2.8197557215700226, "grad_norm": 0.9101310672331558, "learning_rate": 1.0572677101727197e-05, "loss": 0.1268, "step": 23779 }, { "epoch": 2.8198743033321474, "grad_norm": 0.7960243419549252, "learning_rate": 1.0570716844908854e-05, "loss": 0.1077, "step": 23780 }, { "epoch": 2.8199928850942726, "grad_norm": 0.4596722628108303, "learning_rate": 1.0568756721110814e-05, "loss": 0.0646, "step": 23781 }, { "epoch": 2.8201114668563974, "grad_norm": 0.4834225185285542, "learning_rate": 1.0566796730351126e-05, "loss": 0.0636, "step": 23782 }, { "epoch": 2.8202300486185226, "grad_norm": 0.441909699851818, "learning_rate": 1.056483687264789e-05, "loss": 0.0565, "step": 23783 }, { "epoch": 2.8203486303806473, "grad_norm": 0.5692772178731074, "learning_rate": 1.0562877148019149e-05, "loss": 0.0619, "step": 23784 }, { "epoch": 2.8204672121427725, "grad_norm": 0.47716008799432597, "learning_rate": 1.0560917556482975e-05, "loss": 0.058, "step": 23785 }, { "epoch": 2.8205857939048973, "grad_norm": 0.6725489606538804, "learning_rate": 1.0558958098057433e-05, "loss": 0.0782, "step": 23786 }, { "epoch": 2.8207043756670225, "grad_norm": 0.5935417824119965, "learning_rate": 1.055699877276059e-05, "loss": 0.0893, "step": 23787 }, { "epoch": 2.8208229574291472, "grad_norm": 0.7032940318462935, "learning_rate": 1.0555039580610517e-05, "loss": 0.0917, "step": 23788 }, { "epoch": 2.8209415391912724, "grad_norm": 0.7048237433454274, "learning_rate": 1.0553080521625255e-05, "loss": 0.1096, "step": 23789 }, { "epoch": 2.8210601209533976, "grad_norm": 0.7495004363018791, "learning_rate": 1.0551121595822877e-05, "loss": 0.1188, "step": 23790 }, { "epoch": 2.8211787027155224, "grad_norm": 0.9930260415469246, "learning_rate": 1.0549162803221439e-05, "loss": 0.127, "step": 23791 }, { "epoch": 2.821297284477647, "grad_norm": 0.6977639732683725, "learning_rate": 1.0547204143839007e-05, "loss": 0.1238, "step": 23792 }, { "epoch": 2.8214158662397724, "grad_norm": 0.8620960084532441, "learning_rate": 1.054524561769362e-05, "loss": 0.0999, "step": 23793 }, { "epoch": 2.8215344480018976, "grad_norm": 0.586802277715516, "learning_rate": 1.0543287224803344e-05, "loss": 0.0915, "step": 23794 }, { "epoch": 2.8216530297640223, "grad_norm": 0.8592279159023308, "learning_rate": 1.054132896518623e-05, "loss": 0.1107, "step": 23795 }, { "epoch": 2.821771611526147, "grad_norm": 0.9457237402408418, "learning_rate": 1.0539370838860335e-05, "loss": 0.1073, "step": 23796 }, { "epoch": 2.8218901932882723, "grad_norm": 0.6204454510924319, "learning_rate": 1.0537412845843708e-05, "loss": 0.082, "step": 23797 }, { "epoch": 2.8220087750503975, "grad_norm": 0.40056061428103235, "learning_rate": 1.0535454986154397e-05, "loss": 0.0525, "step": 23798 }, { "epoch": 2.8221273568125222, "grad_norm": 0.616289393127539, "learning_rate": 1.0533497259810465e-05, "loss": 0.0669, "step": 23799 }, { "epoch": 2.822245938574647, "grad_norm": 0.5516059698116464, "learning_rate": 1.0531539666829938e-05, "loss": 0.0701, "step": 23800 }, { "epoch": 2.822364520336772, "grad_norm": 0.8789717829046028, "learning_rate": 1.0529582207230874e-05, "loss": 0.1123, "step": 23801 }, { "epoch": 2.8224831020988974, "grad_norm": 0.494937314698148, "learning_rate": 1.0527624881031315e-05, "loss": 0.0514, "step": 23802 }, { "epoch": 2.822601683861022, "grad_norm": 0.7000342922817735, "learning_rate": 1.052566768824932e-05, "loss": 0.0925, "step": 23803 }, { "epoch": 2.822720265623147, "grad_norm": 0.6169464473824983, "learning_rate": 1.0523710628902905e-05, "loss": 0.0652, "step": 23804 }, { "epoch": 2.822838847385272, "grad_norm": 0.8127582116055794, "learning_rate": 1.0521753703010129e-05, "loss": 0.0916, "step": 23805 }, { "epoch": 2.8229574291473973, "grad_norm": 0.6205732562841709, "learning_rate": 1.051979691058903e-05, "loss": 0.0739, "step": 23806 }, { "epoch": 2.823076010909522, "grad_norm": 0.5609390435810635, "learning_rate": 1.0517840251657646e-05, "loss": 0.0884, "step": 23807 }, { "epoch": 2.823194592671647, "grad_norm": 0.6738246328870717, "learning_rate": 1.0515883726234017e-05, "loss": 0.0992, "step": 23808 }, { "epoch": 2.823313174433772, "grad_norm": 0.5118812142146344, "learning_rate": 1.0513927334336176e-05, "loss": 0.0597, "step": 23809 }, { "epoch": 2.8234317561958973, "grad_norm": 0.8427540923165535, "learning_rate": 1.0511971075982172e-05, "loss": 0.1026, "step": 23810 }, { "epoch": 2.823550337958022, "grad_norm": 0.6894978833955688, "learning_rate": 1.0510014951190016e-05, "loss": 0.0792, "step": 23811 }, { "epoch": 2.8236689197201468, "grad_norm": 0.9822146270141293, "learning_rate": 1.0508058959977757e-05, "loss": 0.1185, "step": 23812 }, { "epoch": 2.823787501482272, "grad_norm": 0.80181772410509, "learning_rate": 1.0506103102363424e-05, "loss": 0.1085, "step": 23813 }, { "epoch": 2.823906083244397, "grad_norm": 0.7185586803661643, "learning_rate": 1.0504147378365054e-05, "loss": 0.1051, "step": 23814 }, { "epoch": 2.824024665006522, "grad_norm": 0.6696530326421929, "learning_rate": 1.050219178800066e-05, "loss": 0.0794, "step": 23815 }, { "epoch": 2.824143246768647, "grad_norm": 0.8464557869025621, "learning_rate": 1.0500236331288279e-05, "loss": 0.0989, "step": 23816 }, { "epoch": 2.824261828530772, "grad_norm": 0.6995856007810035, "learning_rate": 1.0498281008245939e-05, "loss": 0.0876, "step": 23817 }, { "epoch": 2.824380410292897, "grad_norm": 0.6368290508054114, "learning_rate": 1.0496325818891665e-05, "loss": 0.0872, "step": 23818 }, { "epoch": 2.824498992055022, "grad_norm": 0.6169550566606034, "learning_rate": 1.0494370763243488e-05, "loss": 0.0889, "step": 23819 }, { "epoch": 2.824617573817147, "grad_norm": 0.619076235715699, "learning_rate": 1.049241584131941e-05, "loss": 0.0688, "step": 23820 }, { "epoch": 2.824736155579272, "grad_norm": 0.7520098744252404, "learning_rate": 1.0490461053137484e-05, "loss": 0.0851, "step": 23821 }, { "epoch": 2.824854737341397, "grad_norm": 0.5608413253767294, "learning_rate": 1.0488506398715706e-05, "loss": 0.0902, "step": 23822 }, { "epoch": 2.824973319103522, "grad_norm": 0.6450508415494578, "learning_rate": 1.0486551878072102e-05, "loss": 0.0855, "step": 23823 }, { "epoch": 2.825091900865647, "grad_norm": 0.9286545652755016, "learning_rate": 1.0484597491224692e-05, "loss": 0.1021, "step": 23824 }, { "epoch": 2.8252104826277717, "grad_norm": 0.8498673646080401, "learning_rate": 1.0482643238191495e-05, "loss": 0.1145, "step": 23825 }, { "epoch": 2.825329064389897, "grad_norm": 0.8883344213149525, "learning_rate": 1.0480689118990534e-05, "loss": 0.1127, "step": 23826 }, { "epoch": 2.8254476461520217, "grad_norm": 0.7525265827856628, "learning_rate": 1.0478735133639797e-05, "loss": 0.09, "step": 23827 }, { "epoch": 2.825566227914147, "grad_norm": 0.6367724667081325, "learning_rate": 1.047678128215733e-05, "loss": 0.0672, "step": 23828 }, { "epoch": 2.8256848096762717, "grad_norm": 0.7502386189321952, "learning_rate": 1.0474827564561124e-05, "loss": 0.0894, "step": 23829 }, { "epoch": 2.825803391438397, "grad_norm": 0.6622554884924331, "learning_rate": 1.0472873980869205e-05, "loss": 0.0991, "step": 23830 }, { "epoch": 2.8259219732005216, "grad_norm": 0.591359000566455, "learning_rate": 1.0470920531099554e-05, "loss": 0.0794, "step": 23831 }, { "epoch": 2.826040554962647, "grad_norm": 0.3076503251348632, "learning_rate": 1.0468967215270217e-05, "loss": 0.0435, "step": 23832 }, { "epoch": 2.8261591367247716, "grad_norm": 1.114059832626202, "learning_rate": 1.0467014033399175e-05, "loss": 0.1425, "step": 23833 }, { "epoch": 2.826277718486897, "grad_norm": 0.44655987779462797, "learning_rate": 1.0465060985504441e-05, "loss": 0.0601, "step": 23834 }, { "epoch": 2.8263963002490216, "grad_norm": 0.4834944469500781, "learning_rate": 1.0463108071604022e-05, "loss": 0.0599, "step": 23835 }, { "epoch": 2.8265148820111468, "grad_norm": 0.3989599496591911, "learning_rate": 1.046115529171592e-05, "loss": 0.0619, "step": 23836 }, { "epoch": 2.8266334637732715, "grad_norm": 1.1756868264574083, "learning_rate": 1.0459202645858147e-05, "loss": 0.1007, "step": 23837 }, { "epoch": 2.8267520455353967, "grad_norm": 0.6045675676316454, "learning_rate": 1.0457250134048673e-05, "loss": 0.0756, "step": 23838 }, { "epoch": 2.8268706272975215, "grad_norm": 0.47017971613045006, "learning_rate": 1.0455297756305544e-05, "loss": 0.0704, "step": 23839 }, { "epoch": 2.8269892090596467, "grad_norm": 0.6654461003755512, "learning_rate": 1.045334551264672e-05, "loss": 0.0697, "step": 23840 }, { "epoch": 2.8271077908217714, "grad_norm": 1.0354167798014329, "learning_rate": 1.045139340309022e-05, "loss": 0.1403, "step": 23841 }, { "epoch": 2.8272263725838966, "grad_norm": 0.568594961029302, "learning_rate": 1.0449441427654017e-05, "loss": 0.0834, "step": 23842 }, { "epoch": 2.827344954346022, "grad_norm": 0.8309336289684611, "learning_rate": 1.0447489586356138e-05, "loss": 0.1019, "step": 23843 }, { "epoch": 2.8274635361081466, "grad_norm": 0.6274543938531099, "learning_rate": 1.0445537879214551e-05, "loss": 0.0716, "step": 23844 }, { "epoch": 2.8275821178702714, "grad_norm": 0.657835015360919, "learning_rate": 1.0443586306247255e-05, "loss": 0.0839, "step": 23845 }, { "epoch": 2.8277006996323966, "grad_norm": 0.5293494906357262, "learning_rate": 1.0441634867472243e-05, "loss": 0.0814, "step": 23846 }, { "epoch": 2.8278192813945218, "grad_norm": 0.46574178139668837, "learning_rate": 1.043968356290751e-05, "loss": 0.0615, "step": 23847 }, { "epoch": 2.8279378631566465, "grad_norm": 0.7286369463435364, "learning_rate": 1.0437732392571042e-05, "loss": 0.0824, "step": 23848 }, { "epoch": 2.8280564449187713, "grad_norm": 0.5752693349648198, "learning_rate": 1.043578135648082e-05, "loss": 0.0738, "step": 23849 }, { "epoch": 2.8281750266808965, "grad_norm": 0.6043140416185935, "learning_rate": 1.043383045465483e-05, "loss": 0.0676, "step": 23850 }, { "epoch": 2.8282936084430217, "grad_norm": 0.47594748481235505, "learning_rate": 1.0431879687111065e-05, "loss": 0.0571, "step": 23851 }, { "epoch": 2.8284121902051464, "grad_norm": 0.6635051706046304, "learning_rate": 1.042992905386751e-05, "loss": 0.0818, "step": 23852 }, { "epoch": 2.828530771967271, "grad_norm": 0.5951863375140738, "learning_rate": 1.0427978554942125e-05, "loss": 0.0741, "step": 23853 }, { "epoch": 2.8286493537293964, "grad_norm": 0.9616685634313061, "learning_rate": 1.042602819035293e-05, "loss": 0.1182, "step": 23854 }, { "epoch": 2.8287679354915216, "grad_norm": 0.7105290180572675, "learning_rate": 1.042407796011787e-05, "loss": 0.0881, "step": 23855 }, { "epoch": 2.8288865172536464, "grad_norm": 0.6122452345503079, "learning_rate": 1.0422127864254938e-05, "loss": 0.0879, "step": 23856 }, { "epoch": 2.829005099015771, "grad_norm": 0.5758624624548543, "learning_rate": 1.042017790278211e-05, "loss": 0.0723, "step": 23857 }, { "epoch": 2.8291236807778963, "grad_norm": 0.8791302746185502, "learning_rate": 1.0418228075717366e-05, "loss": 0.0932, "step": 23858 }, { "epoch": 2.8292422625400215, "grad_norm": 0.9570143430106155, "learning_rate": 1.041627838307868e-05, "loss": 0.1061, "step": 23859 }, { "epoch": 2.8293608443021463, "grad_norm": 0.654801565816076, "learning_rate": 1.0414328824884021e-05, "loss": 0.0871, "step": 23860 }, { "epoch": 2.829479426064271, "grad_norm": 0.9440362281955649, "learning_rate": 1.0412379401151363e-05, "loss": 0.1184, "step": 23861 }, { "epoch": 2.8295980078263963, "grad_norm": 0.5993550330116183, "learning_rate": 1.0410430111898678e-05, "loss": 0.0732, "step": 23862 }, { "epoch": 2.8297165895885215, "grad_norm": 0.7161942488625683, "learning_rate": 1.0408480957143943e-05, "loss": 0.0775, "step": 23863 }, { "epoch": 2.829835171350646, "grad_norm": 0.8174603046694873, "learning_rate": 1.0406531936905114e-05, "loss": 0.1148, "step": 23864 }, { "epoch": 2.8299537531127714, "grad_norm": 0.5752944660450884, "learning_rate": 1.0404583051200164e-05, "loss": 0.0732, "step": 23865 }, { "epoch": 2.830072334874896, "grad_norm": 0.5389350007159895, "learning_rate": 1.040263430004706e-05, "loss": 0.0845, "step": 23866 }, { "epoch": 2.8301909166370214, "grad_norm": 0.5715876327420878, "learning_rate": 1.0400685683463768e-05, "loss": 0.0838, "step": 23867 }, { "epoch": 2.830309498399146, "grad_norm": 0.6762715475690781, "learning_rate": 1.039873720146825e-05, "loss": 0.0899, "step": 23868 }, { "epoch": 2.8304280801612713, "grad_norm": 0.7409993526630868, "learning_rate": 1.0396788854078471e-05, "loss": 0.0946, "step": 23869 }, { "epoch": 2.830546661923396, "grad_norm": 0.831702385202434, "learning_rate": 1.0394840641312397e-05, "loss": 0.1216, "step": 23870 }, { "epoch": 2.8306652436855213, "grad_norm": 0.42206352145507037, "learning_rate": 1.0392892563187978e-05, "loss": 0.0544, "step": 23871 }, { "epoch": 2.830783825447646, "grad_norm": 0.8437299822058303, "learning_rate": 1.0390944619723175e-05, "loss": 0.0816, "step": 23872 }, { "epoch": 2.8309024072097713, "grad_norm": 0.8507037642354829, "learning_rate": 1.0388996810935948e-05, "loss": 0.1274, "step": 23873 }, { "epoch": 2.831020988971896, "grad_norm": 0.6708538851759137, "learning_rate": 1.038704913684425e-05, "loss": 0.0829, "step": 23874 }, { "epoch": 2.8311395707340212, "grad_norm": 0.9315242396404548, "learning_rate": 1.0385101597466054e-05, "loss": 0.1083, "step": 23875 }, { "epoch": 2.831258152496146, "grad_norm": 0.8169635478804586, "learning_rate": 1.0383154192819278e-05, "loss": 0.1144, "step": 23876 }, { "epoch": 2.831376734258271, "grad_norm": 0.5341460281162749, "learning_rate": 1.0381206922921915e-05, "loss": 0.0657, "step": 23877 }, { "epoch": 2.831495316020396, "grad_norm": 0.6208984587473538, "learning_rate": 1.0379259787791887e-05, "loss": 0.0681, "step": 23878 }, { "epoch": 2.831613897782521, "grad_norm": 0.5643034136665415, "learning_rate": 1.0377312787447157e-05, "loss": 0.0743, "step": 23879 }, { "epoch": 2.831732479544646, "grad_norm": 0.5953667677463591, "learning_rate": 1.0375365921905669e-05, "loss": 0.0928, "step": 23880 }, { "epoch": 2.831851061306771, "grad_norm": 0.6858551655467482, "learning_rate": 1.0373419191185385e-05, "loss": 0.0797, "step": 23881 }, { "epoch": 2.831969643068896, "grad_norm": 0.8084799155212737, "learning_rate": 1.037147259530423e-05, "loss": 0.1146, "step": 23882 }, { "epoch": 2.832088224831021, "grad_norm": 0.45192986787898004, "learning_rate": 1.0369526134280156e-05, "loss": 0.0603, "step": 23883 }, { "epoch": 2.832206806593146, "grad_norm": 0.7441417646847788, "learning_rate": 1.0367579808131112e-05, "loss": 0.0949, "step": 23884 }, { "epoch": 2.832325388355271, "grad_norm": 0.5514670166810305, "learning_rate": 1.0365633616875039e-05, "loss": 0.0875, "step": 23885 }, { "epoch": 2.832443970117396, "grad_norm": 0.6724699922309714, "learning_rate": 1.0363687560529889e-05, "loss": 0.0882, "step": 23886 }, { "epoch": 2.832562551879521, "grad_norm": 0.5255535645941433, "learning_rate": 1.036174163911357e-05, "loss": 0.0677, "step": 23887 }, { "epoch": 2.8326811336416458, "grad_norm": 0.514037029291515, "learning_rate": 1.0359795852644063e-05, "loss": 0.061, "step": 23888 }, { "epoch": 2.832799715403771, "grad_norm": 0.5272120367132883, "learning_rate": 1.0357850201139274e-05, "loss": 0.0737, "step": 23889 }, { "epoch": 2.8329182971658957, "grad_norm": 0.637955102389592, "learning_rate": 1.0355904684617162e-05, "loss": 0.089, "step": 23890 }, { "epoch": 2.833036878928021, "grad_norm": 0.7253321648041352, "learning_rate": 1.0353959303095631e-05, "loss": 0.0988, "step": 23891 }, { "epoch": 2.833155460690146, "grad_norm": 0.4974976978143321, "learning_rate": 1.0352014056592655e-05, "loss": 0.0519, "step": 23892 }, { "epoch": 2.833274042452271, "grad_norm": 0.6558966894672918, "learning_rate": 1.0350068945126137e-05, "loss": 0.0967, "step": 23893 }, { "epoch": 2.8333926242143956, "grad_norm": 0.5273862134161543, "learning_rate": 1.0348123968714019e-05, "loss": 0.0773, "step": 23894 }, { "epoch": 2.833511205976521, "grad_norm": 0.6460019524701236, "learning_rate": 1.0346179127374228e-05, "loss": 0.0799, "step": 23895 }, { "epoch": 2.833629787738646, "grad_norm": 0.39327055331106797, "learning_rate": 1.03442344211247e-05, "loss": 0.0486, "step": 23896 }, { "epoch": 2.833748369500771, "grad_norm": 0.5812844941222087, "learning_rate": 1.0342289849983364e-05, "loss": 0.0832, "step": 23897 }, { "epoch": 2.8338669512628956, "grad_norm": 0.7535222003266849, "learning_rate": 1.0340345413968125e-05, "loss": 0.0858, "step": 23898 }, { "epoch": 2.8339855330250208, "grad_norm": 0.6758957107772973, "learning_rate": 1.033840111309694e-05, "loss": 0.0831, "step": 23899 }, { "epoch": 2.834104114787146, "grad_norm": 0.7164260731239147, "learning_rate": 1.0336456947387713e-05, "loss": 0.0946, "step": 23900 }, { "epoch": 2.8342226965492707, "grad_norm": 0.7079710433109707, "learning_rate": 1.0334512916858378e-05, "loss": 0.1083, "step": 23901 }, { "epoch": 2.8343412783113955, "grad_norm": 0.6393998190984466, "learning_rate": 1.0332569021526831e-05, "loss": 0.0803, "step": 23902 }, { "epoch": 2.8344598600735207, "grad_norm": 0.4744838088730723, "learning_rate": 1.0330625261411028e-05, "loss": 0.0699, "step": 23903 }, { "epoch": 2.834578441835646, "grad_norm": 1.1858049455653026, "learning_rate": 1.0328681636528864e-05, "loss": 0.1474, "step": 23904 }, { "epoch": 2.8346970235977706, "grad_norm": 0.8535376442582071, "learning_rate": 1.0326738146898266e-05, "loss": 0.1151, "step": 23905 }, { "epoch": 2.8348156053598954, "grad_norm": 0.6263769286788688, "learning_rate": 1.0324794792537146e-05, "loss": 0.0987, "step": 23906 }, { "epoch": 2.8349341871220206, "grad_norm": 0.5796860149290024, "learning_rate": 1.0322851573463425e-05, "loss": 0.0699, "step": 23907 }, { "epoch": 2.835052768884146, "grad_norm": 0.5089261828270222, "learning_rate": 1.0320908489695019e-05, "loss": 0.0782, "step": 23908 }, { "epoch": 2.8351713506462706, "grad_norm": 0.5969611375505518, "learning_rate": 1.0318965541249823e-05, "loss": 0.0629, "step": 23909 }, { "epoch": 2.8352899324083953, "grad_norm": 0.48131237264155025, "learning_rate": 1.0317022728145776e-05, "loss": 0.0653, "step": 23910 }, { "epoch": 2.8354085141705205, "grad_norm": 0.7549415555820216, "learning_rate": 1.0315080050400767e-05, "loss": 0.1028, "step": 23911 }, { "epoch": 2.8355270959326457, "grad_norm": 0.6088014840042304, "learning_rate": 1.0313137508032719e-05, "loss": 0.0829, "step": 23912 }, { "epoch": 2.8356456776947705, "grad_norm": 0.6381406253988157, "learning_rate": 1.0311195101059515e-05, "loss": 0.1005, "step": 23913 }, { "epoch": 2.8357642594568953, "grad_norm": 0.8612422583145198, "learning_rate": 1.03092528294991e-05, "loss": 0.1367, "step": 23914 }, { "epoch": 2.8358828412190205, "grad_norm": 0.3828778965057416, "learning_rate": 1.0307310693369349e-05, "loss": 0.048, "step": 23915 }, { "epoch": 2.8360014229811457, "grad_norm": 0.8292028013196637, "learning_rate": 1.0305368692688174e-05, "loss": 0.1068, "step": 23916 }, { "epoch": 2.8361200047432704, "grad_norm": 0.7441650198310613, "learning_rate": 1.030342682747348e-05, "loss": 0.0889, "step": 23917 }, { "epoch": 2.8362385865053956, "grad_norm": 0.6611667484493016, "learning_rate": 1.0301485097743172e-05, "loss": 0.0823, "step": 23918 }, { "epoch": 2.8363571682675204, "grad_norm": 0.680203172577958, "learning_rate": 1.0299543503515152e-05, "loss": 0.1043, "step": 23919 }, { "epoch": 2.8364757500296456, "grad_norm": 0.7839093428030047, "learning_rate": 1.029760204480731e-05, "loss": 0.125, "step": 23920 }, { "epoch": 2.8365943317917703, "grad_norm": 0.6266849957507009, "learning_rate": 1.0295660721637544e-05, "loss": 0.0865, "step": 23921 }, { "epoch": 2.8367129135538955, "grad_norm": 0.48774326787941236, "learning_rate": 1.0293719534023754e-05, "loss": 0.0665, "step": 23922 }, { "epoch": 2.8368314953160203, "grad_norm": 0.6157500529971958, "learning_rate": 1.0291778481983838e-05, "loss": 0.0793, "step": 23923 }, { "epoch": 2.8369500770781455, "grad_norm": 0.5380836387268747, "learning_rate": 1.0289837565535686e-05, "loss": 0.0738, "step": 23924 }, { "epoch": 2.8370686588402703, "grad_norm": 0.7066844349741134, "learning_rate": 1.0287896784697193e-05, "loss": 0.0613, "step": 23925 }, { "epoch": 2.8371872406023955, "grad_norm": 0.4448555396475084, "learning_rate": 1.0285956139486261e-05, "loss": 0.0567, "step": 23926 }, { "epoch": 2.8373058223645202, "grad_norm": 0.502845026498411, "learning_rate": 1.0284015629920762e-05, "loss": 0.0711, "step": 23927 }, { "epoch": 2.8374244041266454, "grad_norm": 0.9194352149490268, "learning_rate": 1.028207525601859e-05, "loss": 0.1157, "step": 23928 }, { "epoch": 2.83754298588877, "grad_norm": 0.8148818434841729, "learning_rate": 1.0280135017797637e-05, "loss": 0.1016, "step": 23929 }, { "epoch": 2.8376615676508954, "grad_norm": 0.44804442941584, "learning_rate": 1.02781949152758e-05, "loss": 0.0551, "step": 23930 }, { "epoch": 2.83778014941302, "grad_norm": 0.8932978188866842, "learning_rate": 1.0276254948470939e-05, "loss": 0.0941, "step": 23931 }, { "epoch": 2.8378987311751454, "grad_norm": 0.5539383469381187, "learning_rate": 1.0274315117400954e-05, "loss": 0.0683, "step": 23932 }, { "epoch": 2.83801731293727, "grad_norm": 0.47841123887840625, "learning_rate": 1.0272375422083728e-05, "loss": 0.0625, "step": 23933 }, { "epoch": 2.8381358946993953, "grad_norm": 0.6250223569734594, "learning_rate": 1.0270435862537136e-05, "loss": 0.0868, "step": 23934 }, { "epoch": 2.83825447646152, "grad_norm": 0.6386409162286979, "learning_rate": 1.0268496438779075e-05, "loss": 0.087, "step": 23935 }, { "epoch": 2.8383730582236453, "grad_norm": 0.7736975855041484, "learning_rate": 1.0266557150827394e-05, "loss": 0.0948, "step": 23936 }, { "epoch": 2.83849163998577, "grad_norm": 0.5697347286669494, "learning_rate": 1.0264617998700005e-05, "loss": 0.0732, "step": 23937 }, { "epoch": 2.8386102217478952, "grad_norm": 0.53808592094386, "learning_rate": 1.0262678982414761e-05, "loss": 0.074, "step": 23938 }, { "epoch": 2.83872880351002, "grad_norm": 0.47576149678951407, "learning_rate": 1.0260740101989544e-05, "loss": 0.0639, "step": 23939 }, { "epoch": 2.838847385272145, "grad_norm": 0.5040723987323494, "learning_rate": 1.0258801357442227e-05, "loss": 0.0603, "step": 23940 }, { "epoch": 2.8389659670342704, "grad_norm": 0.7002232679513348, "learning_rate": 1.0256862748790696e-05, "loss": 0.0768, "step": 23941 }, { "epoch": 2.839084548796395, "grad_norm": 0.5346125102183429, "learning_rate": 1.0254924276052803e-05, "loss": 0.075, "step": 23942 }, { "epoch": 2.83920313055852, "grad_norm": 0.7389926323608602, "learning_rate": 1.0252985939246423e-05, "loss": 0.1082, "step": 23943 }, { "epoch": 2.839321712320645, "grad_norm": 0.878076152691137, "learning_rate": 1.025104773838943e-05, "loss": 0.0915, "step": 23944 }, { "epoch": 2.8394402940827703, "grad_norm": 0.3806550559911846, "learning_rate": 1.024910967349969e-05, "loss": 0.0461, "step": 23945 }, { "epoch": 2.839558875844895, "grad_norm": 0.47838596822270063, "learning_rate": 1.0247171744595083e-05, "loss": 0.0664, "step": 23946 }, { "epoch": 2.83967745760702, "grad_norm": 0.5014663678948579, "learning_rate": 1.024523395169344e-05, "loss": 0.0662, "step": 23947 }, { "epoch": 2.839796039369145, "grad_norm": 0.5569509765815179, "learning_rate": 1.0243296294812665e-05, "loss": 0.0719, "step": 23948 }, { "epoch": 2.8399146211312702, "grad_norm": 0.4149761871504751, "learning_rate": 1.0241358773970594e-05, "loss": 0.0486, "step": 23949 }, { "epoch": 2.840033202893395, "grad_norm": 0.91323786596332, "learning_rate": 1.0239421389185105e-05, "loss": 0.1125, "step": 23950 }, { "epoch": 2.8401517846555198, "grad_norm": 0.5223051657930876, "learning_rate": 1.0237484140474032e-05, "loss": 0.0618, "step": 23951 }, { "epoch": 2.840270366417645, "grad_norm": 0.6121187230327332, "learning_rate": 1.0235547027855272e-05, "loss": 0.0642, "step": 23952 }, { "epoch": 2.84038894817977, "grad_norm": 0.9551302415132396, "learning_rate": 1.0233610051346651e-05, "loss": 0.1016, "step": 23953 }, { "epoch": 2.840507529941895, "grad_norm": 0.532602163258562, "learning_rate": 1.023167321096604e-05, "loss": 0.0743, "step": 23954 }, { "epoch": 2.8406261117040197, "grad_norm": 0.5411833206582843, "learning_rate": 1.022973650673129e-05, "loss": 0.0686, "step": 23955 }, { "epoch": 2.840744693466145, "grad_norm": 0.46044813890557656, "learning_rate": 1.022779993866026e-05, "loss": 0.0653, "step": 23956 }, { "epoch": 2.84086327522827, "grad_norm": 0.4707832520305943, "learning_rate": 1.0225863506770805e-05, "loss": 0.049, "step": 23957 }, { "epoch": 2.840981856990395, "grad_norm": 0.7903645244101684, "learning_rate": 1.0223927211080755e-05, "loss": 0.1019, "step": 23958 }, { "epoch": 2.8411004387525196, "grad_norm": 0.7070056403405238, "learning_rate": 1.0221991051607996e-05, "loss": 0.0989, "step": 23959 }, { "epoch": 2.841219020514645, "grad_norm": 0.5838097140353159, "learning_rate": 1.0220055028370346e-05, "loss": 0.0734, "step": 23960 }, { "epoch": 2.84133760227677, "grad_norm": 0.8197295434400714, "learning_rate": 1.0218119141385674e-05, "loss": 0.1043, "step": 23961 }, { "epoch": 2.8414561840388948, "grad_norm": 0.5832662602142006, "learning_rate": 1.02161833906718e-05, "loss": 0.0673, "step": 23962 }, { "epoch": 2.8415747658010195, "grad_norm": 0.6250810776504498, "learning_rate": 1.0214247776246602e-05, "loss": 0.0651, "step": 23963 }, { "epoch": 2.8416933475631447, "grad_norm": 0.7453008462835679, "learning_rate": 1.0212312298127901e-05, "loss": 0.1017, "step": 23964 }, { "epoch": 2.84181192932527, "grad_norm": 0.8207908539945641, "learning_rate": 1.0210376956333544e-05, "loss": 0.0949, "step": 23965 }, { "epoch": 2.8419305110873947, "grad_norm": 0.5801251463945364, "learning_rate": 1.0208441750881378e-05, "loss": 0.0744, "step": 23966 }, { "epoch": 2.84204909284952, "grad_norm": 0.5280818153748221, "learning_rate": 1.0206506681789241e-05, "loss": 0.0722, "step": 23967 }, { "epoch": 2.8421676746116447, "grad_norm": 0.6006366351068599, "learning_rate": 1.0204571749074979e-05, "loss": 0.064, "step": 23968 }, { "epoch": 2.84228625637377, "grad_norm": 0.6410921227420314, "learning_rate": 1.0202636952756403e-05, "loss": 0.0923, "step": 23969 }, { "epoch": 2.8424048381358946, "grad_norm": 0.45675159469623716, "learning_rate": 1.0200702292851388e-05, "loss": 0.0535, "step": 23970 }, { "epoch": 2.84252341989802, "grad_norm": 0.4798903718979398, "learning_rate": 1.019876776937774e-05, "loss": 0.064, "step": 23971 }, { "epoch": 2.8426420016601446, "grad_norm": 0.8584794018575118, "learning_rate": 1.0196833382353304e-05, "loss": 0.1069, "step": 23972 }, { "epoch": 2.84276058342227, "grad_norm": 0.6174608486911981, "learning_rate": 1.019489913179591e-05, "loss": 0.0699, "step": 23973 }, { "epoch": 2.8428791651843945, "grad_norm": 0.5334232897111003, "learning_rate": 1.0192965017723392e-05, "loss": 0.0667, "step": 23974 }, { "epoch": 2.8429977469465197, "grad_norm": 0.4602354000315843, "learning_rate": 1.0191031040153588e-05, "loss": 0.066, "step": 23975 }, { "epoch": 2.8431163287086445, "grad_norm": 0.5889784314633367, "learning_rate": 1.0189097199104305e-05, "loss": 0.0726, "step": 23976 }, { "epoch": 2.8432349104707697, "grad_norm": 0.8190642171121745, "learning_rate": 1.0187163494593388e-05, "loss": 0.1093, "step": 23977 }, { "epoch": 2.8433534922328945, "grad_norm": 0.7146812654919443, "learning_rate": 1.0185229926638657e-05, "loss": 0.0931, "step": 23978 }, { "epoch": 2.8434720739950197, "grad_norm": 0.635223554146891, "learning_rate": 1.0183296495257949e-05, "loss": 0.0665, "step": 23979 }, { "epoch": 2.8435906557571444, "grad_norm": 0.9419932088698812, "learning_rate": 1.018136320046906e-05, "loss": 0.1189, "step": 23980 }, { "epoch": 2.8437092375192696, "grad_norm": 0.3987769658698771, "learning_rate": 1.0179430042289848e-05, "loss": 0.0555, "step": 23981 }, { "epoch": 2.8438278192813944, "grad_norm": 0.6070125107854947, "learning_rate": 1.0177497020738105e-05, "loss": 0.0787, "step": 23982 }, { "epoch": 2.8439464010435196, "grad_norm": 0.6748796027164554, "learning_rate": 1.0175564135831667e-05, "loss": 0.0957, "step": 23983 }, { "epoch": 2.8440649828056443, "grad_norm": 0.5518636857473262, "learning_rate": 1.0173631387588348e-05, "loss": 0.0587, "step": 23984 }, { "epoch": 2.8441835645677696, "grad_norm": 1.1040740114174112, "learning_rate": 1.0171698776025964e-05, "loss": 0.1576, "step": 23985 }, { "epoch": 2.8443021463298943, "grad_norm": 0.4997275720569531, "learning_rate": 1.0169766301162345e-05, "loss": 0.0617, "step": 23986 }, { "epoch": 2.8444207280920195, "grad_norm": 0.6386900406121399, "learning_rate": 1.0167833963015286e-05, "loss": 0.0969, "step": 23987 }, { "epoch": 2.8445393098541443, "grad_norm": 0.5844213818357292, "learning_rate": 1.0165901761602608e-05, "loss": 0.079, "step": 23988 }, { "epoch": 2.8446578916162695, "grad_norm": 0.5508786141029828, "learning_rate": 1.0163969696942124e-05, "loss": 0.0714, "step": 23989 }, { "epoch": 2.8447764733783947, "grad_norm": 0.9247235193753938, "learning_rate": 1.0162037769051658e-05, "loss": 0.1466, "step": 23990 }, { "epoch": 2.8448950551405194, "grad_norm": 0.7796931044349732, "learning_rate": 1.0160105977948997e-05, "loss": 0.0914, "step": 23991 }, { "epoch": 2.845013636902644, "grad_norm": 0.6409777536421586, "learning_rate": 1.0158174323651961e-05, "loss": 0.0807, "step": 23992 }, { "epoch": 2.8451322186647694, "grad_norm": 0.5449501319610986, "learning_rate": 1.0156242806178359e-05, "loss": 0.0712, "step": 23993 }, { "epoch": 2.8452508004268946, "grad_norm": 0.5568931224460306, "learning_rate": 1.0154311425545995e-05, "loss": 0.0836, "step": 23994 }, { "epoch": 2.8453693821890194, "grad_norm": 0.6567089239148143, "learning_rate": 1.0152380181772676e-05, "loss": 0.0793, "step": 23995 }, { "epoch": 2.845487963951144, "grad_norm": 0.5041530108061812, "learning_rate": 1.0150449074876202e-05, "loss": 0.0618, "step": 23996 }, { "epoch": 2.8456065457132693, "grad_norm": 0.5465825745389942, "learning_rate": 1.0148518104874389e-05, "loss": 0.081, "step": 23997 }, { "epoch": 2.8457251274753945, "grad_norm": 0.7246228507703061, "learning_rate": 1.0146587271785015e-05, "loss": 0.1046, "step": 23998 }, { "epoch": 2.8458437092375193, "grad_norm": 0.5051293376784369, "learning_rate": 1.0144656575625894e-05, "loss": 0.0597, "step": 23999 }, { "epoch": 2.845962290999644, "grad_norm": 1.013033743988622, "learning_rate": 1.0142726016414822e-05, "loss": 0.1077, "step": 24000 }, { "epoch": 2.8460808727617692, "grad_norm": 0.6252876471345057, "learning_rate": 1.0140795594169608e-05, "loss": 0.0861, "step": 24001 }, { "epoch": 2.8461994545238944, "grad_norm": 0.6675624777836318, "learning_rate": 1.0138865308908025e-05, "loss": 0.0803, "step": 24002 }, { "epoch": 2.846318036286019, "grad_norm": 0.591505920674996, "learning_rate": 1.0136935160647881e-05, "loss": 0.0786, "step": 24003 }, { "epoch": 2.846436618048144, "grad_norm": 0.8704919388292923, "learning_rate": 1.013500514940697e-05, "loss": 0.123, "step": 24004 }, { "epoch": 2.846555199810269, "grad_norm": 1.072829563947362, "learning_rate": 1.013307527520308e-05, "loss": 0.129, "step": 24005 }, { "epoch": 2.8466737815723944, "grad_norm": 0.45592659702893057, "learning_rate": 1.0131145538054015e-05, "loss": 0.0614, "step": 24006 }, { "epoch": 2.846792363334519, "grad_norm": 0.8146596288953463, "learning_rate": 1.012921593797754e-05, "loss": 0.1135, "step": 24007 }, { "epoch": 2.846910945096644, "grad_norm": 0.7852036697079973, "learning_rate": 1.0127286474991474e-05, "loss": 0.1001, "step": 24008 }, { "epoch": 2.847029526858769, "grad_norm": 0.4974874526252747, "learning_rate": 1.012535714911358e-05, "loss": 0.058, "step": 24009 }, { "epoch": 2.8471481086208943, "grad_norm": 0.5246418325998012, "learning_rate": 1.012342796036165e-05, "loss": 0.0758, "step": 24010 }, { "epoch": 2.847266690383019, "grad_norm": 0.9840976015619735, "learning_rate": 1.0121498908753474e-05, "loss": 0.1272, "step": 24011 }, { "epoch": 2.847385272145144, "grad_norm": 0.7023104327309884, "learning_rate": 1.0119569994306841e-05, "loss": 0.093, "step": 24012 }, { "epoch": 2.847503853907269, "grad_norm": 0.6782208223548045, "learning_rate": 1.0117641217039514e-05, "loss": 0.0942, "step": 24013 }, { "epoch": 2.847622435669394, "grad_norm": 0.7229920448765793, "learning_rate": 1.0115712576969289e-05, "loss": 0.0949, "step": 24014 }, { "epoch": 2.847741017431519, "grad_norm": 0.724058759429099, "learning_rate": 1.0113784074113939e-05, "loss": 0.0913, "step": 24015 }, { "epoch": 2.847859599193644, "grad_norm": 0.8651456597557529, "learning_rate": 1.0111855708491245e-05, "loss": 0.1216, "step": 24016 }, { "epoch": 2.847978180955769, "grad_norm": 0.9045692971106987, "learning_rate": 1.0109927480118995e-05, "loss": 0.1294, "step": 24017 }, { "epoch": 2.848096762717894, "grad_norm": 0.6262334308613537, "learning_rate": 1.0107999389014936e-05, "loss": 0.0889, "step": 24018 }, { "epoch": 2.848215344480019, "grad_norm": 0.49963605947624196, "learning_rate": 1.0106071435196875e-05, "loss": 0.0639, "step": 24019 }, { "epoch": 2.848333926242144, "grad_norm": 0.88741196038654, "learning_rate": 1.0104143618682568e-05, "loss": 0.1151, "step": 24020 }, { "epoch": 2.848452508004269, "grad_norm": 0.6858899487686075, "learning_rate": 1.0102215939489796e-05, "loss": 0.0839, "step": 24021 }, { "epoch": 2.848571089766394, "grad_norm": 0.737741739323823, "learning_rate": 1.0100288397636306e-05, "loss": 0.0962, "step": 24022 }, { "epoch": 2.848689671528519, "grad_norm": 0.6530872379917853, "learning_rate": 1.0098360993139907e-05, "loss": 0.082, "step": 24023 }, { "epoch": 2.848808253290644, "grad_norm": 0.4291425515920265, "learning_rate": 1.0096433726018333e-05, "loss": 0.0675, "step": 24024 }, { "epoch": 2.848926835052769, "grad_norm": 0.5678797444325326, "learning_rate": 1.0094506596289366e-05, "loss": 0.0743, "step": 24025 }, { "epoch": 2.849045416814894, "grad_norm": 0.7339713701644687, "learning_rate": 1.009257960397077e-05, "loss": 0.0989, "step": 24026 }, { "epoch": 2.8491639985770187, "grad_norm": 1.1796523123389735, "learning_rate": 1.0090652749080309e-05, "loss": 0.1405, "step": 24027 }, { "epoch": 2.849282580339144, "grad_norm": 0.7144227104054262, "learning_rate": 1.0088726031635756e-05, "loss": 0.0831, "step": 24028 }, { "epoch": 2.8494011621012687, "grad_norm": 0.5869205453047858, "learning_rate": 1.0086799451654846e-05, "loss": 0.0878, "step": 24029 }, { "epoch": 2.849519743863394, "grad_norm": 0.7539710582129604, "learning_rate": 1.0084873009155374e-05, "loss": 0.0827, "step": 24030 }, { "epoch": 2.8496383256255187, "grad_norm": 0.6055103163663869, "learning_rate": 1.0082946704155074e-05, "loss": 0.0724, "step": 24031 }, { "epoch": 2.849756907387644, "grad_norm": 0.5407502291173129, "learning_rate": 1.0081020536671715e-05, "loss": 0.0762, "step": 24032 }, { "epoch": 2.8498754891497686, "grad_norm": 0.767563288299795, "learning_rate": 1.007909450672305e-05, "loss": 0.0997, "step": 24033 }, { "epoch": 2.849994070911894, "grad_norm": 0.7702980568138958, "learning_rate": 1.0077168614326835e-05, "loss": 0.0922, "step": 24034 }, { "epoch": 2.8501126526740186, "grad_norm": 0.5734351076762108, "learning_rate": 1.0075242859500836e-05, "loss": 0.0858, "step": 24035 }, { "epoch": 2.850231234436144, "grad_norm": 0.6826821688434936, "learning_rate": 1.0073317242262787e-05, "loss": 0.0744, "step": 24036 }, { "epoch": 2.8503498161982685, "grad_norm": 0.639258115580154, "learning_rate": 1.007139176263045e-05, "loss": 0.0848, "step": 24037 }, { "epoch": 2.8504683979603938, "grad_norm": 0.4412251948176937, "learning_rate": 1.0069466420621574e-05, "loss": 0.0692, "step": 24038 }, { "epoch": 2.8505869797225185, "grad_norm": 0.6581493807730258, "learning_rate": 1.0067541216253915e-05, "loss": 0.0897, "step": 24039 }, { "epoch": 2.8507055614846437, "grad_norm": 0.6539868849079093, "learning_rate": 1.0065616149545198e-05, "loss": 0.1076, "step": 24040 }, { "epoch": 2.8508241432467685, "grad_norm": 0.5440916203342356, "learning_rate": 1.0063691220513205e-05, "loss": 0.0666, "step": 24041 }, { "epoch": 2.8509427250088937, "grad_norm": 0.5529182073065049, "learning_rate": 1.0061766429175654e-05, "loss": 0.0824, "step": 24042 }, { "epoch": 2.851061306771019, "grad_norm": 0.46698807293926997, "learning_rate": 1.0059841775550297e-05, "loss": 0.057, "step": 24043 }, { "epoch": 2.8511798885331436, "grad_norm": 0.4642022540943324, "learning_rate": 1.0057917259654878e-05, "loss": 0.0608, "step": 24044 }, { "epoch": 2.8512984702952684, "grad_norm": 0.5802511482244278, "learning_rate": 1.005599288150714e-05, "loss": 0.0749, "step": 24045 }, { "epoch": 2.8514170520573936, "grad_norm": 0.645199288577071, "learning_rate": 1.0054068641124832e-05, "loss": 0.0992, "step": 24046 }, { "epoch": 2.851535633819519, "grad_norm": 0.5213131212094146, "learning_rate": 1.005214453852567e-05, "loss": 0.0666, "step": 24047 }, { "epoch": 2.8516542155816436, "grad_norm": 0.936511753514155, "learning_rate": 1.005022057372741e-05, "loss": 0.1134, "step": 24048 }, { "epoch": 2.8517727973437683, "grad_norm": 0.7262641717955587, "learning_rate": 1.004829674674778e-05, "loss": 0.0994, "step": 24049 }, { "epoch": 2.8518913791058935, "grad_norm": 0.588541036809643, "learning_rate": 1.0046373057604527e-05, "loss": 0.0825, "step": 24050 }, { "epoch": 2.8520099608680187, "grad_norm": 0.7462423491197995, "learning_rate": 1.0044449506315363e-05, "loss": 0.1273, "step": 24051 }, { "epoch": 2.8521285426301435, "grad_norm": 0.9774372263095188, "learning_rate": 1.0042526092898049e-05, "loss": 0.1412, "step": 24052 }, { "epoch": 2.8522471243922682, "grad_norm": 0.7189558083380084, "learning_rate": 1.0040602817370298e-05, "loss": 0.0854, "step": 24053 }, { "epoch": 2.8523657061543934, "grad_norm": 0.5383710059016054, "learning_rate": 1.003867967974984e-05, "loss": 0.0587, "step": 24054 }, { "epoch": 2.8524842879165186, "grad_norm": 0.5805936810253558, "learning_rate": 1.0036756680054413e-05, "loss": 0.0756, "step": 24055 }, { "epoch": 2.8526028696786434, "grad_norm": 0.7105309346826402, "learning_rate": 1.0034833818301737e-05, "loss": 0.0999, "step": 24056 }, { "epoch": 2.852721451440768, "grad_norm": 0.4838395474129487, "learning_rate": 1.0032911094509551e-05, "loss": 0.0636, "step": 24057 }, { "epoch": 2.8528400332028934, "grad_norm": 0.9611134887800568, "learning_rate": 1.0030988508695564e-05, "loss": 0.1232, "step": 24058 }, { "epoch": 2.8529586149650186, "grad_norm": 1.0876229636779122, "learning_rate": 1.0029066060877504e-05, "loss": 0.1657, "step": 24059 }, { "epoch": 2.8530771967271433, "grad_norm": 0.5620251391804836, "learning_rate": 1.0027143751073098e-05, "loss": 0.0792, "step": 24060 }, { "epoch": 2.853195778489268, "grad_norm": 0.6594157439714008, "learning_rate": 1.0025221579300074e-05, "loss": 0.0789, "step": 24061 }, { "epoch": 2.8533143602513933, "grad_norm": 0.805588777414057, "learning_rate": 1.0023299545576134e-05, "loss": 0.1242, "step": 24062 }, { "epoch": 2.8534329420135185, "grad_norm": 0.6899835819368925, "learning_rate": 1.0021377649919008e-05, "loss": 0.0856, "step": 24063 }, { "epoch": 2.8535515237756433, "grad_norm": 0.45644309736601407, "learning_rate": 1.0019455892346413e-05, "loss": 0.0624, "step": 24064 }, { "epoch": 2.8536701055377685, "grad_norm": 0.46881098713939434, "learning_rate": 1.0017534272876062e-05, "loss": 0.0623, "step": 24065 }, { "epoch": 2.853788687299893, "grad_norm": 0.6268016992172027, "learning_rate": 1.0015612791525673e-05, "loss": 0.0765, "step": 24066 }, { "epoch": 2.8539072690620184, "grad_norm": 0.5547002108196517, "learning_rate": 1.0013691448312959e-05, "loss": 0.0815, "step": 24067 }, { "epoch": 2.854025850824143, "grad_norm": 0.553010036006762, "learning_rate": 1.0011770243255642e-05, "loss": 0.0872, "step": 24068 }, { "epoch": 2.8541444325862684, "grad_norm": 0.5390776528284686, "learning_rate": 1.0009849176371414e-05, "loss": 0.0699, "step": 24069 }, { "epoch": 2.854263014348393, "grad_norm": 0.4408615171981366, "learning_rate": 1.0007928247677995e-05, "loss": 0.0614, "step": 24070 }, { "epoch": 2.8543815961105183, "grad_norm": 0.6683513615230656, "learning_rate": 1.0006007457193092e-05, "loss": 0.0993, "step": 24071 }, { "epoch": 2.854500177872643, "grad_norm": 0.587524095250223, "learning_rate": 1.0004086804934424e-05, "loss": 0.0907, "step": 24072 }, { "epoch": 2.8546187596347683, "grad_norm": 0.6783730755996218, "learning_rate": 1.0002166290919675e-05, "loss": 0.0926, "step": 24073 }, { "epoch": 2.854737341396893, "grad_norm": 0.7050639611483905, "learning_rate": 1.0000245915166564e-05, "loss": 0.0945, "step": 24074 }, { "epoch": 2.8548559231590183, "grad_norm": 0.6070280996927345, "learning_rate": 9.998325677692791e-06, "loss": 0.0806, "step": 24075 }, { "epoch": 2.854974504921143, "grad_norm": 0.8333095311983663, "learning_rate": 9.996405578516058e-06, "loss": 0.1145, "step": 24076 }, { "epoch": 2.855093086683268, "grad_norm": 0.6835317186527146, "learning_rate": 9.994485617654075e-06, "loss": 0.0873, "step": 24077 }, { "epoch": 2.855211668445393, "grad_norm": 0.6006241097191253, "learning_rate": 9.99256579512452e-06, "loss": 0.0983, "step": 24078 }, { "epoch": 2.855330250207518, "grad_norm": 0.47838598274090155, "learning_rate": 9.990646110945117e-06, "loss": 0.0633, "step": 24079 }, { "epoch": 2.855448831969643, "grad_norm": 0.5752210214622134, "learning_rate": 9.988726565133546e-06, "loss": 0.0798, "step": 24080 }, { "epoch": 2.855567413731768, "grad_norm": 0.900043518090903, "learning_rate": 9.986807157707509e-06, "loss": 0.0975, "step": 24081 }, { "epoch": 2.855685995493893, "grad_norm": 0.544361985345438, "learning_rate": 9.984887888684696e-06, "loss": 0.0575, "step": 24082 }, { "epoch": 2.855804577256018, "grad_norm": 0.5776394134567036, "learning_rate": 9.982968758082809e-06, "loss": 0.0786, "step": 24083 }, { "epoch": 2.855923159018143, "grad_norm": 0.9940426045873706, "learning_rate": 9.98104976591954e-06, "loss": 0.1133, "step": 24084 }, { "epoch": 2.856041740780268, "grad_norm": 0.6800152074460611, "learning_rate": 9.979130912212556e-06, "loss": 0.1, "step": 24085 }, { "epoch": 2.856160322542393, "grad_norm": 0.5203857377003935, "learning_rate": 9.977212196979585e-06, "loss": 0.0736, "step": 24086 }, { "epoch": 2.856278904304518, "grad_norm": 0.6724024176305442, "learning_rate": 9.975293620238285e-06, "loss": 0.0871, "step": 24087 }, { "epoch": 2.856397486066643, "grad_norm": 0.5745557535206329, "learning_rate": 9.973375182006364e-06, "loss": 0.0726, "step": 24088 }, { "epoch": 2.856516067828768, "grad_norm": 0.5115962196092065, "learning_rate": 9.97145688230148e-06, "loss": 0.0591, "step": 24089 }, { "epoch": 2.8566346495908927, "grad_norm": 0.6939954466163035, "learning_rate": 9.96953872114135e-06, "loss": 0.0703, "step": 24090 }, { "epoch": 2.856753231353018, "grad_norm": 0.5445136242860684, "learning_rate": 9.967620698543634e-06, "loss": 0.0718, "step": 24091 }, { "epoch": 2.856871813115143, "grad_norm": 0.6766243697119227, "learning_rate": 9.96570281452602e-06, "loss": 0.0941, "step": 24092 }, { "epoch": 2.856990394877268, "grad_norm": 0.7034276253037391, "learning_rate": 9.963785069106191e-06, "loss": 0.0933, "step": 24093 }, { "epoch": 2.8571089766393927, "grad_norm": 0.870105787808026, "learning_rate": 9.961867462301821e-06, "loss": 0.1121, "step": 24094 }, { "epoch": 2.857227558401518, "grad_norm": 0.5322807393106799, "learning_rate": 9.959949994130605e-06, "loss": 0.0606, "step": 24095 }, { "epoch": 2.857346140163643, "grad_norm": 0.5025495942108675, "learning_rate": 9.95803266461019e-06, "loss": 0.059, "step": 24096 }, { "epoch": 2.857464721925768, "grad_norm": 0.6905193755966604, "learning_rate": 9.956115473758282e-06, "loss": 0.0924, "step": 24097 }, { "epoch": 2.8575833036878926, "grad_norm": 0.6302929367797483, "learning_rate": 9.954198421592536e-06, "loss": 0.0691, "step": 24098 }, { "epoch": 2.857701885450018, "grad_norm": 0.8236918958825613, "learning_rate": 9.952281508130638e-06, "loss": 0.1109, "step": 24099 }, { "epoch": 2.857820467212143, "grad_norm": 0.6540744397939461, "learning_rate": 9.950364733390234e-06, "loss": 0.089, "step": 24100 }, { "epoch": 2.8579390489742678, "grad_norm": 0.5840022805973916, "learning_rate": 9.948448097389027e-06, "loss": 0.0825, "step": 24101 }, { "epoch": 2.8580576307363925, "grad_norm": 0.5266410749202782, "learning_rate": 9.946531600144665e-06, "loss": 0.0521, "step": 24102 }, { "epoch": 2.8581762124985177, "grad_norm": 0.7215864697066492, "learning_rate": 9.944615241674821e-06, "loss": 0.0979, "step": 24103 }, { "epoch": 2.858294794260643, "grad_norm": 0.472997274458136, "learning_rate": 9.942699021997162e-06, "loss": 0.0526, "step": 24104 }, { "epoch": 2.8584133760227677, "grad_norm": 0.7190395628033189, "learning_rate": 9.940782941129357e-06, "loss": 0.0973, "step": 24105 }, { "epoch": 2.8585319577848924, "grad_norm": 0.8189768188516319, "learning_rate": 9.938866999089072e-06, "loss": 0.0756, "step": 24106 }, { "epoch": 2.8586505395470176, "grad_norm": 0.6920278613100245, "learning_rate": 9.936951195893954e-06, "loss": 0.0935, "step": 24107 }, { "epoch": 2.858769121309143, "grad_norm": 0.627683203899578, "learning_rate": 9.935035531561677e-06, "loss": 0.0835, "step": 24108 }, { "epoch": 2.8588877030712676, "grad_norm": 0.6116784892471467, "learning_rate": 9.933120006109897e-06, "loss": 0.07, "step": 24109 }, { "epoch": 2.8590062848333924, "grad_norm": 0.5863321968987643, "learning_rate": 9.931204619556283e-06, "loss": 0.0783, "step": 24110 }, { "epoch": 2.8591248665955176, "grad_norm": 0.5347013619020896, "learning_rate": 9.929289371918469e-06, "loss": 0.0864, "step": 24111 }, { "epoch": 2.8592434483576428, "grad_norm": 0.7888552231601472, "learning_rate": 9.927374263214143e-06, "loss": 0.0967, "step": 24112 }, { "epoch": 2.8593620301197675, "grad_norm": 0.4908226659789212, "learning_rate": 9.925459293460932e-06, "loss": 0.0644, "step": 24113 }, { "epoch": 2.8594806118818927, "grad_norm": 0.5444555198228115, "learning_rate": 9.923544462676502e-06, "loss": 0.0776, "step": 24114 }, { "epoch": 2.8595991936440175, "grad_norm": 0.8212604064559308, "learning_rate": 9.921629770878501e-06, "loss": 0.1364, "step": 24115 }, { "epoch": 2.8597177754061427, "grad_norm": 0.6529842507512087, "learning_rate": 9.919715218084589e-06, "loss": 0.0836, "step": 24116 }, { "epoch": 2.8598363571682675, "grad_norm": 0.45999510917549863, "learning_rate": 9.917800804312415e-06, "loss": 0.0708, "step": 24117 }, { "epoch": 2.8599549389303927, "grad_norm": 0.72678739822603, "learning_rate": 9.915886529579616e-06, "loss": 0.0856, "step": 24118 }, { "epoch": 2.8600735206925174, "grad_norm": 0.7290533739588461, "learning_rate": 9.913972393903841e-06, "loss": 0.0874, "step": 24119 }, { "epoch": 2.8601921024546426, "grad_norm": 0.7505935750769345, "learning_rate": 9.912058397302745e-06, "loss": 0.0886, "step": 24120 }, { "epoch": 2.8603106842167674, "grad_norm": 0.8393869887417525, "learning_rate": 9.910144539793977e-06, "loss": 0.0927, "step": 24121 }, { "epoch": 2.8604292659788926, "grad_norm": 0.4922425166239362, "learning_rate": 9.908230821395154e-06, "loss": 0.0665, "step": 24122 }, { "epoch": 2.8605478477410173, "grad_norm": 0.7595122923551864, "learning_rate": 9.906317242123953e-06, "loss": 0.1002, "step": 24123 }, { "epoch": 2.8606664295031425, "grad_norm": 0.5075036383948484, "learning_rate": 9.90440380199799e-06, "loss": 0.0627, "step": 24124 }, { "epoch": 2.8607850112652673, "grad_norm": 0.7203883462977506, "learning_rate": 9.902490501034914e-06, "loss": 0.0972, "step": 24125 }, { "epoch": 2.8609035930273925, "grad_norm": 0.5655566016494068, "learning_rate": 9.900577339252359e-06, "loss": 0.077, "step": 24126 }, { "epoch": 2.8610221747895173, "grad_norm": 0.7027515442456616, "learning_rate": 9.898664316667965e-06, "loss": 0.0893, "step": 24127 }, { "epoch": 2.8611407565516425, "grad_norm": 0.5293370893572891, "learning_rate": 9.896751433299378e-06, "loss": 0.0662, "step": 24128 }, { "epoch": 2.861259338313767, "grad_norm": 0.5579814209907156, "learning_rate": 9.894838689164213e-06, "loss": 0.0728, "step": 24129 }, { "epoch": 2.8613779200758924, "grad_norm": 0.7438631228855059, "learning_rate": 9.89292608428011e-06, "loss": 0.1066, "step": 24130 }, { "epoch": 2.861496501838017, "grad_norm": 0.6972769604714664, "learning_rate": 9.891013618664704e-06, "loss": 0.0831, "step": 24131 }, { "epoch": 2.8616150836001424, "grad_norm": 0.6154698726966229, "learning_rate": 9.889101292335627e-06, "loss": 0.0898, "step": 24132 }, { "epoch": 2.861733665362267, "grad_norm": 0.5197741248370578, "learning_rate": 9.88718910531051e-06, "loss": 0.0762, "step": 24133 }, { "epoch": 2.8618522471243923, "grad_norm": 0.5411774734615187, "learning_rate": 9.885277057606962e-06, "loss": 0.0732, "step": 24134 }, { "epoch": 2.861970828886517, "grad_norm": 0.6867543019534519, "learning_rate": 9.883365149242643e-06, "loss": 0.0823, "step": 24135 }, { "epoch": 2.8620894106486423, "grad_norm": 0.7224479588777071, "learning_rate": 9.88145338023515e-06, "loss": 0.0887, "step": 24136 }, { "epoch": 2.862207992410767, "grad_norm": 0.6127206574345438, "learning_rate": 9.879541750602117e-06, "loss": 0.0868, "step": 24137 }, { "epoch": 2.8623265741728923, "grad_norm": 0.8258416260857121, "learning_rate": 9.877630260361167e-06, "loss": 0.1043, "step": 24138 }, { "epoch": 2.862445155935017, "grad_norm": 0.41295100837272497, "learning_rate": 9.875718909529932e-06, "loss": 0.061, "step": 24139 }, { "epoch": 2.8625637376971422, "grad_norm": 0.5763640543657235, "learning_rate": 9.873807698126015e-06, "loss": 0.0775, "step": 24140 }, { "epoch": 2.8626823194592674, "grad_norm": 0.8319656379056423, "learning_rate": 9.871896626167038e-06, "loss": 0.111, "step": 24141 }, { "epoch": 2.862800901221392, "grad_norm": 0.5829408189802396, "learning_rate": 9.869985693670625e-06, "loss": 0.0757, "step": 24142 }, { "epoch": 2.862919482983517, "grad_norm": 0.7330988159043462, "learning_rate": 9.86807490065439e-06, "loss": 0.1094, "step": 24143 }, { "epoch": 2.863038064745642, "grad_norm": 0.591522891975986, "learning_rate": 9.866164247135956e-06, "loss": 0.0807, "step": 24144 }, { "epoch": 2.8631566465077674, "grad_norm": 0.6887971950132535, "learning_rate": 9.864253733132914e-06, "loss": 0.0823, "step": 24145 }, { "epoch": 2.863275228269892, "grad_norm": 0.878060688949358, "learning_rate": 9.862343358662906e-06, "loss": 0.1019, "step": 24146 }, { "epoch": 2.863393810032017, "grad_norm": 0.45940666048763645, "learning_rate": 9.860433123743523e-06, "loss": 0.0653, "step": 24147 }, { "epoch": 2.863512391794142, "grad_norm": 0.5160483812944235, "learning_rate": 9.858523028392389e-06, "loss": 0.0669, "step": 24148 }, { "epoch": 2.8636309735562673, "grad_norm": 0.8016611304573242, "learning_rate": 9.856613072627089e-06, "loss": 0.0954, "step": 24149 }, { "epoch": 2.863749555318392, "grad_norm": 0.46734790323244957, "learning_rate": 9.854703256465261e-06, "loss": 0.0579, "step": 24150 }, { "epoch": 2.863868137080517, "grad_norm": 0.5924612066591324, "learning_rate": 9.852793579924491e-06, "loss": 0.0665, "step": 24151 }, { "epoch": 2.863986718842642, "grad_norm": 0.4906981567333014, "learning_rate": 9.850884043022388e-06, "loss": 0.0748, "step": 24152 }, { "epoch": 2.864105300604767, "grad_norm": 0.8395429240787958, "learning_rate": 9.848974645776558e-06, "loss": 0.1043, "step": 24153 }, { "epoch": 2.864223882366892, "grad_norm": 0.6203977610021565, "learning_rate": 9.847065388204602e-06, "loss": 0.0751, "step": 24154 }, { "epoch": 2.8643424641290167, "grad_norm": 0.563054215878696, "learning_rate": 9.84515627032413e-06, "loss": 0.0658, "step": 24155 }, { "epoch": 2.864461045891142, "grad_norm": 0.7329314685790989, "learning_rate": 9.843247292152713e-06, "loss": 0.0932, "step": 24156 }, { "epoch": 2.864579627653267, "grad_norm": 0.7162830799777715, "learning_rate": 9.841338453707991e-06, "loss": 0.0805, "step": 24157 }, { "epoch": 2.864698209415392, "grad_norm": 0.6619546485922194, "learning_rate": 9.839429755007529e-06, "loss": 0.0902, "step": 24158 }, { "epoch": 2.8648167911775166, "grad_norm": 0.6019981536524102, "learning_rate": 9.837521196068941e-06, "loss": 0.0801, "step": 24159 }, { "epoch": 2.864935372939642, "grad_norm": 1.3354716538420044, "learning_rate": 9.8356127769098e-06, "loss": 0.1242, "step": 24160 }, { "epoch": 2.865053954701767, "grad_norm": 0.528098300707396, "learning_rate": 9.83370449754773e-06, "loss": 0.0591, "step": 24161 }, { "epoch": 2.865172536463892, "grad_norm": 0.7062508933255351, "learning_rate": 9.831796358000295e-06, "loss": 0.0802, "step": 24162 }, { "epoch": 2.8652911182260166, "grad_norm": 0.6272868692815767, "learning_rate": 9.8298883582851e-06, "loss": 0.0742, "step": 24163 }, { "epoch": 2.8654096999881418, "grad_norm": 0.779456836915705, "learning_rate": 9.827980498419728e-06, "loss": 0.1133, "step": 24164 }, { "epoch": 2.865528281750267, "grad_norm": 0.6877473827268551, "learning_rate": 9.826072778421775e-06, "loss": 0.1088, "step": 24165 }, { "epoch": 2.8656468635123917, "grad_norm": 0.5161945747652237, "learning_rate": 9.82416519830883e-06, "loss": 0.0617, "step": 24166 }, { "epoch": 2.865765445274517, "grad_norm": 0.5342933817939847, "learning_rate": 9.822257758098455e-06, "loss": 0.0804, "step": 24167 }, { "epoch": 2.8658840270366417, "grad_norm": 0.5075231321549839, "learning_rate": 9.820350457808267e-06, "loss": 0.0708, "step": 24168 }, { "epoch": 2.866002608798767, "grad_norm": 0.6821945359546759, "learning_rate": 9.818443297455826e-06, "loss": 0.0991, "step": 24169 }, { "epoch": 2.8661211905608917, "grad_norm": 0.5964713272752493, "learning_rate": 9.81653627705873e-06, "loss": 0.0869, "step": 24170 }, { "epoch": 2.866239772323017, "grad_norm": 0.39636887767446827, "learning_rate": 9.814629396634533e-06, "loss": 0.0468, "step": 24171 }, { "epoch": 2.8663583540851416, "grad_norm": 0.5142410930077496, "learning_rate": 9.812722656200848e-06, "loss": 0.0587, "step": 24172 }, { "epoch": 2.866476935847267, "grad_norm": 0.6769233836843491, "learning_rate": 9.810816055775229e-06, "loss": 0.0809, "step": 24173 }, { "epoch": 2.8665955176093916, "grad_norm": 0.7872511670108744, "learning_rate": 9.808909595375257e-06, "loss": 0.1061, "step": 24174 }, { "epoch": 2.8667140993715168, "grad_norm": 0.7073616494099164, "learning_rate": 9.807003275018514e-06, "loss": 0.0774, "step": 24175 }, { "epoch": 2.8668326811336415, "grad_norm": 0.6899284004077019, "learning_rate": 9.805097094722568e-06, "loss": 0.0976, "step": 24176 }, { "epoch": 2.8669512628957667, "grad_norm": 0.5636209663560173, "learning_rate": 9.803191054505001e-06, "loss": 0.0597, "step": 24177 }, { "epoch": 2.8670698446578915, "grad_norm": 0.7398726141762736, "learning_rate": 9.801285154383369e-06, "loss": 0.0788, "step": 24178 }, { "epoch": 2.8671884264200167, "grad_norm": 0.5375363201156016, "learning_rate": 9.79937939437525e-06, "loss": 0.0673, "step": 24179 }, { "epoch": 2.8673070081821415, "grad_norm": 0.7847001558418119, "learning_rate": 9.797473774498211e-06, "loss": 0.0688, "step": 24180 }, { "epoch": 2.8674255899442667, "grad_norm": 0.552469476610959, "learning_rate": 9.795568294769824e-06, "loss": 0.06, "step": 24181 }, { "epoch": 2.8675441717063914, "grad_norm": 0.6879393989474678, "learning_rate": 9.793662955207652e-06, "loss": 0.1035, "step": 24182 }, { "epoch": 2.8676627534685166, "grad_norm": 0.7356531789601378, "learning_rate": 9.791757755829258e-06, "loss": 0.0774, "step": 24183 }, { "epoch": 2.8677813352306414, "grad_norm": 0.3888901731747142, "learning_rate": 9.789852696652222e-06, "loss": 0.0508, "step": 24184 }, { "epoch": 2.8678999169927666, "grad_norm": 0.6452590511718309, "learning_rate": 9.787947777694078e-06, "loss": 0.0788, "step": 24185 }, { "epoch": 2.8680184987548913, "grad_norm": 1.135816214373962, "learning_rate": 9.786042998972403e-06, "loss": 0.1229, "step": 24186 }, { "epoch": 2.8681370805170165, "grad_norm": 1.0299609973952193, "learning_rate": 9.784138360504755e-06, "loss": 0.1426, "step": 24187 }, { "epoch": 2.8682556622791413, "grad_norm": 0.3276638673588135, "learning_rate": 9.782233862308702e-06, "loss": 0.0462, "step": 24188 }, { "epoch": 2.8683742440412665, "grad_norm": 0.6239452468036728, "learning_rate": 9.780329504401784e-06, "loss": 0.0884, "step": 24189 }, { "epoch": 2.8684928258033917, "grad_norm": 0.768757553044192, "learning_rate": 9.778425286801559e-06, "loss": 0.0777, "step": 24190 }, { "epoch": 2.8686114075655165, "grad_norm": 0.5023621844557291, "learning_rate": 9.776521209525591e-06, "loss": 0.0682, "step": 24191 }, { "epoch": 2.8687299893276412, "grad_norm": 0.6889094945301277, "learning_rate": 9.77461727259143e-06, "loss": 0.0925, "step": 24192 }, { "epoch": 2.8688485710897664, "grad_norm": 0.47658093930170725, "learning_rate": 9.772713476016634e-06, "loss": 0.0577, "step": 24193 }, { "epoch": 2.8689671528518916, "grad_norm": 0.6511563636131903, "learning_rate": 9.77080981981873e-06, "loss": 0.0796, "step": 24194 }, { "epoch": 2.8690857346140164, "grad_norm": 0.5335144197232538, "learning_rate": 9.768906304015302e-06, "loss": 0.0654, "step": 24195 }, { "epoch": 2.869204316376141, "grad_norm": 0.5247833053169224, "learning_rate": 9.767002928623869e-06, "loss": 0.0551, "step": 24196 }, { "epoch": 2.8693228981382664, "grad_norm": 0.6295754351022136, "learning_rate": 9.765099693661992e-06, "loss": 0.075, "step": 24197 }, { "epoch": 2.8694414799003916, "grad_norm": 1.0360730755988647, "learning_rate": 9.763196599147213e-06, "loss": 0.1245, "step": 24198 }, { "epoch": 2.8695600616625163, "grad_norm": 0.7554778394187468, "learning_rate": 9.761293645097086e-06, "loss": 0.1003, "step": 24199 }, { "epoch": 2.869678643424641, "grad_norm": 0.5609791151265551, "learning_rate": 9.759390831529133e-06, "loss": 0.0835, "step": 24200 }, { "epoch": 2.8697972251867663, "grad_norm": 0.6515306874371858, "learning_rate": 9.75748815846091e-06, "loss": 0.0823, "step": 24201 }, { "epoch": 2.8699158069488915, "grad_norm": 1.1717534479229756, "learning_rate": 9.755585625909954e-06, "loss": 0.1427, "step": 24202 }, { "epoch": 2.8700343887110162, "grad_norm": 0.799308400401133, "learning_rate": 9.753683233893804e-06, "loss": 0.1155, "step": 24203 }, { "epoch": 2.870152970473141, "grad_norm": 0.5787850358701255, "learning_rate": 9.75178098243001e-06, "loss": 0.0736, "step": 24204 }, { "epoch": 2.870271552235266, "grad_norm": 0.5243238719548187, "learning_rate": 9.749878871536075e-06, "loss": 0.054, "step": 24205 }, { "epoch": 2.8703901339973914, "grad_norm": 0.6992097746622686, "learning_rate": 9.747976901229574e-06, "loss": 0.0874, "step": 24206 }, { "epoch": 2.870508715759516, "grad_norm": 0.7289295105558224, "learning_rate": 9.746075071528015e-06, "loss": 0.0743, "step": 24207 }, { "epoch": 2.870627297521641, "grad_norm": 0.6110072550274349, "learning_rate": 9.74417338244894e-06, "loss": 0.0971, "step": 24208 }, { "epoch": 2.870745879283766, "grad_norm": 0.6756233460067339, "learning_rate": 9.742271834009876e-06, "loss": 0.0862, "step": 24209 }, { "epoch": 2.8708644610458913, "grad_norm": 0.852735528693052, "learning_rate": 9.740370426228365e-06, "loss": 0.087, "step": 24210 }, { "epoch": 2.870983042808016, "grad_norm": 1.0557114743708866, "learning_rate": 9.738469159121919e-06, "loss": 0.0951, "step": 24211 }, { "epoch": 2.871101624570141, "grad_norm": 0.5549613963946007, "learning_rate": 9.736568032708069e-06, "loss": 0.0604, "step": 24212 }, { "epoch": 2.871220206332266, "grad_norm": 0.7256480436091368, "learning_rate": 9.734667047004348e-06, "loss": 0.1023, "step": 24213 }, { "epoch": 2.8713387880943912, "grad_norm": 0.7684136289164121, "learning_rate": 9.732766202028274e-06, "loss": 0.0937, "step": 24214 }, { "epoch": 2.871457369856516, "grad_norm": 0.5058961086721484, "learning_rate": 9.730865497797383e-06, "loss": 0.0615, "step": 24215 }, { "epoch": 2.871575951618641, "grad_norm": 0.6211323305895127, "learning_rate": 9.728964934329172e-06, "loss": 0.0762, "step": 24216 }, { "epoch": 2.871694533380766, "grad_norm": 0.691732063313816, "learning_rate": 9.727064511641196e-06, "loss": 0.0687, "step": 24217 }, { "epoch": 2.871813115142891, "grad_norm": 0.5507133735317322, "learning_rate": 9.725164229750944e-06, "loss": 0.0713, "step": 24218 }, { "epoch": 2.871931696905016, "grad_norm": 0.7059652037919698, "learning_rate": 9.723264088675958e-06, "loss": 0.0906, "step": 24219 }, { "epoch": 2.872050278667141, "grad_norm": 0.7363184233280686, "learning_rate": 9.721364088433726e-06, "loss": 0.0757, "step": 24220 }, { "epoch": 2.872168860429266, "grad_norm": 0.6279751575752014, "learning_rate": 9.719464229041797e-06, "loss": 0.0826, "step": 24221 }, { "epoch": 2.872287442191391, "grad_norm": 0.5912707635045874, "learning_rate": 9.717564510517663e-06, "loss": 0.0877, "step": 24222 }, { "epoch": 2.872406023953516, "grad_norm": 0.6195516759483557, "learning_rate": 9.715664932878843e-06, "loss": 0.0654, "step": 24223 }, { "epoch": 2.872524605715641, "grad_norm": 0.6550115045857947, "learning_rate": 9.713765496142848e-06, "loss": 0.0745, "step": 24224 }, { "epoch": 2.872643187477766, "grad_norm": 0.8442237532600653, "learning_rate": 9.711866200327191e-06, "loss": 0.1111, "step": 24225 }, { "epoch": 2.872761769239891, "grad_norm": 0.65274440783701, "learning_rate": 9.709967045449389e-06, "loss": 0.0959, "step": 24226 }, { "epoch": 2.8728803510020158, "grad_norm": 0.7752579566320228, "learning_rate": 9.708068031526924e-06, "loss": 0.0975, "step": 24227 }, { "epoch": 2.872998932764141, "grad_norm": 0.6790512852613181, "learning_rate": 9.706169158577336e-06, "loss": 0.0713, "step": 24228 }, { "epoch": 2.8731175145262657, "grad_norm": 0.5813458041156797, "learning_rate": 9.704270426618104e-06, "loss": 0.0744, "step": 24229 }, { "epoch": 2.873236096288391, "grad_norm": 0.7050430949026748, "learning_rate": 9.702371835666752e-06, "loss": 0.0969, "step": 24230 }, { "epoch": 2.8733546780505157, "grad_norm": 0.5910347614818323, "learning_rate": 9.700473385740755e-06, "loss": 0.064, "step": 24231 }, { "epoch": 2.873473259812641, "grad_norm": 0.7084251010724286, "learning_rate": 9.69857507685765e-06, "loss": 0.099, "step": 24232 }, { "epoch": 2.8735918415747657, "grad_norm": 0.8008555896583675, "learning_rate": 9.696676909034908e-06, "loss": 0.1072, "step": 24233 }, { "epoch": 2.873710423336891, "grad_norm": 0.7908743116862521, "learning_rate": 9.69477888229004e-06, "loss": 0.0958, "step": 24234 }, { "epoch": 2.8738290050990156, "grad_norm": 0.8679892894189345, "learning_rate": 9.692880996640543e-06, "loss": 0.1282, "step": 24235 }, { "epoch": 2.873947586861141, "grad_norm": 0.4896899143328698, "learning_rate": 9.690983252103911e-06, "loss": 0.0486, "step": 24236 }, { "epoch": 2.8740661686232656, "grad_norm": 0.6202836652514163, "learning_rate": 9.689085648697652e-06, "loss": 0.0623, "step": 24237 }, { "epoch": 2.874184750385391, "grad_norm": 0.6240916399230694, "learning_rate": 9.687188186439228e-06, "loss": 0.0615, "step": 24238 }, { "epoch": 2.874303332147516, "grad_norm": 1.0507771281008194, "learning_rate": 9.685290865346167e-06, "loss": 0.1447, "step": 24239 }, { "epoch": 2.8744219139096407, "grad_norm": 1.0493172667611728, "learning_rate": 9.683393685435938e-06, "loss": 0.1647, "step": 24240 }, { "epoch": 2.8745404956717655, "grad_norm": 0.7032908979148609, "learning_rate": 9.681496646726035e-06, "loss": 0.0713, "step": 24241 }, { "epoch": 2.8746590774338907, "grad_norm": 0.6211974839778929, "learning_rate": 9.67959974923395e-06, "loss": 0.0676, "step": 24242 }, { "epoch": 2.874777659196016, "grad_norm": 0.9286183393906605, "learning_rate": 9.677702992977167e-06, "loss": 0.1301, "step": 24243 }, { "epoch": 2.8748962409581407, "grad_norm": 0.6564083052557635, "learning_rate": 9.675806377973182e-06, "loss": 0.0759, "step": 24244 }, { "epoch": 2.8750148227202654, "grad_norm": 0.7126416815352716, "learning_rate": 9.673909904239462e-06, "loss": 0.0895, "step": 24245 }, { "epoch": 2.8751334044823906, "grad_norm": 0.7814312603006369, "learning_rate": 9.672013571793495e-06, "loss": 0.109, "step": 24246 }, { "epoch": 2.875251986244516, "grad_norm": 0.6601452561629403, "learning_rate": 9.670117380652772e-06, "loss": 0.0713, "step": 24247 }, { "epoch": 2.8753705680066406, "grad_norm": 0.7404996475492971, "learning_rate": 9.668221330834773e-06, "loss": 0.0938, "step": 24248 }, { "epoch": 2.8754891497687654, "grad_norm": 0.5020782178472621, "learning_rate": 9.666325422356964e-06, "loss": 0.0729, "step": 24249 }, { "epoch": 2.8756077315308906, "grad_norm": 0.5470769177643736, "learning_rate": 9.664429655236834e-06, "loss": 0.0721, "step": 24250 }, { "epoch": 2.8757263132930158, "grad_norm": 0.7011819035943143, "learning_rate": 9.662534029491855e-06, "loss": 0.0719, "step": 24251 }, { "epoch": 2.8758448950551405, "grad_norm": 0.743819551670039, "learning_rate": 9.660638545139503e-06, "loss": 0.0855, "step": 24252 }, { "epoch": 2.8759634768172653, "grad_norm": 0.5105483348904337, "learning_rate": 9.658743202197255e-06, "loss": 0.0717, "step": 24253 }, { "epoch": 2.8760820585793905, "grad_norm": 0.7445082294471929, "learning_rate": 9.65684800068258e-06, "loss": 0.1096, "step": 24254 }, { "epoch": 2.8762006403415157, "grad_norm": 0.6236451756179221, "learning_rate": 9.654952940612963e-06, "loss": 0.0747, "step": 24255 }, { "epoch": 2.8763192221036404, "grad_norm": 0.8399149448630175, "learning_rate": 9.65305802200585e-06, "loss": 0.1041, "step": 24256 }, { "epoch": 2.876437803865765, "grad_norm": 0.4691900116132239, "learning_rate": 9.651163244878725e-06, "loss": 0.0689, "step": 24257 }, { "epoch": 2.8765563856278904, "grad_norm": 0.9469582836846846, "learning_rate": 9.649268609249054e-06, "loss": 0.1328, "step": 24258 }, { "epoch": 2.8766749673900156, "grad_norm": 0.30424479844658225, "learning_rate": 9.647374115134308e-06, "loss": 0.0393, "step": 24259 }, { "epoch": 2.8767935491521404, "grad_norm": 0.7159213694620831, "learning_rate": 9.64547976255194e-06, "loss": 0.0874, "step": 24260 }, { "epoch": 2.876912130914265, "grad_norm": 0.883905694526706, "learning_rate": 9.643585551519418e-06, "loss": 0.1161, "step": 24261 }, { "epoch": 2.8770307126763903, "grad_norm": 0.5368610184400805, "learning_rate": 9.641691482054206e-06, "loss": 0.0688, "step": 24262 }, { "epoch": 2.8771492944385155, "grad_norm": 0.5542012326357308, "learning_rate": 9.639797554173766e-06, "loss": 0.0842, "step": 24263 }, { "epoch": 2.8772678762006403, "grad_norm": 0.651932772253232, "learning_rate": 9.637903767895565e-06, "loss": 0.0886, "step": 24264 }, { "epoch": 2.8773864579627655, "grad_norm": 0.571638769768261, "learning_rate": 9.636010123237032e-06, "loss": 0.0652, "step": 24265 }, { "epoch": 2.8775050397248902, "grad_norm": 0.6271560597552994, "learning_rate": 9.634116620215666e-06, "loss": 0.0888, "step": 24266 }, { "epoch": 2.8776236214870154, "grad_norm": 0.8220789995158057, "learning_rate": 9.632223258848893e-06, "loss": 0.1025, "step": 24267 }, { "epoch": 2.87774220324914, "grad_norm": 0.5442702548851737, "learning_rate": 9.630330039154178e-06, "loss": 0.0655, "step": 24268 }, { "epoch": 2.8778607850112654, "grad_norm": 0.5916092438203944, "learning_rate": 9.62843696114897e-06, "loss": 0.1005, "step": 24269 }, { "epoch": 2.87797936677339, "grad_norm": 0.8307093301484013, "learning_rate": 9.626544024850734e-06, "loss": 0.0757, "step": 24270 }, { "epoch": 2.8780979485355154, "grad_norm": 0.6475851807582721, "learning_rate": 9.624651230276898e-06, "loss": 0.0835, "step": 24271 }, { "epoch": 2.87821653029764, "grad_norm": 0.8109741880769133, "learning_rate": 9.622758577444926e-06, "loss": 0.1023, "step": 24272 }, { "epoch": 2.8783351120597653, "grad_norm": 0.6072218860582641, "learning_rate": 9.620866066372262e-06, "loss": 0.0798, "step": 24273 }, { "epoch": 2.87845369382189, "grad_norm": 0.7162524325385096, "learning_rate": 9.618973697076354e-06, "loss": 0.0889, "step": 24274 }, { "epoch": 2.8785722755840153, "grad_norm": 0.6694888694249848, "learning_rate": 9.617081469574654e-06, "loss": 0.0799, "step": 24275 }, { "epoch": 2.87869085734614, "grad_norm": 0.4976827352934302, "learning_rate": 9.615189383884585e-06, "loss": 0.0707, "step": 24276 }, { "epoch": 2.8788094391082653, "grad_norm": 0.5147898004075495, "learning_rate": 9.61329744002362e-06, "loss": 0.0629, "step": 24277 }, { "epoch": 2.87892802087039, "grad_norm": 0.5186259091007422, "learning_rate": 9.611405638009175e-06, "loss": 0.0582, "step": 24278 }, { "epoch": 2.879046602632515, "grad_norm": 1.2573994037102276, "learning_rate": 9.609513977858706e-06, "loss": 0.1794, "step": 24279 }, { "epoch": 2.87916518439464, "grad_norm": 0.5021859571886245, "learning_rate": 9.60762245958963e-06, "loss": 0.0619, "step": 24280 }, { "epoch": 2.879283766156765, "grad_norm": 1.400308497093427, "learning_rate": 9.605731083219417e-06, "loss": 0.2042, "step": 24281 }, { "epoch": 2.87940234791889, "grad_norm": 0.473108058551573, "learning_rate": 9.603839848765478e-06, "loss": 0.0657, "step": 24282 }, { "epoch": 2.879520929681015, "grad_norm": 0.651655342293578, "learning_rate": 9.601948756245252e-06, "loss": 0.0973, "step": 24283 }, { "epoch": 2.87963951144314, "grad_norm": 0.6277993771346898, "learning_rate": 9.60005780567618e-06, "loss": 0.0773, "step": 24284 }, { "epoch": 2.879758093205265, "grad_norm": 0.8343031469878129, "learning_rate": 9.598166997075689e-06, "loss": 0.1039, "step": 24285 }, { "epoch": 2.87987667496739, "grad_norm": 0.35427524001773364, "learning_rate": 9.596276330461218e-06, "loss": 0.0447, "step": 24286 }, { "epoch": 2.879995256729515, "grad_norm": 0.6367429152813913, "learning_rate": 9.594385805850176e-06, "loss": 0.0748, "step": 24287 }, { "epoch": 2.88011383849164, "grad_norm": 0.607633358897898, "learning_rate": 9.59249542326002e-06, "loss": 0.0753, "step": 24288 }, { "epoch": 2.880232420253765, "grad_norm": 0.457062550278196, "learning_rate": 9.590605182708154e-06, "loss": 0.0683, "step": 24289 }, { "epoch": 2.88035100201589, "grad_norm": 0.7130041110145274, "learning_rate": 9.588715084212013e-06, "loss": 0.086, "step": 24290 }, { "epoch": 2.880469583778015, "grad_norm": 0.5899431248598694, "learning_rate": 9.586825127789018e-06, "loss": 0.1014, "step": 24291 }, { "epoch": 2.88058816554014, "grad_norm": 0.7749651337610757, "learning_rate": 9.584935313456597e-06, "loss": 0.0823, "step": 24292 }, { "epoch": 2.880706747302265, "grad_norm": 0.5390546920835869, "learning_rate": 9.583045641232177e-06, "loss": 0.0849, "step": 24293 }, { "epoch": 2.8808253290643897, "grad_norm": 0.8213482905186275, "learning_rate": 9.581156111133152e-06, "loss": 0.0982, "step": 24294 }, { "epoch": 2.880943910826515, "grad_norm": 0.38084843519449324, "learning_rate": 9.57926672317698e-06, "loss": 0.0461, "step": 24295 }, { "epoch": 2.88106249258864, "grad_norm": 0.32892522805714086, "learning_rate": 9.57737747738105e-06, "loss": 0.044, "step": 24296 }, { "epoch": 2.881181074350765, "grad_norm": 1.030705792772188, "learning_rate": 9.575488373762792e-06, "loss": 0.1671, "step": 24297 }, { "epoch": 2.8812996561128896, "grad_norm": 0.7312915059466485, "learning_rate": 9.573599412339601e-06, "loss": 0.0985, "step": 24298 }, { "epoch": 2.881418237875015, "grad_norm": 1.2159444127439145, "learning_rate": 9.571710593128927e-06, "loss": 0.1631, "step": 24299 }, { "epoch": 2.88153681963714, "grad_norm": 0.7091925852817951, "learning_rate": 9.569821916148151e-06, "loss": 0.0736, "step": 24300 }, { "epoch": 2.881655401399265, "grad_norm": 0.5066843154013346, "learning_rate": 9.567933381414693e-06, "loss": 0.0802, "step": 24301 }, { "epoch": 2.8817739831613896, "grad_norm": 0.6595801879542789, "learning_rate": 9.566044988945965e-06, "loss": 0.0856, "step": 24302 }, { "epoch": 2.8818925649235148, "grad_norm": 0.7929348121526864, "learning_rate": 9.564156738759377e-06, "loss": 0.1112, "step": 24303 }, { "epoch": 2.88201114668564, "grad_norm": 0.5705927076229116, "learning_rate": 9.562268630872346e-06, "loss": 0.0795, "step": 24304 }, { "epoch": 2.8821297284477647, "grad_norm": 0.49959660355620317, "learning_rate": 9.560380665302257e-06, "loss": 0.0865, "step": 24305 }, { "epoch": 2.8822483102098895, "grad_norm": 0.7246100209029499, "learning_rate": 9.558492842066524e-06, "loss": 0.1191, "step": 24306 }, { "epoch": 2.8823668919720147, "grad_norm": 0.7052810103819313, "learning_rate": 9.556605161182553e-06, "loss": 0.0976, "step": 24307 }, { "epoch": 2.88248547373414, "grad_norm": 0.6458475840558362, "learning_rate": 9.554717622667753e-06, "loss": 0.0994, "step": 24308 }, { "epoch": 2.8826040554962646, "grad_norm": 0.8083785960744031, "learning_rate": 9.552830226539497e-06, "loss": 0.1109, "step": 24309 }, { "epoch": 2.8827226372583894, "grad_norm": 0.6024628619776521, "learning_rate": 9.550942972815224e-06, "loss": 0.0969, "step": 24310 }, { "epoch": 2.8828412190205146, "grad_norm": 0.6016017033418634, "learning_rate": 9.549055861512301e-06, "loss": 0.0553, "step": 24311 }, { "epoch": 2.88295980078264, "grad_norm": 0.46792761993089366, "learning_rate": 9.547168892648136e-06, "loss": 0.0683, "step": 24312 }, { "epoch": 2.8830783825447646, "grad_norm": 0.6286620608057498, "learning_rate": 9.545282066240124e-06, "loss": 0.0698, "step": 24313 }, { "epoch": 2.8831969643068898, "grad_norm": 0.5573485234706418, "learning_rate": 9.543395382305662e-06, "loss": 0.0767, "step": 24314 }, { "epoch": 2.8833155460690145, "grad_norm": 0.9970155970632395, "learning_rate": 9.541508840862148e-06, "loss": 0.1278, "step": 24315 }, { "epoch": 2.8834341278311397, "grad_norm": 0.4818983498260057, "learning_rate": 9.539622441926958e-06, "loss": 0.0533, "step": 24316 }, { "epoch": 2.8835527095932645, "grad_norm": 0.7118962562777247, "learning_rate": 9.537736185517487e-06, "loss": 0.105, "step": 24317 }, { "epoch": 2.8836712913553897, "grad_norm": 0.6571654755791823, "learning_rate": 9.535850071651128e-06, "loss": 0.1164, "step": 24318 }, { "epoch": 2.8837898731175144, "grad_norm": 0.460909089646429, "learning_rate": 9.53396410034528e-06, "loss": 0.0663, "step": 24319 }, { "epoch": 2.8839084548796396, "grad_norm": 0.7985261908461723, "learning_rate": 9.532078271617306e-06, "loss": 0.1331, "step": 24320 }, { "epoch": 2.8840270366417644, "grad_norm": 0.5848277592018569, "learning_rate": 9.530192585484605e-06, "loss": 0.0744, "step": 24321 }, { "epoch": 2.8841456184038896, "grad_norm": 0.7726277018571783, "learning_rate": 9.528307041964554e-06, "loss": 0.0937, "step": 24322 }, { "epoch": 2.8842642001660144, "grad_norm": 0.4837956339012145, "learning_rate": 9.52642164107454e-06, "loss": 0.0643, "step": 24323 }, { "epoch": 2.8843827819281396, "grad_norm": 0.9547178154947382, "learning_rate": 9.524536382831947e-06, "loss": 0.1308, "step": 24324 }, { "epoch": 2.8845013636902643, "grad_norm": 1.1941500946742083, "learning_rate": 9.522651267254149e-06, "loss": 0.1545, "step": 24325 }, { "epoch": 2.8846199454523895, "grad_norm": 0.6520079715274173, "learning_rate": 9.520766294358536e-06, "loss": 0.0693, "step": 24326 }, { "epoch": 2.8847385272145143, "grad_norm": 0.6380869887452477, "learning_rate": 9.518881464162465e-06, "loss": 0.0871, "step": 24327 }, { "epoch": 2.8848571089766395, "grad_norm": 0.5960683698329208, "learning_rate": 9.516996776683327e-06, "loss": 0.0762, "step": 24328 }, { "epoch": 2.8849756907387643, "grad_norm": 0.5504683897225934, "learning_rate": 9.51511223193849e-06, "loss": 0.0845, "step": 24329 }, { "epoch": 2.8850942725008895, "grad_norm": 0.6632650347049234, "learning_rate": 9.513227829945334e-06, "loss": 0.0935, "step": 24330 }, { "epoch": 2.885212854263014, "grad_norm": 0.6582849291494628, "learning_rate": 9.511343570721224e-06, "loss": 0.0937, "step": 24331 }, { "epoch": 2.8853314360251394, "grad_norm": 0.7250658749730947, "learning_rate": 9.509459454283528e-06, "loss": 0.0876, "step": 24332 }, { "epoch": 2.885450017787264, "grad_norm": 0.528162271667388, "learning_rate": 9.507575480649622e-06, "loss": 0.08, "step": 24333 }, { "epoch": 2.8855685995493894, "grad_norm": 0.4600510851168668, "learning_rate": 9.505691649836874e-06, "loss": 0.068, "step": 24334 }, { "epoch": 2.885687181311514, "grad_norm": 0.4902374348484885, "learning_rate": 9.503807961862654e-06, "loss": 0.0788, "step": 24335 }, { "epoch": 2.8858057630736393, "grad_norm": 0.6449234255649149, "learning_rate": 9.501924416744307e-06, "loss": 0.0923, "step": 24336 }, { "epoch": 2.885924344835764, "grad_norm": 0.4577834070195004, "learning_rate": 9.500041014499228e-06, "loss": 0.0637, "step": 24337 }, { "epoch": 2.8860429265978893, "grad_norm": 0.6372520971907949, "learning_rate": 9.498157755144754e-06, "loss": 0.0928, "step": 24338 }, { "epoch": 2.886161508360014, "grad_norm": 0.3987895651814518, "learning_rate": 9.496274638698258e-06, "loss": 0.0639, "step": 24339 }, { "epoch": 2.8862800901221393, "grad_norm": 0.6125099830307607, "learning_rate": 9.494391665177097e-06, "loss": 0.0731, "step": 24340 }, { "epoch": 2.8863986718842645, "grad_norm": 0.9000922855399754, "learning_rate": 9.492508834598632e-06, "loss": 0.1002, "step": 24341 }, { "epoch": 2.8865172536463892, "grad_norm": 0.5085479389263218, "learning_rate": 9.490626146980225e-06, "loss": 0.0758, "step": 24342 }, { "epoch": 2.886635835408514, "grad_norm": 0.6628863006756498, "learning_rate": 9.488743602339212e-06, "loss": 0.0872, "step": 24343 }, { "epoch": 2.886754417170639, "grad_norm": 0.776946515265349, "learning_rate": 9.486861200692975e-06, "loss": 0.1047, "step": 24344 }, { "epoch": 2.8868729989327644, "grad_norm": 0.5057864731365002, "learning_rate": 9.484978942058847e-06, "loss": 0.0736, "step": 24345 }, { "epoch": 2.886991580694889, "grad_norm": 0.5957754597677981, "learning_rate": 9.483096826454199e-06, "loss": 0.0531, "step": 24346 }, { "epoch": 2.887110162457014, "grad_norm": 0.4897874723116628, "learning_rate": 9.48121485389635e-06, "loss": 0.0629, "step": 24347 }, { "epoch": 2.887228744219139, "grad_norm": 0.6195878889144102, "learning_rate": 9.479333024402687e-06, "loss": 0.0722, "step": 24348 }, { "epoch": 2.8873473259812643, "grad_norm": 0.8733937948817143, "learning_rate": 9.477451337990533e-06, "loss": 0.0936, "step": 24349 }, { "epoch": 2.887465907743389, "grad_norm": 0.48232472070766663, "learning_rate": 9.47556979467724e-06, "loss": 0.0542, "step": 24350 }, { "epoch": 2.887584489505514, "grad_norm": 0.5846769818011252, "learning_rate": 9.473688394480163e-06, "loss": 0.0596, "step": 24351 }, { "epoch": 2.887703071267639, "grad_norm": 0.5356619947184457, "learning_rate": 9.471807137416632e-06, "loss": 0.069, "step": 24352 }, { "epoch": 2.8878216530297642, "grad_norm": 0.6640556554907017, "learning_rate": 9.46992602350401e-06, "loss": 0.0945, "step": 24353 }, { "epoch": 2.887940234791889, "grad_norm": 0.7550799709031142, "learning_rate": 9.468045052759608e-06, "loss": 0.112, "step": 24354 }, { "epoch": 2.8880588165540138, "grad_norm": 0.6794419882504484, "learning_rate": 9.4661642252008e-06, "loss": 0.0872, "step": 24355 }, { "epoch": 2.888177398316139, "grad_norm": 0.9101402422581034, "learning_rate": 9.464283540844901e-06, "loss": 0.1321, "step": 24356 }, { "epoch": 2.888295980078264, "grad_norm": 0.9359801237695862, "learning_rate": 9.462402999709265e-06, "loss": 0.109, "step": 24357 }, { "epoch": 2.888414561840389, "grad_norm": 0.803380049218197, "learning_rate": 9.460522601811204e-06, "loss": 0.0919, "step": 24358 }, { "epoch": 2.8885331436025137, "grad_norm": 0.7390849212019065, "learning_rate": 9.458642347168084e-06, "loss": 0.0774, "step": 24359 }, { "epoch": 2.888651725364639, "grad_norm": 0.5635627339414034, "learning_rate": 9.456762235797218e-06, "loss": 0.086, "step": 24360 }, { "epoch": 2.888770307126764, "grad_norm": 0.6485746113511717, "learning_rate": 9.454882267715943e-06, "loss": 0.0997, "step": 24361 }, { "epoch": 2.888888888888889, "grad_norm": 0.7720477936678297, "learning_rate": 9.453002442941588e-06, "loss": 0.1006, "step": 24362 }, { "epoch": 2.8890074706510136, "grad_norm": 0.5502124225757064, "learning_rate": 9.451122761491488e-06, "loss": 0.0723, "step": 24363 }, { "epoch": 2.889126052413139, "grad_norm": 1.1458870845690898, "learning_rate": 9.449243223382975e-06, "loss": 0.1311, "step": 24364 }, { "epoch": 2.889244634175264, "grad_norm": 0.3807697516126871, "learning_rate": 9.447363828633363e-06, "loss": 0.0539, "step": 24365 }, { "epoch": 2.8893632159373888, "grad_norm": 0.4034703162545005, "learning_rate": 9.445484577259983e-06, "loss": 0.0547, "step": 24366 }, { "epoch": 2.889481797699514, "grad_norm": 0.9891553278309152, "learning_rate": 9.443605469280164e-06, "loss": 0.0882, "step": 24367 }, { "epoch": 2.8896003794616387, "grad_norm": 0.6525119841265319, "learning_rate": 9.441726504711232e-06, "loss": 0.1, "step": 24368 }, { "epoch": 2.889718961223764, "grad_norm": 0.5659834154733822, "learning_rate": 9.439847683570489e-06, "loss": 0.0734, "step": 24369 }, { "epoch": 2.8898375429858887, "grad_norm": 0.8162560992255016, "learning_rate": 9.437969005875283e-06, "loss": 0.0949, "step": 24370 }, { "epoch": 2.889956124748014, "grad_norm": 0.6962717405158366, "learning_rate": 9.43609047164291e-06, "loss": 0.1025, "step": 24371 }, { "epoch": 2.8900747065101386, "grad_norm": 0.42315393074092034, "learning_rate": 9.434212080890701e-06, "loss": 0.0573, "step": 24372 }, { "epoch": 2.890193288272264, "grad_norm": 0.7971086082345452, "learning_rate": 9.432333833635967e-06, "loss": 0.0994, "step": 24373 }, { "epoch": 2.8903118700343886, "grad_norm": 0.65848409628581, "learning_rate": 9.430455729896023e-06, "loss": 0.0787, "step": 24374 }, { "epoch": 2.890430451796514, "grad_norm": 0.5216415343184689, "learning_rate": 9.428577769688196e-06, "loss": 0.0656, "step": 24375 }, { "epoch": 2.8905490335586386, "grad_norm": 0.6942812947054995, "learning_rate": 9.426699953029774e-06, "loss": 0.09, "step": 24376 }, { "epoch": 2.8906676153207638, "grad_norm": 0.7042348763294157, "learning_rate": 9.424822279938086e-06, "loss": 0.0824, "step": 24377 }, { "epoch": 2.8907861970828885, "grad_norm": 0.7851473135587312, "learning_rate": 9.422944750430435e-06, "loss": 0.101, "step": 24378 }, { "epoch": 2.8909047788450137, "grad_norm": 0.4234522563269217, "learning_rate": 9.42106736452414e-06, "loss": 0.0588, "step": 24379 }, { "epoch": 2.8910233606071385, "grad_norm": 0.6876657046413623, "learning_rate": 9.419190122236482e-06, "loss": 0.0896, "step": 24380 }, { "epoch": 2.8911419423692637, "grad_norm": 0.6504259445395477, "learning_rate": 9.417313023584803e-06, "loss": 0.0816, "step": 24381 }, { "epoch": 2.8912605241313885, "grad_norm": 0.7393012652867903, "learning_rate": 9.41543606858638e-06, "loss": 0.1044, "step": 24382 }, { "epoch": 2.8913791058935137, "grad_norm": 0.502014777861103, "learning_rate": 9.413559257258524e-06, "loss": 0.0644, "step": 24383 }, { "epoch": 2.8914976876556384, "grad_norm": 0.6868119770883993, "learning_rate": 9.411682589618543e-06, "loss": 0.0756, "step": 24384 }, { "epoch": 2.8916162694177636, "grad_norm": 0.5558671708511199, "learning_rate": 9.40980606568373e-06, "loss": 0.0747, "step": 24385 }, { "epoch": 2.8917348511798884, "grad_norm": 0.5726825057178048, "learning_rate": 9.407929685471395e-06, "loss": 0.0891, "step": 24386 }, { "epoch": 2.8918534329420136, "grad_norm": 0.6441412159571398, "learning_rate": 9.406053448998819e-06, "loss": 0.0852, "step": 24387 }, { "epoch": 2.8919720147041383, "grad_norm": 0.6762293130620526, "learning_rate": 9.404177356283309e-06, "loss": 0.0689, "step": 24388 }, { "epoch": 2.8920905964662635, "grad_norm": 0.6945957574846151, "learning_rate": 9.40230140734216e-06, "loss": 0.1039, "step": 24389 }, { "epoch": 2.8922091782283887, "grad_norm": 0.7994336800333349, "learning_rate": 9.400425602192662e-06, "loss": 0.0877, "step": 24390 }, { "epoch": 2.8923277599905135, "grad_norm": 0.7044501523660955, "learning_rate": 9.39854994085212e-06, "loss": 0.0936, "step": 24391 }, { "epoch": 2.8924463417526383, "grad_norm": 0.7111586515637429, "learning_rate": 9.396674423337799e-06, "loss": 0.0928, "step": 24392 }, { "epoch": 2.8925649235147635, "grad_norm": 0.9183795624038582, "learning_rate": 9.394799049667021e-06, "loss": 0.0935, "step": 24393 }, { "epoch": 2.8926835052768887, "grad_norm": 0.5158507866371822, "learning_rate": 9.392923819857055e-06, "loss": 0.0784, "step": 24394 }, { "epoch": 2.8928020870390134, "grad_norm": 0.6117648310510746, "learning_rate": 9.39104873392519e-06, "loss": 0.0909, "step": 24395 }, { "epoch": 2.892920668801138, "grad_norm": 0.601908795627444, "learning_rate": 9.389173791888715e-06, "loss": 0.076, "step": 24396 }, { "epoch": 2.8930392505632634, "grad_norm": 0.5880765924974992, "learning_rate": 9.387298993764922e-06, "loss": 0.0702, "step": 24397 }, { "epoch": 2.8931578323253886, "grad_norm": 0.7512907466270561, "learning_rate": 9.385424339571081e-06, "loss": 0.1018, "step": 24398 }, { "epoch": 2.8932764140875133, "grad_norm": 0.8946032357995368, "learning_rate": 9.383549829324478e-06, "loss": 0.1084, "step": 24399 }, { "epoch": 2.893394995849638, "grad_norm": 0.5402999645496737, "learning_rate": 9.381675463042394e-06, "loss": 0.0705, "step": 24400 }, { "epoch": 2.8935135776117633, "grad_norm": 0.8624302092110202, "learning_rate": 9.37980124074211e-06, "loss": 0.1123, "step": 24401 }, { "epoch": 2.8936321593738885, "grad_norm": 0.5860202833026167, "learning_rate": 9.377927162440917e-06, "loss": 0.0664, "step": 24402 }, { "epoch": 2.8937507411360133, "grad_norm": 0.5715373941525547, "learning_rate": 9.376053228156056e-06, "loss": 0.0807, "step": 24403 }, { "epoch": 2.893869322898138, "grad_norm": 0.5323641539691534, "learning_rate": 9.374179437904846e-06, "loss": 0.0573, "step": 24404 }, { "epoch": 2.8939879046602632, "grad_norm": 0.8318316935757017, "learning_rate": 9.372305791704527e-06, "loss": 0.1016, "step": 24405 }, { "epoch": 2.8941064864223884, "grad_norm": 0.6011276712961015, "learning_rate": 9.370432289572398e-06, "loss": 0.078, "step": 24406 }, { "epoch": 2.894225068184513, "grad_norm": 0.43058510779488085, "learning_rate": 9.368558931525696e-06, "loss": 0.072, "step": 24407 }, { "epoch": 2.894343649946638, "grad_norm": 0.7320039763255602, "learning_rate": 9.36668571758173e-06, "loss": 0.0969, "step": 24408 }, { "epoch": 2.894462231708763, "grad_norm": 0.6817685973738036, "learning_rate": 9.364812647757743e-06, "loss": 0.0812, "step": 24409 }, { "epoch": 2.8945808134708884, "grad_norm": 0.576598086303818, "learning_rate": 9.36293972207101e-06, "loss": 0.0671, "step": 24410 }, { "epoch": 2.894699395233013, "grad_norm": 0.3730749033286328, "learning_rate": 9.3610669405388e-06, "loss": 0.0493, "step": 24411 }, { "epoch": 2.894817976995138, "grad_norm": 0.451824469195757, "learning_rate": 9.359194303178371e-06, "loss": 0.0548, "step": 24412 }, { "epoch": 2.894936558757263, "grad_norm": 1.133622254817231, "learning_rate": 9.357321810007e-06, "loss": 0.1346, "step": 24413 }, { "epoch": 2.8950551405193883, "grad_norm": 0.3305987360203168, "learning_rate": 9.355449461041923e-06, "loss": 0.0381, "step": 24414 }, { "epoch": 2.895173722281513, "grad_norm": 0.36311354093017156, "learning_rate": 9.353577256300435e-06, "loss": 0.0426, "step": 24415 }, { "epoch": 2.8952923040436382, "grad_norm": 0.8278988080684128, "learning_rate": 9.351705195799771e-06, "loss": 0.1084, "step": 24416 }, { "epoch": 2.895410885805763, "grad_norm": 0.6267428306899921, "learning_rate": 9.349833279557202e-06, "loss": 0.082, "step": 24417 }, { "epoch": 2.895529467567888, "grad_norm": 1.1037011704479522, "learning_rate": 9.347961507589961e-06, "loss": 0.1001, "step": 24418 }, { "epoch": 2.895648049330013, "grad_norm": 0.7263607865456848, "learning_rate": 9.34608987991534e-06, "loss": 0.0855, "step": 24419 }, { "epoch": 2.895766631092138, "grad_norm": 0.6786409577478659, "learning_rate": 9.344218396550563e-06, "loss": 0.0746, "step": 24420 }, { "epoch": 2.895885212854263, "grad_norm": 0.6822850777241748, "learning_rate": 9.342347057512896e-06, "loss": 0.0854, "step": 24421 }, { "epoch": 2.896003794616388, "grad_norm": 0.6351803439046501, "learning_rate": 9.34047586281959e-06, "loss": 0.0739, "step": 24422 }, { "epoch": 2.896122376378513, "grad_norm": 0.4921996806161358, "learning_rate": 9.33860481248789e-06, "loss": 0.0575, "step": 24423 }, { "epoch": 2.896240958140638, "grad_norm": 0.7005099242206372, "learning_rate": 9.336733906535059e-06, "loss": 0.0818, "step": 24424 }, { "epoch": 2.896359539902763, "grad_norm": 0.7423659402073587, "learning_rate": 9.334863144978318e-06, "loss": 0.0861, "step": 24425 }, { "epoch": 2.896478121664888, "grad_norm": 0.3840682519044292, "learning_rate": 9.332992527834944e-06, "loss": 0.0531, "step": 24426 }, { "epoch": 2.896596703427013, "grad_norm": 0.7369431238156041, "learning_rate": 9.331122055122158e-06, "loss": 0.0633, "step": 24427 }, { "epoch": 2.896715285189138, "grad_norm": 0.7283524528976126, "learning_rate": 9.329251726857222e-06, "loss": 0.0974, "step": 24428 }, { "epoch": 2.8968338669512628, "grad_norm": 0.5694119997508854, "learning_rate": 9.327381543057349e-06, "loss": 0.0766, "step": 24429 }, { "epoch": 2.896952448713388, "grad_norm": 1.098741576332052, "learning_rate": 9.325511503739817e-06, "loss": 0.1002, "step": 24430 }, { "epoch": 2.8970710304755127, "grad_norm": 0.5077075480278486, "learning_rate": 9.32364160892184e-06, "loss": 0.0584, "step": 24431 }, { "epoch": 2.897189612237638, "grad_norm": 0.7167714328443007, "learning_rate": 9.321771858620665e-06, "loss": 0.1003, "step": 24432 }, { "epoch": 2.8973081939997627, "grad_norm": 0.7894765224753961, "learning_rate": 9.319902252853527e-06, "loss": 0.1041, "step": 24433 }, { "epoch": 2.897426775761888, "grad_norm": 0.725368344593293, "learning_rate": 9.31803279163766e-06, "loss": 0.0925, "step": 24434 }, { "epoch": 2.8975453575240127, "grad_norm": 0.8385608624953019, "learning_rate": 9.316163474990311e-06, "loss": 0.1088, "step": 24435 }, { "epoch": 2.897663939286138, "grad_norm": 0.7699757125301655, "learning_rate": 9.314294302928691e-06, "loss": 0.0917, "step": 24436 }, { "epoch": 2.8977825210482626, "grad_norm": 0.6906869065211082, "learning_rate": 9.312425275470043e-06, "loss": 0.1035, "step": 24437 }, { "epoch": 2.897901102810388, "grad_norm": 0.8013822598569222, "learning_rate": 9.310556392631597e-06, "loss": 0.0961, "step": 24438 }, { "epoch": 2.898019684572513, "grad_norm": 0.8903038952759228, "learning_rate": 9.308687654430585e-06, "loss": 0.1272, "step": 24439 }, { "epoch": 2.898138266334638, "grad_norm": 0.7303464823845696, "learning_rate": 9.306819060884226e-06, "loss": 0.0758, "step": 24440 }, { "epoch": 2.8982568480967625, "grad_norm": 0.59755484331057, "learning_rate": 9.304950612009753e-06, "loss": 0.0918, "step": 24441 }, { "epoch": 2.8983754298588877, "grad_norm": 0.6232029480684115, "learning_rate": 9.303082307824395e-06, "loss": 0.0947, "step": 24442 }, { "epoch": 2.898494011621013, "grad_norm": 0.7703395507076388, "learning_rate": 9.301214148345364e-06, "loss": 0.0967, "step": 24443 }, { "epoch": 2.8986125933831377, "grad_norm": 0.933209401348064, "learning_rate": 9.299346133589886e-06, "loss": 0.1114, "step": 24444 }, { "epoch": 2.8987311751452625, "grad_norm": 0.6357070257844913, "learning_rate": 9.297478263575184e-06, "loss": 0.0796, "step": 24445 }, { "epoch": 2.8988497569073877, "grad_norm": 0.3550172406324586, "learning_rate": 9.295610538318486e-06, "loss": 0.0417, "step": 24446 }, { "epoch": 2.898968338669513, "grad_norm": 0.6243039435477831, "learning_rate": 9.29374295783699e-06, "loss": 0.0797, "step": 24447 }, { "epoch": 2.8990869204316376, "grad_norm": 0.5108711405090849, "learning_rate": 9.291875522147924e-06, "loss": 0.0426, "step": 24448 }, { "epoch": 2.8992055021937624, "grad_norm": 0.4678394379535922, "learning_rate": 9.290008231268505e-06, "loss": 0.0477, "step": 24449 }, { "epoch": 2.8993240839558876, "grad_norm": 0.7694579120462469, "learning_rate": 9.288141085215943e-06, "loss": 0.0981, "step": 24450 }, { "epoch": 2.899442665718013, "grad_norm": 0.9523959120077785, "learning_rate": 9.286274084007454e-06, "loss": 0.1234, "step": 24451 }, { "epoch": 2.8995612474801375, "grad_norm": 0.705285322885866, "learning_rate": 9.28440722766025e-06, "loss": 0.0769, "step": 24452 }, { "epoch": 2.8996798292422623, "grad_norm": 0.6984090073632909, "learning_rate": 9.282540516191545e-06, "loss": 0.114, "step": 24453 }, { "epoch": 2.8997984110043875, "grad_norm": 0.8073916135253997, "learning_rate": 9.280673949618534e-06, "loss": 0.1105, "step": 24454 }, { "epoch": 2.8999169927665127, "grad_norm": 0.6445166714780363, "learning_rate": 9.278807527958435e-06, "loss": 0.0839, "step": 24455 }, { "epoch": 2.9000355745286375, "grad_norm": 0.6656506292748096, "learning_rate": 9.276941251228452e-06, "loss": 0.0686, "step": 24456 }, { "epoch": 2.9001541562907622, "grad_norm": 0.5051717528775664, "learning_rate": 9.275075119445798e-06, "loss": 0.0516, "step": 24457 }, { "epoch": 2.9002727380528874, "grad_norm": 0.4461358401353634, "learning_rate": 9.27320913262766e-06, "loss": 0.0654, "step": 24458 }, { "epoch": 2.9003913198150126, "grad_norm": 0.9245493698019928, "learning_rate": 9.271343290791249e-06, "loss": 0.1175, "step": 24459 }, { "epoch": 2.9005099015771374, "grad_norm": 0.6063487620495317, "learning_rate": 9.269477593953762e-06, "loss": 0.0825, "step": 24460 }, { "epoch": 2.900628483339262, "grad_norm": 0.6254729943483786, "learning_rate": 9.267612042132403e-06, "loss": 0.0759, "step": 24461 }, { "epoch": 2.9007470651013874, "grad_norm": 0.8160922059710519, "learning_rate": 9.26574663534438e-06, "loss": 0.1162, "step": 24462 }, { "epoch": 2.9008656468635126, "grad_norm": 0.47777382987558253, "learning_rate": 9.263881373606859e-06, "loss": 0.0627, "step": 24463 }, { "epoch": 2.9009842286256373, "grad_norm": 0.5868199287943977, "learning_rate": 9.26201625693707e-06, "loss": 0.0717, "step": 24464 }, { "epoch": 2.9011028103877625, "grad_norm": 0.6381514042180989, "learning_rate": 9.260151285352187e-06, "loss": 0.0562, "step": 24465 }, { "epoch": 2.9012213921498873, "grad_norm": 0.7438409307350594, "learning_rate": 9.258286458869405e-06, "loss": 0.094, "step": 24466 }, { "epoch": 2.9013399739120125, "grad_norm": 0.6820733851374595, "learning_rate": 9.256421777505919e-06, "loss": 0.0845, "step": 24467 }, { "epoch": 2.9014585556741372, "grad_norm": 0.5114040887237391, "learning_rate": 9.254557241278928e-06, "loss": 0.0645, "step": 24468 }, { "epoch": 2.9015771374362624, "grad_norm": 0.7137435960198472, "learning_rate": 9.252692850205599e-06, "loss": 0.1033, "step": 24469 }, { "epoch": 2.901695719198387, "grad_norm": 0.7255936348872871, "learning_rate": 9.250828604303133e-06, "loss": 0.0897, "step": 24470 }, { "epoch": 2.9018143009605124, "grad_norm": 0.6479313906339416, "learning_rate": 9.248964503588715e-06, "loss": 0.099, "step": 24471 }, { "epoch": 2.901932882722637, "grad_norm": 0.5478125446699276, "learning_rate": 9.24710054807953e-06, "loss": 0.0894, "step": 24472 }, { "epoch": 2.9020514644847624, "grad_norm": 0.767984061716492, "learning_rate": 9.245236737792768e-06, "loss": 0.0915, "step": 24473 }, { "epoch": 2.902170046246887, "grad_norm": 0.9377884915823741, "learning_rate": 9.243373072745585e-06, "loss": 0.1196, "step": 24474 }, { "epoch": 2.9022886280090123, "grad_norm": 0.6489635688087836, "learning_rate": 9.2415095529552e-06, "loss": 0.0896, "step": 24475 }, { "epoch": 2.902407209771137, "grad_norm": 0.6365565667463203, "learning_rate": 9.23964617843876e-06, "loss": 0.0963, "step": 24476 }, { "epoch": 2.9025257915332623, "grad_norm": 0.6661926398774086, "learning_rate": 9.237782949213466e-06, "loss": 0.0961, "step": 24477 }, { "epoch": 2.902644373295387, "grad_norm": 0.7194923628748076, "learning_rate": 9.23591986529647e-06, "loss": 0.1103, "step": 24478 }, { "epoch": 2.9027629550575123, "grad_norm": 0.9918467559368086, "learning_rate": 9.234056926704976e-06, "loss": 0.1236, "step": 24479 }, { "epoch": 2.902881536819637, "grad_norm": 0.5835459686085515, "learning_rate": 9.232194133456134e-06, "loss": 0.0724, "step": 24480 }, { "epoch": 2.903000118581762, "grad_norm": 0.5508011278913136, "learning_rate": 9.230331485567131e-06, "loss": 0.0694, "step": 24481 }, { "epoch": 2.903118700343887, "grad_norm": 0.7816489417372173, "learning_rate": 9.22846898305513e-06, "loss": 0.0951, "step": 24482 }, { "epoch": 2.903237282106012, "grad_norm": 0.5933175231711821, "learning_rate": 9.226606625937306e-06, "loss": 0.0791, "step": 24483 }, { "epoch": 2.903355863868137, "grad_norm": 0.5982353723259934, "learning_rate": 9.224744414230831e-06, "loss": 0.087, "step": 24484 }, { "epoch": 2.903474445630262, "grad_norm": 0.4518604811592768, "learning_rate": 9.222882347952854e-06, "loss": 0.0639, "step": 24485 }, { "epoch": 2.903593027392387, "grad_norm": 0.8069695632948581, "learning_rate": 9.221020427120575e-06, "loss": 0.1073, "step": 24486 }, { "epoch": 2.903711609154512, "grad_norm": 0.8973973889046186, "learning_rate": 9.219158651751126e-06, "loss": 0.0906, "step": 24487 }, { "epoch": 2.903830190916637, "grad_norm": 0.926409502691092, "learning_rate": 9.21729702186169e-06, "loss": 0.1166, "step": 24488 }, { "epoch": 2.903948772678762, "grad_norm": 0.6029451563648202, "learning_rate": 9.215435537469406e-06, "loss": 0.066, "step": 24489 }, { "epoch": 2.904067354440887, "grad_norm": 0.5686746230181288, "learning_rate": 9.213574198591465e-06, "loss": 0.0661, "step": 24490 }, { "epoch": 2.904185936203012, "grad_norm": 0.6417171781409, "learning_rate": 9.211713005245003e-06, "loss": 0.1014, "step": 24491 }, { "epoch": 2.9043045179651372, "grad_norm": 0.7428777712043617, "learning_rate": 9.209851957447187e-06, "loss": 0.0774, "step": 24492 }, { "epoch": 2.904423099727262, "grad_norm": 0.8235882852695118, "learning_rate": 9.207991055215175e-06, "loss": 0.117, "step": 24493 }, { "epoch": 2.9045416814893867, "grad_norm": 0.5725987878980657, "learning_rate": 9.206130298566117e-06, "loss": 0.0778, "step": 24494 }, { "epoch": 2.904660263251512, "grad_norm": 0.5383689519344482, "learning_rate": 9.204269687517181e-06, "loss": 0.0663, "step": 24495 }, { "epoch": 2.904778845013637, "grad_norm": 0.5083346150311777, "learning_rate": 9.202409222085493e-06, "loss": 0.0661, "step": 24496 }, { "epoch": 2.904897426775762, "grad_norm": 0.7417845490488046, "learning_rate": 9.200548902288231e-06, "loss": 0.1042, "step": 24497 }, { "epoch": 2.9050160085378867, "grad_norm": 0.7414906795176864, "learning_rate": 9.19868872814253e-06, "loss": 0.0968, "step": 24498 }, { "epoch": 2.905134590300012, "grad_norm": 0.8376180114083874, "learning_rate": 9.196828699665538e-06, "loss": 0.1204, "step": 24499 }, { "epoch": 2.905253172062137, "grad_norm": 0.7097875189676165, "learning_rate": 9.19496881687441e-06, "loss": 0.0755, "step": 24500 }, { "epoch": 2.905371753824262, "grad_norm": 0.4913583882469641, "learning_rate": 9.193109079786285e-06, "loss": 0.0628, "step": 24501 }, { "epoch": 2.9054903355863866, "grad_norm": 0.47536910532375665, "learning_rate": 9.191249488418322e-06, "loss": 0.0659, "step": 24502 }, { "epoch": 2.905608917348512, "grad_norm": 0.7220503525035675, "learning_rate": 9.189390042787644e-06, "loss": 0.117, "step": 24503 }, { "epoch": 2.905727499110637, "grad_norm": 0.5324155276899389, "learning_rate": 9.187530742911399e-06, "loss": 0.0709, "step": 24504 }, { "epoch": 2.9058460808727617, "grad_norm": 0.7622600092772447, "learning_rate": 9.18567158880673e-06, "loss": 0.0814, "step": 24505 }, { "epoch": 2.9059646626348865, "grad_norm": 0.8023893866944384, "learning_rate": 9.183812580490786e-06, "loss": 0.087, "step": 24506 }, { "epoch": 2.9060832443970117, "grad_norm": 0.6054624450717911, "learning_rate": 9.181953717980687e-06, "loss": 0.0791, "step": 24507 }, { "epoch": 2.906201826159137, "grad_norm": 0.786739984315748, "learning_rate": 9.180095001293575e-06, "loss": 0.1054, "step": 24508 }, { "epoch": 2.9063204079212617, "grad_norm": 1.0980185522615622, "learning_rate": 9.17823643044659e-06, "loss": 0.1262, "step": 24509 }, { "epoch": 2.9064389896833864, "grad_norm": 0.6681452202792318, "learning_rate": 9.176378005456865e-06, "loss": 0.0604, "step": 24510 }, { "epoch": 2.9065575714455116, "grad_norm": 0.6578010767055299, "learning_rate": 9.174519726341527e-06, "loss": 0.0826, "step": 24511 }, { "epoch": 2.906676153207637, "grad_norm": 0.3885663889487954, "learning_rate": 9.172661593117712e-06, "loss": 0.0522, "step": 24512 }, { "epoch": 2.9067947349697616, "grad_norm": 0.800850309857716, "learning_rate": 9.170803605802555e-06, "loss": 0.086, "step": 24513 }, { "epoch": 2.906913316731887, "grad_norm": 0.5484241021453575, "learning_rate": 9.16894576441317e-06, "loss": 0.0689, "step": 24514 }, { "epoch": 2.9070318984940116, "grad_norm": 0.6259210803973991, "learning_rate": 9.167088068966694e-06, "loss": 0.0688, "step": 24515 }, { "epoch": 2.9071504802561368, "grad_norm": 0.8748246710085603, "learning_rate": 9.165230519480247e-06, "loss": 0.0807, "step": 24516 }, { "epoch": 2.9072690620182615, "grad_norm": 0.48554359081651755, "learning_rate": 9.16337311597097e-06, "loss": 0.0424, "step": 24517 }, { "epoch": 2.9073876437803867, "grad_norm": 0.995207417158025, "learning_rate": 9.16151585845596e-06, "loss": 0.1192, "step": 24518 }, { "epoch": 2.9075062255425115, "grad_norm": 0.5110789422574848, "learning_rate": 9.159658746952352e-06, "loss": 0.0619, "step": 24519 }, { "epoch": 2.9076248073046367, "grad_norm": 0.526599794379962, "learning_rate": 9.157801781477266e-06, "loss": 0.0687, "step": 24520 }, { "epoch": 2.9077433890667614, "grad_norm": 0.7276440937991232, "learning_rate": 9.155944962047821e-06, "loss": 0.0821, "step": 24521 }, { "epoch": 2.9078619708288866, "grad_norm": 0.5814665249325005, "learning_rate": 9.154088288681142e-06, "loss": 0.0678, "step": 24522 }, { "epoch": 2.9079805525910114, "grad_norm": 0.8252102173866787, "learning_rate": 9.152231761394323e-06, "loss": 0.1182, "step": 24523 }, { "epoch": 2.9080991343531366, "grad_norm": 0.5587723778567326, "learning_rate": 9.150375380204507e-06, "loss": 0.0843, "step": 24524 }, { "epoch": 2.9082177161152614, "grad_norm": 0.48529664469034295, "learning_rate": 9.148519145128786e-06, "loss": 0.0573, "step": 24525 }, { "epoch": 2.9083362978773866, "grad_norm": 0.4851058338169155, "learning_rate": 9.146663056184282e-06, "loss": 0.0655, "step": 24526 }, { "epoch": 2.9084548796395113, "grad_norm": 0.7755412264867338, "learning_rate": 9.144807113388101e-06, "loss": 0.1089, "step": 24527 }, { "epoch": 2.9085734614016365, "grad_norm": 0.7449878408653902, "learning_rate": 9.142951316757367e-06, "loss": 0.1039, "step": 24528 }, { "epoch": 2.9086920431637613, "grad_norm": 1.0050036259990787, "learning_rate": 9.141095666309165e-06, "loss": 0.1068, "step": 24529 }, { "epoch": 2.9088106249258865, "grad_norm": 0.5594837307832465, "learning_rate": 9.139240162060617e-06, "loss": 0.075, "step": 24530 }, { "epoch": 2.9089292066880112, "grad_norm": 0.6296518455637233, "learning_rate": 9.137384804028823e-06, "loss": 0.0724, "step": 24531 }, { "epoch": 2.9090477884501365, "grad_norm": 0.7503108575886691, "learning_rate": 9.13552959223089e-06, "loss": 0.0748, "step": 24532 }, { "epoch": 2.909166370212261, "grad_norm": 0.5293340427527681, "learning_rate": 9.133674526683927e-06, "loss": 0.071, "step": 24533 }, { "epoch": 2.9092849519743864, "grad_norm": 0.9763628532769871, "learning_rate": 9.131819607405012e-06, "loss": 0.1304, "step": 24534 }, { "epoch": 2.909403533736511, "grad_norm": 0.824233451163118, "learning_rate": 9.129964834411281e-06, "loss": 0.0667, "step": 24535 }, { "epoch": 2.9095221154986364, "grad_norm": 0.6235406387976102, "learning_rate": 9.128110207719801e-06, "loss": 0.0802, "step": 24536 }, { "epoch": 2.909640697260761, "grad_norm": 0.4718102426395717, "learning_rate": 9.126255727347683e-06, "loss": 0.0693, "step": 24537 }, { "epoch": 2.9097592790228863, "grad_norm": 0.5091859969568697, "learning_rate": 9.12440139331202e-06, "loss": 0.0465, "step": 24538 }, { "epoch": 2.909877860785011, "grad_norm": 0.7585168609901312, "learning_rate": 9.12254720562992e-06, "loss": 0.0785, "step": 24539 }, { "epoch": 2.9099964425471363, "grad_norm": 1.0817770864806353, "learning_rate": 9.120693164318452e-06, "loss": 0.1564, "step": 24540 }, { "epoch": 2.9101150243092615, "grad_norm": 0.5578094042262853, "learning_rate": 9.118839269394722e-06, "loss": 0.0726, "step": 24541 }, { "epoch": 2.9102336060713863, "grad_norm": 0.7299095670134237, "learning_rate": 9.116985520875821e-06, "loss": 0.075, "step": 24542 }, { "epoch": 2.910352187833511, "grad_norm": 0.9523248345298998, "learning_rate": 9.115131918778835e-06, "loss": 0.1027, "step": 24543 }, { "epoch": 2.910470769595636, "grad_norm": 0.8507416103598583, "learning_rate": 9.11327846312086e-06, "loss": 0.1034, "step": 24544 }, { "epoch": 2.9105893513577614, "grad_norm": 0.5802364976262676, "learning_rate": 9.111425153918962e-06, "loss": 0.0726, "step": 24545 }, { "epoch": 2.910707933119886, "grad_norm": 0.596298680937823, "learning_rate": 9.109571991190254e-06, "loss": 0.0893, "step": 24546 }, { "epoch": 2.910826514882011, "grad_norm": 0.551890142620337, "learning_rate": 9.1077189749518e-06, "loss": 0.0654, "step": 24547 }, { "epoch": 2.910945096644136, "grad_norm": 0.5866829205007619, "learning_rate": 9.10586610522069e-06, "loss": 0.0832, "step": 24548 }, { "epoch": 2.9110636784062613, "grad_norm": 0.5478386057304628, "learning_rate": 9.104013382014e-06, "loss": 0.0798, "step": 24549 }, { "epoch": 2.911182260168386, "grad_norm": 0.35101596617253017, "learning_rate": 9.102160805348817e-06, "loss": 0.0426, "step": 24550 }, { "epoch": 2.911300841930511, "grad_norm": 0.724655782747847, "learning_rate": 9.100308375242225e-06, "loss": 0.073, "step": 24551 }, { "epoch": 2.911419423692636, "grad_norm": 0.6054737068192145, "learning_rate": 9.098456091711275e-06, "loss": 0.0683, "step": 24552 }, { "epoch": 2.9115380054547613, "grad_norm": 0.5593408177447576, "learning_rate": 9.096603954773075e-06, "loss": 0.0668, "step": 24553 }, { "epoch": 2.911656587216886, "grad_norm": 0.6647969582815879, "learning_rate": 9.094751964444676e-06, "loss": 0.0935, "step": 24554 }, { "epoch": 2.911775168979011, "grad_norm": 0.945188173537514, "learning_rate": 9.09290012074317e-06, "loss": 0.1139, "step": 24555 }, { "epoch": 2.911893750741136, "grad_norm": 0.5777541736012495, "learning_rate": 9.0910484236856e-06, "loss": 0.0785, "step": 24556 }, { "epoch": 2.912012332503261, "grad_norm": 0.6449401310609532, "learning_rate": 9.089196873289071e-06, "loss": 0.0934, "step": 24557 }, { "epoch": 2.912130914265386, "grad_norm": 0.8397985425881849, "learning_rate": 9.087345469570629e-06, "loss": 0.0864, "step": 24558 }, { "epoch": 2.9122494960275107, "grad_norm": 0.7867180067438102, "learning_rate": 9.085494212547346e-06, "loss": 0.1244, "step": 24559 }, { "epoch": 2.912368077789636, "grad_norm": 0.6550225224911509, "learning_rate": 9.083643102236294e-06, "loss": 0.0696, "step": 24560 }, { "epoch": 2.912486659551761, "grad_norm": 0.7065264655061265, "learning_rate": 9.081792138654533e-06, "loss": 0.0969, "step": 24561 }, { "epoch": 2.912605241313886, "grad_norm": 0.5905222804629374, "learning_rate": 9.079941321819135e-06, "loss": 0.0954, "step": 24562 }, { "epoch": 2.912723823076011, "grad_norm": 0.3338167647359463, "learning_rate": 9.078090651747151e-06, "loss": 0.0432, "step": 24563 }, { "epoch": 2.912842404838136, "grad_norm": 0.5704439065255432, "learning_rate": 9.076240128455644e-06, "loss": 0.0766, "step": 24564 }, { "epoch": 2.912960986600261, "grad_norm": 0.5826241571123815, "learning_rate": 9.074389751961677e-06, "loss": 0.0895, "step": 24565 }, { "epoch": 2.913079568362386, "grad_norm": 0.9833514042613896, "learning_rate": 9.072539522282314e-06, "loss": 0.1309, "step": 24566 }, { "epoch": 2.913198150124511, "grad_norm": 0.7677964088990966, "learning_rate": 9.070689439434587e-06, "loss": 0.1017, "step": 24567 }, { "epoch": 2.9133167318866358, "grad_norm": 0.715549241582121, "learning_rate": 9.06883950343559e-06, "loss": 0.0798, "step": 24568 }, { "epoch": 2.913435313648761, "grad_norm": 0.48223449142172586, "learning_rate": 9.066989714302345e-06, "loss": 0.0594, "step": 24569 }, { "epoch": 2.9135538954108857, "grad_norm": 0.8447312785872844, "learning_rate": 9.06514007205192e-06, "loss": 0.0853, "step": 24570 }, { "epoch": 2.913672477173011, "grad_norm": 0.6170664255268716, "learning_rate": 9.063290576701362e-06, "loss": 0.0821, "step": 24571 }, { "epoch": 2.9137910589351357, "grad_norm": 0.5266074273398179, "learning_rate": 9.06144122826772e-06, "loss": 0.0628, "step": 24572 }, { "epoch": 2.913909640697261, "grad_norm": 0.8998910404972797, "learning_rate": 9.059592026768054e-06, "loss": 0.1018, "step": 24573 }, { "epoch": 2.9140282224593856, "grad_norm": 0.7679503312634754, "learning_rate": 9.057742972219397e-06, "loss": 0.0987, "step": 24574 }, { "epoch": 2.914146804221511, "grad_norm": 0.8251374611790814, "learning_rate": 9.055894064638795e-06, "loss": 0.1047, "step": 24575 }, { "epoch": 2.9142653859836356, "grad_norm": 0.7020711741097312, "learning_rate": 9.0540453040433e-06, "loss": 0.0885, "step": 24576 }, { "epoch": 2.914383967745761, "grad_norm": 0.4692628626252707, "learning_rate": 9.052196690449963e-06, "loss": 0.0612, "step": 24577 }, { "epoch": 2.9145025495078856, "grad_norm": 0.4992200649538469, "learning_rate": 9.050348223875804e-06, "loss": 0.0584, "step": 24578 }, { "epoch": 2.9146211312700108, "grad_norm": 0.8157333203874256, "learning_rate": 9.04849990433788e-06, "loss": 0.0956, "step": 24579 }, { "epoch": 2.9147397130321355, "grad_norm": 0.7884160770057916, "learning_rate": 9.046651731853223e-06, "loss": 0.0969, "step": 24580 }, { "epoch": 2.9148582947942607, "grad_norm": 0.7720295596017132, "learning_rate": 9.044803706438873e-06, "loss": 0.0865, "step": 24581 }, { "epoch": 2.9149768765563855, "grad_norm": 0.5977832833181875, "learning_rate": 9.042955828111868e-06, "loss": 0.082, "step": 24582 }, { "epoch": 2.9150954583185107, "grad_norm": 0.5264287908229192, "learning_rate": 9.041108096889242e-06, "loss": 0.0588, "step": 24583 }, { "epoch": 2.9152140400806354, "grad_norm": 0.5750669073661085, "learning_rate": 9.03926051278804e-06, "loss": 0.0676, "step": 24584 }, { "epoch": 2.9153326218427607, "grad_norm": 0.7687242926380207, "learning_rate": 9.037413075825275e-06, "loss": 0.0709, "step": 24585 }, { "epoch": 2.9154512036048854, "grad_norm": 0.9981743438269084, "learning_rate": 9.035565786017986e-06, "loss": 0.1376, "step": 24586 }, { "epoch": 2.9155697853670106, "grad_norm": 0.6868287604211158, "learning_rate": 9.033718643383201e-06, "loss": 0.0954, "step": 24587 }, { "epoch": 2.9156883671291354, "grad_norm": 0.7652729826443756, "learning_rate": 9.031871647937961e-06, "loss": 0.0961, "step": 24588 }, { "epoch": 2.9158069488912606, "grad_norm": 0.7800252313588222, "learning_rate": 9.030024799699276e-06, "loss": 0.0659, "step": 24589 }, { "epoch": 2.9159255306533858, "grad_norm": 0.3879649306763293, "learning_rate": 9.02817809868418e-06, "loss": 0.0509, "step": 24590 }, { "epoch": 2.9160441124155105, "grad_norm": 1.1559761245524864, "learning_rate": 9.026331544909692e-06, "loss": 0.1745, "step": 24591 }, { "epoch": 2.9161626941776353, "grad_norm": 0.539229000152414, "learning_rate": 9.024485138392841e-06, "loss": 0.0707, "step": 24592 }, { "epoch": 2.9162812759397605, "grad_norm": 0.5103741350237563, "learning_rate": 9.022638879150656e-06, "loss": 0.0546, "step": 24593 }, { "epoch": 2.9163998577018857, "grad_norm": 0.903861819156669, "learning_rate": 9.020792767200129e-06, "loss": 0.1348, "step": 24594 }, { "epoch": 2.9165184394640105, "grad_norm": 0.49291135759426813, "learning_rate": 9.018946802558315e-06, "loss": 0.0728, "step": 24595 }, { "epoch": 2.916637021226135, "grad_norm": 0.7130782729136605, "learning_rate": 9.017100985242207e-06, "loss": 0.1008, "step": 24596 }, { "epoch": 2.9167556029882604, "grad_norm": 0.717943051571022, "learning_rate": 9.015255315268828e-06, "loss": 0.1056, "step": 24597 }, { "epoch": 2.9168741847503856, "grad_norm": 0.44462226229293017, "learning_rate": 9.013409792655193e-06, "loss": 0.0512, "step": 24598 }, { "epoch": 2.9169927665125104, "grad_norm": 0.5893542222583511, "learning_rate": 9.011564417418314e-06, "loss": 0.066, "step": 24599 }, { "epoch": 2.917111348274635, "grad_norm": 0.6074012851065281, "learning_rate": 9.009719189575213e-06, "loss": 0.0933, "step": 24600 }, { "epoch": 2.9172299300367603, "grad_norm": 0.9547761506371666, "learning_rate": 9.00787410914288e-06, "loss": 0.1265, "step": 24601 }, { "epoch": 2.9173485117988855, "grad_norm": 0.594829012684177, "learning_rate": 9.00602917613835e-06, "loss": 0.0655, "step": 24602 }, { "epoch": 2.9174670935610103, "grad_norm": 0.8087484301969218, "learning_rate": 9.00418439057861e-06, "loss": 0.129, "step": 24603 }, { "epoch": 2.917585675323135, "grad_norm": 0.4930951070831003, "learning_rate": 9.002339752480684e-06, "loss": 0.0709, "step": 24604 }, { "epoch": 2.9177042570852603, "grad_norm": 0.4632219428155486, "learning_rate": 9.00049526186155e-06, "loss": 0.0583, "step": 24605 }, { "epoch": 2.9178228388473855, "grad_norm": 0.44180306454807344, "learning_rate": 8.998650918738246e-06, "loss": 0.0504, "step": 24606 }, { "epoch": 2.9179414206095102, "grad_norm": 0.9191200044034793, "learning_rate": 8.996806723127751e-06, "loss": 0.1065, "step": 24607 }, { "epoch": 2.918060002371635, "grad_norm": 0.5340725977944338, "learning_rate": 8.994962675047075e-06, "loss": 0.0742, "step": 24608 }, { "epoch": 2.91817858413376, "grad_norm": 0.7451228832175886, "learning_rate": 8.993118774513215e-06, "loss": 0.1164, "step": 24609 }, { "epoch": 2.9182971658958854, "grad_norm": 0.6164350011484476, "learning_rate": 8.991275021543171e-06, "loss": 0.0694, "step": 24610 }, { "epoch": 2.91841574765801, "grad_norm": 0.6182222022102911, "learning_rate": 8.989431416153949e-06, "loss": 0.0914, "step": 24611 }, { "epoch": 2.918534329420135, "grad_norm": 0.6285152632443216, "learning_rate": 8.987587958362517e-06, "loss": 0.0856, "step": 24612 }, { "epoch": 2.91865291118226, "grad_norm": 0.5228464056474311, "learning_rate": 8.98574464818591e-06, "loss": 0.0635, "step": 24613 }, { "epoch": 2.9187714929443853, "grad_norm": 0.5144794743148586, "learning_rate": 8.983901485641085e-06, "loss": 0.0637, "step": 24614 }, { "epoch": 2.91889007470651, "grad_norm": 0.7200460024503357, "learning_rate": 8.98205847074506e-06, "loss": 0.0976, "step": 24615 }, { "epoch": 2.9190086564686353, "grad_norm": 0.7285023560210849, "learning_rate": 8.980215603514797e-06, "loss": 0.1047, "step": 24616 }, { "epoch": 2.91912723823076, "grad_norm": 0.5647822893070963, "learning_rate": 8.978372883967314e-06, "loss": 0.0817, "step": 24617 }, { "epoch": 2.9192458199928852, "grad_norm": 0.6688771391323443, "learning_rate": 8.976530312119583e-06, "loss": 0.0767, "step": 24618 }, { "epoch": 2.91936440175501, "grad_norm": 0.6487078201441365, "learning_rate": 8.97468788798859e-06, "loss": 0.0798, "step": 24619 }, { "epoch": 2.919482983517135, "grad_norm": 0.6702780148704641, "learning_rate": 8.972845611591323e-06, "loss": 0.0873, "step": 24620 }, { "epoch": 2.91960156527926, "grad_norm": 0.7905632255481346, "learning_rate": 8.971003482944767e-06, "loss": 0.0867, "step": 24621 }, { "epoch": 2.919720147041385, "grad_norm": 0.46321215629215806, "learning_rate": 8.969161502065912e-06, "loss": 0.0624, "step": 24622 }, { "epoch": 2.91983872880351, "grad_norm": 0.8309863826109576, "learning_rate": 8.96731966897171e-06, "loss": 0.1168, "step": 24623 }, { "epoch": 2.919957310565635, "grad_norm": 0.6098529348179421, "learning_rate": 8.96547798367918e-06, "loss": 0.07, "step": 24624 }, { "epoch": 2.92007589232776, "grad_norm": 0.6649431780774924, "learning_rate": 8.96363644620527e-06, "loss": 0.1016, "step": 24625 }, { "epoch": 2.920194474089885, "grad_norm": 0.5719213255371928, "learning_rate": 8.961795056566974e-06, "loss": 0.077, "step": 24626 }, { "epoch": 2.92031305585201, "grad_norm": 0.809087186234105, "learning_rate": 8.959953814781247e-06, "loss": 0.0875, "step": 24627 }, { "epoch": 2.920431637614135, "grad_norm": 0.668102695289529, "learning_rate": 8.958112720865089e-06, "loss": 0.0875, "step": 24628 }, { "epoch": 2.92055021937626, "grad_norm": 0.6743606174459467, "learning_rate": 8.956271774835454e-06, "loss": 0.0892, "step": 24629 }, { "epoch": 2.920668801138385, "grad_norm": 0.6593847209950313, "learning_rate": 8.954430976709318e-06, "loss": 0.0885, "step": 24630 }, { "epoch": 2.9207873829005098, "grad_norm": 0.46436674774844977, "learning_rate": 8.952590326503655e-06, "loss": 0.0504, "step": 24631 }, { "epoch": 2.920905964662635, "grad_norm": 0.5396144921450765, "learning_rate": 8.950749824235428e-06, "loss": 0.0581, "step": 24632 }, { "epoch": 2.9210245464247597, "grad_norm": 0.6275604963658415, "learning_rate": 8.948909469921615e-06, "loss": 0.0774, "step": 24633 }, { "epoch": 2.921143128186885, "grad_norm": 0.7750265312496867, "learning_rate": 8.947069263579163e-06, "loss": 0.109, "step": 24634 }, { "epoch": 2.9212617099490097, "grad_norm": 0.6428000810637188, "learning_rate": 8.945229205225051e-06, "loss": 0.0786, "step": 24635 }, { "epoch": 2.921380291711135, "grad_norm": 0.6487431389471783, "learning_rate": 8.943389294876236e-06, "loss": 0.0684, "step": 24636 }, { "epoch": 2.9214988734732596, "grad_norm": 0.498225467813398, "learning_rate": 8.941549532549692e-06, "loss": 0.0725, "step": 24637 }, { "epoch": 2.921617455235385, "grad_norm": 0.396240331672947, "learning_rate": 8.939709918262351e-06, "loss": 0.046, "step": 24638 }, { "epoch": 2.92173603699751, "grad_norm": 0.5892018995995384, "learning_rate": 8.937870452031205e-06, "loss": 0.0692, "step": 24639 }, { "epoch": 2.921854618759635, "grad_norm": 0.7042846627434335, "learning_rate": 8.93603113387319e-06, "loss": 0.0976, "step": 24640 }, { "epoch": 2.9219732005217596, "grad_norm": 0.6233061696148056, "learning_rate": 8.934191963805272e-06, "loss": 0.0812, "step": 24641 }, { "epoch": 2.9220917822838848, "grad_norm": 0.8579335618270972, "learning_rate": 8.9323529418444e-06, "loss": 0.1245, "step": 24642 }, { "epoch": 2.92221036404601, "grad_norm": 1.0854301179500851, "learning_rate": 8.930514068007529e-06, "loss": 0.1414, "step": 24643 }, { "epoch": 2.9223289458081347, "grad_norm": 0.5006394364394183, "learning_rate": 8.928675342311625e-06, "loss": 0.0643, "step": 24644 }, { "epoch": 2.9224475275702595, "grad_norm": 0.648214586092759, "learning_rate": 8.926836764773613e-06, "loss": 0.0889, "step": 24645 }, { "epoch": 2.9225661093323847, "grad_norm": 0.4485280451013365, "learning_rate": 8.924998335410459e-06, "loss": 0.0571, "step": 24646 }, { "epoch": 2.92268469109451, "grad_norm": 0.7171626571866129, "learning_rate": 8.923160054239108e-06, "loss": 0.1179, "step": 24647 }, { "epoch": 2.9228032728566347, "grad_norm": 0.8261920462896355, "learning_rate": 8.921321921276504e-06, "loss": 0.1277, "step": 24648 }, { "epoch": 2.9229218546187594, "grad_norm": 0.9247288030825495, "learning_rate": 8.919483936539608e-06, "loss": 0.1039, "step": 24649 }, { "epoch": 2.9230404363808846, "grad_norm": 0.5691949632843076, "learning_rate": 8.91764610004533e-06, "loss": 0.0651, "step": 24650 }, { "epoch": 2.92315901814301, "grad_norm": 0.4807495894415854, "learning_rate": 8.91580841181065e-06, "loss": 0.057, "step": 24651 }, { "epoch": 2.9232775999051346, "grad_norm": 0.5744179337683247, "learning_rate": 8.913970871852487e-06, "loss": 0.0938, "step": 24652 }, { "epoch": 2.9233961816672593, "grad_norm": 0.9250710475703056, "learning_rate": 8.912133480187785e-06, "loss": 0.1025, "step": 24653 }, { "epoch": 2.9235147634293845, "grad_norm": 0.6167917133282643, "learning_rate": 8.910296236833484e-06, "loss": 0.0955, "step": 24654 }, { "epoch": 2.9236333451915097, "grad_norm": 0.7048539954028515, "learning_rate": 8.90845914180653e-06, "loss": 0.0875, "step": 24655 }, { "epoch": 2.9237519269536345, "grad_norm": 0.7231351514781331, "learning_rate": 8.906622195123842e-06, "loss": 0.1038, "step": 24656 }, { "epoch": 2.9238705087157593, "grad_norm": 0.6390732490459617, "learning_rate": 8.904785396802363e-06, "loss": 0.0744, "step": 24657 }, { "epoch": 2.9239890904778845, "grad_norm": 0.6566273075315932, "learning_rate": 8.902948746859025e-06, "loss": 0.0814, "step": 24658 }, { "epoch": 2.9241076722400097, "grad_norm": 0.5699865364524855, "learning_rate": 8.901112245310758e-06, "loss": 0.0561, "step": 24659 }, { "epoch": 2.9242262540021344, "grad_norm": 0.5475287301645801, "learning_rate": 8.899275892174506e-06, "loss": 0.0545, "step": 24660 }, { "epoch": 2.924344835764259, "grad_norm": 0.5687601253948326, "learning_rate": 8.897439687467165e-06, "loss": 0.0754, "step": 24661 }, { "epoch": 2.9244634175263844, "grad_norm": 0.5485067429610113, "learning_rate": 8.895603631205705e-06, "loss": 0.0735, "step": 24662 }, { "epoch": 2.9245819992885096, "grad_norm": 0.7721686334340689, "learning_rate": 8.89376772340702e-06, "loss": 0.1086, "step": 24663 }, { "epoch": 2.9247005810506344, "grad_norm": 0.7807047624704917, "learning_rate": 8.891931964088054e-06, "loss": 0.1341, "step": 24664 }, { "epoch": 2.9248191628127596, "grad_norm": 0.8467109116724829, "learning_rate": 8.89009635326571e-06, "loss": 0.0929, "step": 24665 }, { "epoch": 2.9249377445748843, "grad_norm": 0.41194673130605264, "learning_rate": 8.888260890956935e-06, "loss": 0.047, "step": 24666 }, { "epoch": 2.9250563263370095, "grad_norm": 0.9455020209947617, "learning_rate": 8.886425577178628e-06, "loss": 0.1286, "step": 24667 }, { "epoch": 2.9251749080991343, "grad_norm": 0.6685832469525708, "learning_rate": 8.884590411947719e-06, "loss": 0.0924, "step": 24668 }, { "epoch": 2.9252934898612595, "grad_norm": 0.7490620127521881, "learning_rate": 8.882755395281123e-06, "loss": 0.0862, "step": 24669 }, { "epoch": 2.9254120716233842, "grad_norm": 0.46771905294981925, "learning_rate": 8.88092052719576e-06, "loss": 0.0633, "step": 24670 }, { "epoch": 2.9255306533855094, "grad_norm": 0.697712878110552, "learning_rate": 8.879085807708552e-06, "loss": 0.0864, "step": 24671 }, { "epoch": 2.925649235147634, "grad_norm": 0.6061346917250952, "learning_rate": 8.877251236836383e-06, "loss": 0.0845, "step": 24672 }, { "epoch": 2.9257678169097594, "grad_norm": 0.5909073209392788, "learning_rate": 8.875416814596207e-06, "loss": 0.0689, "step": 24673 }, { "epoch": 2.925886398671884, "grad_norm": 0.5452377077395657, "learning_rate": 8.873582541004907e-06, "loss": 0.0807, "step": 24674 }, { "epoch": 2.9260049804340094, "grad_norm": 0.7187729996279948, "learning_rate": 8.871748416079406e-06, "loss": 0.1081, "step": 24675 }, { "epoch": 2.926123562196134, "grad_norm": 0.7911401264971634, "learning_rate": 8.869914439836594e-06, "loss": 0.0838, "step": 24676 }, { "epoch": 2.9262421439582593, "grad_norm": 0.4966693046276024, "learning_rate": 8.868080612293403e-06, "loss": 0.0664, "step": 24677 }, { "epoch": 2.926360725720384, "grad_norm": 0.815528489649856, "learning_rate": 8.866246933466721e-06, "loss": 0.0985, "step": 24678 }, { "epoch": 2.9264793074825093, "grad_norm": 1.0855030725468082, "learning_rate": 8.864413403373456e-06, "loss": 0.1283, "step": 24679 }, { "epoch": 2.926597889244634, "grad_norm": 0.5622127189312047, "learning_rate": 8.862580022030515e-06, "loss": 0.0701, "step": 24680 }, { "epoch": 2.9267164710067592, "grad_norm": 0.7776329305306384, "learning_rate": 8.860746789454796e-06, "loss": 0.0958, "step": 24681 }, { "epoch": 2.926835052768884, "grad_norm": 0.5031012913141516, "learning_rate": 8.858913705663207e-06, "loss": 0.0617, "step": 24682 }, { "epoch": 2.926953634531009, "grad_norm": 0.6847242554563422, "learning_rate": 8.857080770672626e-06, "loss": 0.0734, "step": 24683 }, { "epoch": 2.927072216293134, "grad_norm": 0.6169857162620137, "learning_rate": 8.85524798449998e-06, "loss": 0.0784, "step": 24684 }, { "epoch": 2.927190798055259, "grad_norm": 0.42632011916537993, "learning_rate": 8.853415347162142e-06, "loss": 0.0539, "step": 24685 }, { "epoch": 2.927309379817384, "grad_norm": 0.6859775026656922, "learning_rate": 8.851582858676022e-06, "loss": 0.0738, "step": 24686 }, { "epoch": 2.927427961579509, "grad_norm": 0.6105624407216185, "learning_rate": 8.84975051905849e-06, "loss": 0.0816, "step": 24687 }, { "epoch": 2.9275465433416343, "grad_norm": 0.6316565231513708, "learning_rate": 8.847918328326469e-06, "loss": 0.0887, "step": 24688 }, { "epoch": 2.927665125103759, "grad_norm": 0.5190690438104787, "learning_rate": 8.846086286496824e-06, "loss": 0.0738, "step": 24689 }, { "epoch": 2.927783706865884, "grad_norm": 0.5598389706474172, "learning_rate": 8.844254393586456e-06, "loss": 0.0564, "step": 24690 }, { "epoch": 2.927902288628009, "grad_norm": 0.560566394850453, "learning_rate": 8.842422649612253e-06, "loss": 0.072, "step": 24691 }, { "epoch": 2.9280208703901343, "grad_norm": 0.6223906236228309, "learning_rate": 8.840591054591096e-06, "loss": 0.0701, "step": 24692 }, { "epoch": 2.928139452152259, "grad_norm": 0.7250588018316637, "learning_rate": 8.838759608539884e-06, "loss": 0.0817, "step": 24693 }, { "epoch": 2.9282580339143838, "grad_norm": 0.7143226237190697, "learning_rate": 8.836928311475473e-06, "loss": 0.0845, "step": 24694 }, { "epoch": 2.928376615676509, "grad_norm": 0.6380522300206152, "learning_rate": 8.83509716341478e-06, "loss": 0.0812, "step": 24695 }, { "epoch": 2.928495197438634, "grad_norm": 0.4294251711641108, "learning_rate": 8.833266164374657e-06, "loss": 0.0602, "step": 24696 }, { "epoch": 2.928613779200759, "grad_norm": 0.5587718264127084, "learning_rate": 8.831435314371996e-06, "loss": 0.0904, "step": 24697 }, { "epoch": 2.9287323609628837, "grad_norm": 0.9896487192926706, "learning_rate": 8.829604613423678e-06, "loss": 0.0817, "step": 24698 }, { "epoch": 2.928850942725009, "grad_norm": 0.3720439336818933, "learning_rate": 8.827774061546574e-06, "loss": 0.05, "step": 24699 }, { "epoch": 2.928969524487134, "grad_norm": 0.6630687419879981, "learning_rate": 8.825943658757568e-06, "loss": 0.0934, "step": 24700 }, { "epoch": 2.929088106249259, "grad_norm": 0.40713828424587184, "learning_rate": 8.82411340507352e-06, "loss": 0.0566, "step": 24701 }, { "epoch": 2.9292066880113836, "grad_norm": 0.5028429455011845, "learning_rate": 8.822283300511312e-06, "loss": 0.0676, "step": 24702 }, { "epoch": 2.929325269773509, "grad_norm": 0.6802922654237165, "learning_rate": 8.820453345087809e-06, "loss": 0.0901, "step": 24703 }, { "epoch": 2.929443851535634, "grad_norm": 0.6203841947370131, "learning_rate": 8.818623538819898e-06, "loss": 0.073, "step": 24704 }, { "epoch": 2.929562433297759, "grad_norm": 0.8628518246160247, "learning_rate": 8.816793881724424e-06, "loss": 0.0977, "step": 24705 }, { "epoch": 2.9296810150598835, "grad_norm": 0.773125234113313, "learning_rate": 8.814964373818268e-06, "loss": 0.1172, "step": 24706 }, { "epoch": 2.9297995968220087, "grad_norm": 0.6782259658374168, "learning_rate": 8.813135015118292e-06, "loss": 0.0616, "step": 24707 }, { "epoch": 2.929918178584134, "grad_norm": 0.6149852643136238, "learning_rate": 8.811305805641363e-06, "loss": 0.0624, "step": 24708 }, { "epoch": 2.9300367603462587, "grad_norm": 0.5758848304287975, "learning_rate": 8.80947674540434e-06, "loss": 0.0883, "step": 24709 }, { "epoch": 2.9301553421083835, "grad_norm": 0.5535755407820191, "learning_rate": 8.807647834424088e-06, "loss": 0.0676, "step": 24710 }, { "epoch": 2.9302739238705087, "grad_norm": 0.5384370086560454, "learning_rate": 8.805819072717475e-06, "loss": 0.0702, "step": 24711 }, { "epoch": 2.930392505632634, "grad_norm": 1.0593883243901645, "learning_rate": 8.803990460301343e-06, "loss": 0.1308, "step": 24712 }, { "epoch": 2.9305110873947586, "grad_norm": 0.6221016689316223, "learning_rate": 8.80216199719256e-06, "loss": 0.0854, "step": 24713 }, { "epoch": 2.930629669156884, "grad_norm": 0.4723742873395775, "learning_rate": 8.800333683407977e-06, "loss": 0.076, "step": 24714 }, { "epoch": 2.9307482509190086, "grad_norm": 0.8703270975149389, "learning_rate": 8.798505518964465e-06, "loss": 0.1187, "step": 24715 }, { "epoch": 2.930866832681134, "grad_norm": 0.5447046087274953, "learning_rate": 8.79667750387885e-06, "loss": 0.0684, "step": 24716 }, { "epoch": 2.9309854144432586, "grad_norm": 0.5589663524525419, "learning_rate": 8.794849638168004e-06, "loss": 0.0617, "step": 24717 }, { "epoch": 2.9311039962053838, "grad_norm": 0.8464465475164042, "learning_rate": 8.793021921848772e-06, "loss": 0.0978, "step": 24718 }, { "epoch": 2.9312225779675085, "grad_norm": 0.9922520043326963, "learning_rate": 8.791194354937998e-06, "loss": 0.1329, "step": 24719 }, { "epoch": 2.9313411597296337, "grad_norm": 0.8459845493191571, "learning_rate": 8.78936693745255e-06, "loss": 0.0998, "step": 24720 }, { "epoch": 2.9314597414917585, "grad_norm": 0.8323812875289722, "learning_rate": 8.787539669409239e-06, "loss": 0.0964, "step": 24721 }, { "epoch": 2.9315783232538837, "grad_norm": 0.4620373146061505, "learning_rate": 8.78571255082495e-06, "loss": 0.0615, "step": 24722 }, { "epoch": 2.9316969050160084, "grad_norm": 0.717080625708546, "learning_rate": 8.7838855817165e-06, "loss": 0.0979, "step": 24723 }, { "epoch": 2.9318154867781336, "grad_norm": 0.5040615255725238, "learning_rate": 8.782058762100739e-06, "loss": 0.0571, "step": 24724 }, { "epoch": 2.9319340685402584, "grad_norm": 0.6417192795877278, "learning_rate": 8.780232091994509e-06, "loss": 0.0903, "step": 24725 }, { "epoch": 2.9320526503023836, "grad_norm": 0.7360877623724098, "learning_rate": 8.778405571414658e-06, "loss": 0.0772, "step": 24726 }, { "epoch": 2.9321712320645084, "grad_norm": 0.5935160281793074, "learning_rate": 8.776579200378004e-06, "loss": 0.0845, "step": 24727 }, { "epoch": 2.9322898138266336, "grad_norm": 0.5516238640933109, "learning_rate": 8.7747529789014e-06, "loss": 0.0802, "step": 24728 }, { "epoch": 2.9324083955887583, "grad_norm": 0.6396126585044817, "learning_rate": 8.772926907001672e-06, "loss": 0.0606, "step": 24729 }, { "epoch": 2.9325269773508835, "grad_norm": 0.7513802395381658, "learning_rate": 8.771100984695662e-06, "loss": 0.0876, "step": 24730 }, { "epoch": 2.9326455591130083, "grad_norm": 0.7429696194609956, "learning_rate": 8.769275212000208e-06, "loss": 0.1014, "step": 24731 }, { "epoch": 2.9327641408751335, "grad_norm": 0.7586738873501908, "learning_rate": 8.767449588932114e-06, "loss": 0.0968, "step": 24732 }, { "epoch": 2.9328827226372582, "grad_norm": 1.2887219140965822, "learning_rate": 8.765624115508247e-06, "loss": 0.1333, "step": 24733 }, { "epoch": 2.9330013043993834, "grad_norm": 0.8100491864330978, "learning_rate": 8.763798791745411e-06, "loss": 0.1077, "step": 24734 }, { "epoch": 2.933119886161508, "grad_norm": 0.5373006659545942, "learning_rate": 8.761973617660446e-06, "loss": 0.0622, "step": 24735 }, { "epoch": 2.9332384679236334, "grad_norm": 0.5633486661377247, "learning_rate": 8.760148593270156e-06, "loss": 0.07, "step": 24736 }, { "epoch": 2.933357049685758, "grad_norm": 0.7064455730295925, "learning_rate": 8.758323718591399e-06, "loss": 0.1137, "step": 24737 }, { "epoch": 2.9334756314478834, "grad_norm": 0.5849761869605637, "learning_rate": 8.756498993640972e-06, "loss": 0.087, "step": 24738 }, { "epoch": 2.933594213210008, "grad_norm": 0.709219773385666, "learning_rate": 8.754674418435704e-06, "loss": 0.0652, "step": 24739 }, { "epoch": 2.9337127949721333, "grad_norm": 0.783231466224613, "learning_rate": 8.752849992992417e-06, "loss": 0.1143, "step": 24740 }, { "epoch": 2.9338313767342585, "grad_norm": 0.5357709462577418, "learning_rate": 8.751025717327929e-06, "loss": 0.0658, "step": 24741 }, { "epoch": 2.9339499584963833, "grad_norm": 1.1876952322933052, "learning_rate": 8.749201591459069e-06, "loss": 0.1096, "step": 24742 }, { "epoch": 2.934068540258508, "grad_norm": 0.5775612374245657, "learning_rate": 8.747377615402624e-06, "loss": 0.0695, "step": 24743 }, { "epoch": 2.9341871220206333, "grad_norm": 0.7962539643400423, "learning_rate": 8.745553789175443e-06, "loss": 0.1055, "step": 24744 }, { "epoch": 2.9343057037827585, "grad_norm": 0.5370744660576445, "learning_rate": 8.743730112794315e-06, "loss": 0.0602, "step": 24745 }, { "epoch": 2.934424285544883, "grad_norm": 0.5256012041210142, "learning_rate": 8.741906586276068e-06, "loss": 0.073, "step": 24746 }, { "epoch": 2.934542867307008, "grad_norm": 0.6077567697521994, "learning_rate": 8.740083209637493e-06, "loss": 0.0584, "step": 24747 }, { "epoch": 2.934661449069133, "grad_norm": 0.768853600281149, "learning_rate": 8.738259982895425e-06, "loss": 0.1042, "step": 24748 }, { "epoch": 2.9347800308312584, "grad_norm": 0.7186041625939387, "learning_rate": 8.73643690606665e-06, "loss": 0.0858, "step": 24749 }, { "epoch": 2.934898612593383, "grad_norm": 0.5656160271605689, "learning_rate": 8.734613979167985e-06, "loss": 0.0798, "step": 24750 }, { "epoch": 2.935017194355508, "grad_norm": 0.6221128715674417, "learning_rate": 8.732791202216232e-06, "loss": 0.0822, "step": 24751 }, { "epoch": 2.935135776117633, "grad_norm": 0.6447154099687978, "learning_rate": 8.730968575228193e-06, "loss": 0.0839, "step": 24752 }, { "epoch": 2.9352543578797583, "grad_norm": 0.6771676591759492, "learning_rate": 8.729146098220687e-06, "loss": 0.0977, "step": 24753 }, { "epoch": 2.935372939641883, "grad_norm": 0.41102605502643474, "learning_rate": 8.72732377121048e-06, "loss": 0.0514, "step": 24754 }, { "epoch": 2.935491521404008, "grad_norm": 0.5856953485085992, "learning_rate": 8.725501594214413e-06, "loss": 0.0684, "step": 24755 }, { "epoch": 2.935610103166133, "grad_norm": 0.47818693549072966, "learning_rate": 8.723679567249254e-06, "loss": 0.064, "step": 24756 }, { "epoch": 2.9357286849282582, "grad_norm": 1.0708643362904045, "learning_rate": 8.72185769033181e-06, "loss": 0.1324, "step": 24757 }, { "epoch": 2.935847266690383, "grad_norm": 0.5928517530064702, "learning_rate": 8.720035963478878e-06, "loss": 0.0867, "step": 24758 }, { "epoch": 2.9359658484525077, "grad_norm": 0.6483791990100886, "learning_rate": 8.71821438670725e-06, "loss": 0.0915, "step": 24759 }, { "epoch": 2.936084430214633, "grad_norm": 0.8310517485902916, "learning_rate": 8.716392960033729e-06, "loss": 0.0914, "step": 24760 }, { "epoch": 2.936203011976758, "grad_norm": 0.5965338021885986, "learning_rate": 8.714571683475083e-06, "loss": 0.0733, "step": 24761 }, { "epoch": 2.936321593738883, "grad_norm": 0.48540391515062387, "learning_rate": 8.712750557048122e-06, "loss": 0.0746, "step": 24762 }, { "epoch": 2.936440175501008, "grad_norm": 0.9832935264964617, "learning_rate": 8.710929580769625e-06, "loss": 0.1345, "step": 24763 }, { "epoch": 2.936558757263133, "grad_norm": 0.4145882546448261, "learning_rate": 8.709108754656392e-06, "loss": 0.0441, "step": 24764 }, { "epoch": 2.936677339025258, "grad_norm": 0.5593425107200681, "learning_rate": 8.70728807872519e-06, "loss": 0.0793, "step": 24765 }, { "epoch": 2.936795920787383, "grad_norm": 0.6079767405943625, "learning_rate": 8.70546755299281e-06, "loss": 0.0913, "step": 24766 }, { "epoch": 2.936914502549508, "grad_norm": 0.7112365716887155, "learning_rate": 8.70364717747604e-06, "loss": 0.0906, "step": 24767 }, { "epoch": 2.937033084311633, "grad_norm": 0.6082529972350551, "learning_rate": 8.70182695219166e-06, "loss": 0.0866, "step": 24768 }, { "epoch": 2.937151666073758, "grad_norm": 0.8958845124015028, "learning_rate": 8.700006877156447e-06, "loss": 0.1041, "step": 24769 }, { "epoch": 2.9372702478358828, "grad_norm": 0.7080701050598175, "learning_rate": 8.698186952387186e-06, "loss": 0.0963, "step": 24770 }, { "epoch": 2.937388829598008, "grad_norm": 0.5351035425892591, "learning_rate": 8.696367177900654e-06, "loss": 0.0831, "step": 24771 }, { "epoch": 2.9375074113601327, "grad_norm": 0.5778009730022323, "learning_rate": 8.69454755371362e-06, "loss": 0.0794, "step": 24772 }, { "epoch": 2.937625993122258, "grad_norm": 0.8391491650282211, "learning_rate": 8.69272807984286e-06, "loss": 0.089, "step": 24773 }, { "epoch": 2.9377445748843827, "grad_norm": 0.6850317050622876, "learning_rate": 8.69090875630515e-06, "loss": 0.0943, "step": 24774 }, { "epoch": 2.937863156646508, "grad_norm": 0.7564830617307748, "learning_rate": 8.689089583117269e-06, "loss": 0.0919, "step": 24775 }, { "epoch": 2.9379817384086326, "grad_norm": 0.5077279713063442, "learning_rate": 8.687270560295974e-06, "loss": 0.0539, "step": 24776 }, { "epoch": 2.938100320170758, "grad_norm": 0.528191085574374, "learning_rate": 8.68545168785804e-06, "loss": 0.0772, "step": 24777 }, { "epoch": 2.9382189019328826, "grad_norm": 1.131480232924144, "learning_rate": 8.683632965820235e-06, "loss": 0.0941, "step": 24778 }, { "epoch": 2.938337483695008, "grad_norm": 0.5230578114063742, "learning_rate": 8.681814394199325e-06, "loss": 0.0725, "step": 24779 }, { "epoch": 2.9384560654571326, "grad_norm": 0.8413845362559406, "learning_rate": 8.679995973012076e-06, "loss": 0.0939, "step": 24780 }, { "epoch": 2.9385746472192578, "grad_norm": 0.6645354165832876, "learning_rate": 8.67817770227525e-06, "loss": 0.0764, "step": 24781 }, { "epoch": 2.9386932289813825, "grad_norm": 0.6830753717994534, "learning_rate": 8.676359582005622e-06, "loss": 0.0936, "step": 24782 }, { "epoch": 2.9388118107435077, "grad_norm": 0.7895109603097671, "learning_rate": 8.67454161221993e-06, "loss": 0.0856, "step": 24783 }, { "epoch": 2.9389303925056325, "grad_norm": 0.7710936825768432, "learning_rate": 8.672723792934946e-06, "loss": 0.1071, "step": 24784 }, { "epoch": 2.9390489742677577, "grad_norm": 0.5486363583566113, "learning_rate": 8.670906124167427e-06, "loss": 0.0606, "step": 24785 }, { "epoch": 2.9391675560298824, "grad_norm": 0.729783791320936, "learning_rate": 8.669088605934137e-06, "loss": 0.1025, "step": 24786 }, { "epoch": 2.9392861377920076, "grad_norm": 0.695958022958059, "learning_rate": 8.667271238251814e-06, "loss": 0.0877, "step": 24787 }, { "epoch": 2.9394047195541324, "grad_norm": 0.5908772374661878, "learning_rate": 8.665454021137226e-06, "loss": 0.0798, "step": 24788 }, { "epoch": 2.9395233013162576, "grad_norm": 0.7300094154407167, "learning_rate": 8.663636954607119e-06, "loss": 0.103, "step": 24789 }, { "epoch": 2.939641883078383, "grad_norm": 0.40577617231242286, "learning_rate": 8.661820038678245e-06, "loss": 0.0603, "step": 24790 }, { "epoch": 2.9397604648405076, "grad_norm": 0.5689876723912375, "learning_rate": 8.660003273367365e-06, "loss": 0.0622, "step": 24791 }, { "epoch": 2.9398790466026323, "grad_norm": 0.9421611514933969, "learning_rate": 8.6581866586912e-06, "loss": 0.124, "step": 24792 }, { "epoch": 2.9399976283647575, "grad_norm": 0.4122304771566546, "learning_rate": 8.65637019466653e-06, "loss": 0.0429, "step": 24793 }, { "epoch": 2.9401162101268827, "grad_norm": 0.5904227874452045, "learning_rate": 8.654553881310078e-06, "loss": 0.0662, "step": 24794 }, { "epoch": 2.9402347918890075, "grad_norm": 0.476765823747926, "learning_rate": 8.652737718638596e-06, "loss": 0.0706, "step": 24795 }, { "epoch": 2.9403533736511323, "grad_norm": 0.5587955606299259, "learning_rate": 8.650921706668823e-06, "loss": 0.0812, "step": 24796 }, { "epoch": 2.9404719554132575, "grad_norm": 0.695489738978365, "learning_rate": 8.649105845417513e-06, "loss": 0.0965, "step": 24797 }, { "epoch": 2.9405905371753827, "grad_norm": 0.5661081060809391, "learning_rate": 8.64729013490139e-06, "loss": 0.0713, "step": 24798 }, { "epoch": 2.9407091189375074, "grad_norm": 0.7597726475816727, "learning_rate": 8.645474575137197e-06, "loss": 0.1235, "step": 24799 }, { "epoch": 2.940827700699632, "grad_norm": 0.37006713561369814, "learning_rate": 8.643659166141673e-06, "loss": 0.0445, "step": 24800 }, { "epoch": 2.9409462824617574, "grad_norm": 0.66183928496454, "learning_rate": 8.641843907931555e-06, "loss": 0.0802, "step": 24801 }, { "epoch": 2.9410648642238826, "grad_norm": 0.883969700142438, "learning_rate": 8.640028800523583e-06, "loss": 0.1116, "step": 24802 }, { "epoch": 2.9411834459860073, "grad_norm": 0.5527947764433717, "learning_rate": 8.638213843934467e-06, "loss": 0.0796, "step": 24803 }, { "epoch": 2.941302027748132, "grad_norm": 0.8099259027376194, "learning_rate": 8.636399038180975e-06, "loss": 0.1081, "step": 24804 }, { "epoch": 2.9414206095102573, "grad_norm": 0.6464849850683712, "learning_rate": 8.634584383279806e-06, "loss": 0.0832, "step": 24805 }, { "epoch": 2.9415391912723825, "grad_norm": 0.5934857759688033, "learning_rate": 8.6327698792477e-06, "loss": 0.0838, "step": 24806 }, { "epoch": 2.9416577730345073, "grad_norm": 0.633325724755356, "learning_rate": 8.630955526101389e-06, "loss": 0.1012, "step": 24807 }, { "epoch": 2.941776354796632, "grad_norm": 0.5836177236633525, "learning_rate": 8.629141323857593e-06, "loss": 0.0651, "step": 24808 }, { "epoch": 2.9418949365587572, "grad_norm": 0.531091823877163, "learning_rate": 8.627327272533046e-06, "loss": 0.0773, "step": 24809 }, { "epoch": 2.9420135183208824, "grad_norm": 0.8416932371869764, "learning_rate": 8.625513372144447e-06, "loss": 0.1096, "step": 24810 }, { "epoch": 2.942132100083007, "grad_norm": 0.543199966491379, "learning_rate": 8.623699622708554e-06, "loss": 0.065, "step": 24811 }, { "epoch": 2.942250681845132, "grad_norm": 0.4810339741080473, "learning_rate": 8.621886024242057e-06, "loss": 0.0567, "step": 24812 }, { "epoch": 2.942369263607257, "grad_norm": 1.03391179959168, "learning_rate": 8.620072576761697e-06, "loss": 0.0842, "step": 24813 }, { "epoch": 2.9424878453693823, "grad_norm": 0.43167789310052, "learning_rate": 8.618259280284164e-06, "loss": 0.0592, "step": 24814 }, { "epoch": 2.942606427131507, "grad_norm": 0.84607828112058, "learning_rate": 8.616446134826209e-06, "loss": 0.0976, "step": 24815 }, { "epoch": 2.9427250088936323, "grad_norm": 0.5566094102298988, "learning_rate": 8.614633140404521e-06, "loss": 0.0763, "step": 24816 }, { "epoch": 2.942843590655757, "grad_norm": 0.5321887873103681, "learning_rate": 8.612820297035823e-06, "loss": 0.0608, "step": 24817 }, { "epoch": 2.9429621724178823, "grad_norm": 0.7088996926898826, "learning_rate": 8.611007604736827e-06, "loss": 0.0669, "step": 24818 }, { "epoch": 2.943080754180007, "grad_norm": 0.8924546912679007, "learning_rate": 8.609195063524242e-06, "loss": 0.1047, "step": 24819 }, { "epoch": 2.9431993359421322, "grad_norm": 0.47075146627214687, "learning_rate": 8.607382673414787e-06, "loss": 0.0798, "step": 24820 }, { "epoch": 2.943317917704257, "grad_norm": 0.6462048173919328, "learning_rate": 8.605570434425156e-06, "loss": 0.0724, "step": 24821 }, { "epoch": 2.943436499466382, "grad_norm": 0.8569473381666879, "learning_rate": 8.60375834657206e-06, "loss": 0.1308, "step": 24822 }, { "epoch": 2.943555081228507, "grad_norm": 1.0918654396600556, "learning_rate": 8.601946409872205e-06, "loss": 0.0977, "step": 24823 }, { "epoch": 2.943673662990632, "grad_norm": 0.8436090383775363, "learning_rate": 8.600134624342304e-06, "loss": 0.0796, "step": 24824 }, { "epoch": 2.943792244752757, "grad_norm": 0.64977172343791, "learning_rate": 8.598322989999036e-06, "loss": 0.1004, "step": 24825 }, { "epoch": 2.943910826514882, "grad_norm": 0.5252524752845684, "learning_rate": 8.596511506859132e-06, "loss": 0.0629, "step": 24826 }, { "epoch": 2.944029408277007, "grad_norm": 0.5367878055079518, "learning_rate": 8.594700174939269e-06, "loss": 0.0806, "step": 24827 }, { "epoch": 2.944147990039132, "grad_norm": 0.638582383622751, "learning_rate": 8.592888994256151e-06, "loss": 0.0733, "step": 24828 }, { "epoch": 2.944266571801257, "grad_norm": 0.7826894977517451, "learning_rate": 8.591077964826477e-06, "loss": 0.1023, "step": 24829 }, { "epoch": 2.944385153563382, "grad_norm": 0.7345088143916557, "learning_rate": 8.589267086666945e-06, "loss": 0.102, "step": 24830 }, { "epoch": 2.944503735325507, "grad_norm": 0.8711993271787554, "learning_rate": 8.587456359794253e-06, "loss": 0.1183, "step": 24831 }, { "epoch": 2.944622317087632, "grad_norm": 0.45433350623435903, "learning_rate": 8.585645784225077e-06, "loss": 0.0747, "step": 24832 }, { "epoch": 2.9447408988497568, "grad_norm": 0.7533104116620911, "learning_rate": 8.583835359976119e-06, "loss": 0.0795, "step": 24833 }, { "epoch": 2.944859480611882, "grad_norm": 0.6116656994096187, "learning_rate": 8.582025087064069e-06, "loss": 0.0797, "step": 24834 }, { "epoch": 2.9449780623740067, "grad_norm": 0.44621516219507434, "learning_rate": 8.580214965505622e-06, "loss": 0.0592, "step": 24835 }, { "epoch": 2.945096644136132, "grad_norm": 0.5661043462759373, "learning_rate": 8.578404995317452e-06, "loss": 0.0868, "step": 24836 }, { "epoch": 2.9452152258982567, "grad_norm": 0.9192700460723962, "learning_rate": 8.57659517651625e-06, "loss": 0.1147, "step": 24837 }, { "epoch": 2.945333807660382, "grad_norm": 0.7366552196281656, "learning_rate": 8.5747855091187e-06, "loss": 0.104, "step": 24838 }, { "epoch": 2.945452389422507, "grad_norm": 1.1217396287282013, "learning_rate": 8.572975993141483e-06, "loss": 0.1206, "step": 24839 }, { "epoch": 2.945570971184632, "grad_norm": 0.6685521317026742, "learning_rate": 8.571166628601287e-06, "loss": 0.0805, "step": 24840 }, { "epoch": 2.9456895529467566, "grad_norm": 0.7118684838264259, "learning_rate": 8.569357415514786e-06, "loss": 0.1027, "step": 24841 }, { "epoch": 2.945808134708882, "grad_norm": 0.9054536084023223, "learning_rate": 8.567548353898668e-06, "loss": 0.1147, "step": 24842 }, { "epoch": 2.945926716471007, "grad_norm": 0.5126747133043137, "learning_rate": 8.565739443769599e-06, "loss": 0.06, "step": 24843 }, { "epoch": 2.9460452982331318, "grad_norm": 0.48421645178380696, "learning_rate": 8.563930685144258e-06, "loss": 0.0654, "step": 24844 }, { "epoch": 2.9461638799952565, "grad_norm": 0.8550750406716907, "learning_rate": 8.56212207803932e-06, "loss": 0.1133, "step": 24845 }, { "epoch": 2.9462824617573817, "grad_norm": 0.398746977071758, "learning_rate": 8.560313622471466e-06, "loss": 0.0518, "step": 24846 }, { "epoch": 2.946401043519507, "grad_norm": 0.6700064202730145, "learning_rate": 8.558505318457356e-06, "loss": 0.0908, "step": 24847 }, { "epoch": 2.9465196252816317, "grad_norm": 0.5268597144375241, "learning_rate": 8.556697166013664e-06, "loss": 0.0753, "step": 24848 }, { "epoch": 2.9466382070437565, "grad_norm": 0.8061635432075565, "learning_rate": 8.554889165157057e-06, "loss": 0.0977, "step": 24849 }, { "epoch": 2.9467567888058817, "grad_norm": 0.5352903107254531, "learning_rate": 8.553081315904208e-06, "loss": 0.0682, "step": 24850 }, { "epoch": 2.946875370568007, "grad_norm": 0.8506801156107616, "learning_rate": 8.551273618271788e-06, "loss": 0.1117, "step": 24851 }, { "epoch": 2.9469939523301316, "grad_norm": 0.5927901605876238, "learning_rate": 8.549466072276441e-06, "loss": 0.0779, "step": 24852 }, { "epoch": 2.9471125340922564, "grad_norm": 0.7321066979785279, "learning_rate": 8.547658677934855e-06, "loss": 0.0903, "step": 24853 }, { "epoch": 2.9472311158543816, "grad_norm": 0.41059949918497873, "learning_rate": 8.545851435263677e-06, "loss": 0.0528, "step": 24854 }, { "epoch": 2.947349697616507, "grad_norm": 0.7077606295676836, "learning_rate": 8.54404434427957e-06, "loss": 0.088, "step": 24855 }, { "epoch": 2.9474682793786315, "grad_norm": 0.7797914032803686, "learning_rate": 8.542237404999196e-06, "loss": 0.1112, "step": 24856 }, { "epoch": 2.9475868611407563, "grad_norm": 0.5395873961467965, "learning_rate": 8.540430617439208e-06, "loss": 0.0634, "step": 24857 }, { "epoch": 2.9477054429028815, "grad_norm": 0.6185022815217405, "learning_rate": 8.538623981616275e-06, "loss": 0.088, "step": 24858 }, { "epoch": 2.9478240246650067, "grad_norm": 0.6757455335825635, "learning_rate": 8.536817497547029e-06, "loss": 0.0935, "step": 24859 }, { "epoch": 2.9479426064271315, "grad_norm": 0.9048632900337387, "learning_rate": 8.53501116524815e-06, "loss": 0.1021, "step": 24860 }, { "epoch": 2.948061188189256, "grad_norm": 0.9357293604857116, "learning_rate": 8.53320498473627e-06, "loss": 0.1207, "step": 24861 }, { "epoch": 2.9481797699513814, "grad_norm": 0.6373912866305842, "learning_rate": 8.531398956028055e-06, "loss": 0.0909, "step": 24862 }, { "epoch": 2.9482983517135066, "grad_norm": 0.7187456551582418, "learning_rate": 8.529593079140133e-06, "loss": 0.0907, "step": 24863 }, { "epoch": 2.9484169334756314, "grad_norm": 0.7052476654526412, "learning_rate": 8.527787354089179e-06, "loss": 0.0791, "step": 24864 }, { "epoch": 2.9485355152377566, "grad_norm": 0.8560693790522845, "learning_rate": 8.525981780891818e-06, "loss": 0.1299, "step": 24865 }, { "epoch": 2.9486540969998813, "grad_norm": 0.7665981581159069, "learning_rate": 8.524176359564703e-06, "loss": 0.0986, "step": 24866 }, { "epoch": 2.9487726787620065, "grad_norm": 0.6707623420171622, "learning_rate": 8.522371090124479e-06, "loss": 0.0781, "step": 24867 }, { "epoch": 2.9488912605241313, "grad_norm": 0.35517605935763413, "learning_rate": 8.520565972587788e-06, "loss": 0.0459, "step": 24868 }, { "epoch": 2.9490098422862565, "grad_norm": 0.6893995382469758, "learning_rate": 8.518761006971276e-06, "loss": 0.0753, "step": 24869 }, { "epoch": 2.9491284240483813, "grad_norm": 0.6206184171640068, "learning_rate": 8.516956193291565e-06, "loss": 0.0591, "step": 24870 }, { "epoch": 2.9492470058105065, "grad_norm": 0.5875098371817383, "learning_rate": 8.515151531565316e-06, "loss": 0.0862, "step": 24871 }, { "epoch": 2.9493655875726312, "grad_norm": 0.48817298508389634, "learning_rate": 8.51334702180915e-06, "loss": 0.0792, "step": 24872 }, { "epoch": 2.9494841693347564, "grad_norm": 0.5163145161420353, "learning_rate": 8.511542664039715e-06, "loss": 0.0753, "step": 24873 }, { "epoch": 2.949602751096881, "grad_norm": 0.43760987512852734, "learning_rate": 8.50973845827362e-06, "loss": 0.0609, "step": 24874 }, { "epoch": 2.9497213328590064, "grad_norm": 0.6036459884231009, "learning_rate": 8.507934404527531e-06, "loss": 0.0858, "step": 24875 }, { "epoch": 2.949839914621131, "grad_norm": 0.9735970517292327, "learning_rate": 8.506130502818055e-06, "loss": 0.1042, "step": 24876 }, { "epoch": 2.9499584963832564, "grad_norm": 0.7573242904364428, "learning_rate": 8.50432675316183e-06, "loss": 0.1069, "step": 24877 }, { "epoch": 2.950077078145381, "grad_norm": 0.614379295989985, "learning_rate": 8.502523155575485e-06, "loss": 0.0901, "step": 24878 }, { "epoch": 2.9501956599075063, "grad_norm": 0.6069737465775981, "learning_rate": 8.500719710075645e-06, "loss": 0.0791, "step": 24879 }, { "epoch": 2.950314241669631, "grad_norm": 0.7546342542907178, "learning_rate": 8.498916416678942e-06, "loss": 0.1143, "step": 24880 }, { "epoch": 2.9504328234317563, "grad_norm": 0.6171893307501221, "learning_rate": 8.497113275401983e-06, "loss": 0.0706, "step": 24881 }, { "epoch": 2.950551405193881, "grad_norm": 0.5549327958481122, "learning_rate": 8.495310286261416e-06, "loss": 0.0771, "step": 24882 }, { "epoch": 2.9506699869560062, "grad_norm": 0.48456935229215914, "learning_rate": 8.493507449273841e-06, "loss": 0.0723, "step": 24883 }, { "epoch": 2.950788568718131, "grad_norm": 0.6980285587077166, "learning_rate": 8.491704764455893e-06, "loss": 0.0798, "step": 24884 }, { "epoch": 2.950907150480256, "grad_norm": 0.9105776784779269, "learning_rate": 8.489902231824168e-06, "loss": 0.1383, "step": 24885 }, { "epoch": 2.951025732242381, "grad_norm": 0.7045000905132064, "learning_rate": 8.488099851395314e-06, "loss": 0.1152, "step": 24886 }, { "epoch": 2.951144314004506, "grad_norm": 0.44307057672015726, "learning_rate": 8.486297623185926e-06, "loss": 0.063, "step": 24887 }, { "epoch": 2.9512628957666314, "grad_norm": 0.8150608713537312, "learning_rate": 8.48449554721262e-06, "loss": 0.1255, "step": 24888 }, { "epoch": 2.951381477528756, "grad_norm": 0.450778306241808, "learning_rate": 8.482693623492013e-06, "loss": 0.0563, "step": 24889 }, { "epoch": 2.951500059290881, "grad_norm": 0.40262196696530844, "learning_rate": 8.480891852040717e-06, "loss": 0.0562, "step": 24890 }, { "epoch": 2.951618641053006, "grad_norm": 0.5475768222472176, "learning_rate": 8.47909023287535e-06, "loss": 0.0685, "step": 24891 }, { "epoch": 2.9517372228151313, "grad_norm": 0.6165750851533892, "learning_rate": 8.4772887660125e-06, "loss": 0.0706, "step": 24892 }, { "epoch": 2.951855804577256, "grad_norm": 0.6231212714169482, "learning_rate": 8.47548745146879e-06, "loss": 0.0749, "step": 24893 }, { "epoch": 2.951974386339381, "grad_norm": 0.40342469706944756, "learning_rate": 8.473686289260821e-06, "loss": 0.0424, "step": 24894 }, { "epoch": 2.952092968101506, "grad_norm": 0.6520423772733575, "learning_rate": 8.471885279405207e-06, "loss": 0.0765, "step": 24895 }, { "epoch": 2.952211549863631, "grad_norm": 0.6804896637432355, "learning_rate": 8.470084421918522e-06, "loss": 0.0764, "step": 24896 }, { "epoch": 2.952330131625756, "grad_norm": 0.5279512521683731, "learning_rate": 8.46828371681741e-06, "loss": 0.0637, "step": 24897 }, { "epoch": 2.9524487133878807, "grad_norm": 0.6622766670859754, "learning_rate": 8.466483164118439e-06, "loss": 0.0848, "step": 24898 }, { "epoch": 2.952567295150006, "grad_norm": 0.7534715583473958, "learning_rate": 8.46468276383822e-06, "loss": 0.0989, "step": 24899 }, { "epoch": 2.952685876912131, "grad_norm": 0.4014553904105658, "learning_rate": 8.462882515993348e-06, "loss": 0.0545, "step": 24900 }, { "epoch": 2.952804458674256, "grad_norm": 0.41021199532896985, "learning_rate": 8.461082420600419e-06, "loss": 0.0556, "step": 24901 }, { "epoch": 2.9529230404363807, "grad_norm": 0.5031025696954625, "learning_rate": 8.459282477676036e-06, "loss": 0.0648, "step": 24902 }, { "epoch": 2.953041622198506, "grad_norm": 0.5224439758505441, "learning_rate": 8.45748268723678e-06, "loss": 0.0688, "step": 24903 }, { "epoch": 2.953160203960631, "grad_norm": 0.6120297824600056, "learning_rate": 8.455683049299245e-06, "loss": 0.084, "step": 24904 }, { "epoch": 2.953278785722756, "grad_norm": 0.7840301593991882, "learning_rate": 8.453883563880027e-06, "loss": 0.0927, "step": 24905 }, { "epoch": 2.9533973674848806, "grad_norm": 0.664680639161485, "learning_rate": 8.45208423099571e-06, "loss": 0.0858, "step": 24906 }, { "epoch": 2.953515949247006, "grad_norm": 0.4663220041705974, "learning_rate": 8.450285050662896e-06, "loss": 0.0611, "step": 24907 }, { "epoch": 2.953634531009131, "grad_norm": 0.5758439353293299, "learning_rate": 8.448486022898139e-06, "loss": 0.0746, "step": 24908 }, { "epoch": 2.9537531127712557, "grad_norm": 0.6130956577972677, "learning_rate": 8.446687147718063e-06, "loss": 0.0744, "step": 24909 }, { "epoch": 2.9538716945333805, "grad_norm": 0.6976046985249674, "learning_rate": 8.444888425139223e-06, "loss": 0.0789, "step": 24910 }, { "epoch": 2.9539902762955057, "grad_norm": 0.5626273116748953, "learning_rate": 8.44308985517821e-06, "loss": 0.0636, "step": 24911 }, { "epoch": 2.954108858057631, "grad_norm": 0.46582125879776565, "learning_rate": 8.441291437851605e-06, "loss": 0.0575, "step": 24912 }, { "epoch": 2.9542274398197557, "grad_norm": 0.974592152147461, "learning_rate": 8.439493173175996e-06, "loss": 0.1461, "step": 24913 }, { "epoch": 2.954346021581881, "grad_norm": 0.6057572292138327, "learning_rate": 8.437695061167944e-06, "loss": 0.0574, "step": 24914 }, { "epoch": 2.9544646033440056, "grad_norm": 0.7322178267994706, "learning_rate": 8.435897101844035e-06, "loss": 0.0784, "step": 24915 }, { "epoch": 2.954583185106131, "grad_norm": 0.7639605283537837, "learning_rate": 8.43409929522084e-06, "loss": 0.0948, "step": 24916 }, { "epoch": 2.9547017668682556, "grad_norm": 0.6881922942485581, "learning_rate": 8.432301641314935e-06, "loss": 0.0772, "step": 24917 }, { "epoch": 2.954820348630381, "grad_norm": 0.6065526348839341, "learning_rate": 8.430504140142903e-06, "loss": 0.0834, "step": 24918 }, { "epoch": 2.9549389303925055, "grad_norm": 0.5705579041980235, "learning_rate": 8.428706791721283e-06, "loss": 0.0535, "step": 24919 }, { "epoch": 2.9550575121546307, "grad_norm": 0.4436107608015805, "learning_rate": 8.426909596066687e-06, "loss": 0.0661, "step": 24920 }, { "epoch": 2.9551760939167555, "grad_norm": 0.5276316933132184, "learning_rate": 8.425112553195649e-06, "loss": 0.0656, "step": 24921 }, { "epoch": 2.9552946756788807, "grad_norm": 0.7230092524933076, "learning_rate": 8.42331566312476e-06, "loss": 0.0973, "step": 24922 }, { "epoch": 2.9554132574410055, "grad_norm": 0.658192532938016, "learning_rate": 8.421518925870552e-06, "loss": 0.0942, "step": 24923 }, { "epoch": 2.9555318392031307, "grad_norm": 0.7096681203841102, "learning_rate": 8.419722341449628e-06, "loss": 0.0851, "step": 24924 }, { "epoch": 2.9556504209652554, "grad_norm": 0.615011693064617, "learning_rate": 8.417925909878524e-06, "loss": 0.0661, "step": 24925 }, { "epoch": 2.9557690027273806, "grad_norm": 0.9388965619283623, "learning_rate": 8.41612963117381e-06, "loss": 0.0986, "step": 24926 }, { "epoch": 2.9558875844895054, "grad_norm": 0.5065306430750611, "learning_rate": 8.414333505352042e-06, "loss": 0.0695, "step": 24927 }, { "epoch": 2.9560061662516306, "grad_norm": 0.45424222150541815, "learning_rate": 8.412537532429782e-06, "loss": 0.0556, "step": 24928 }, { "epoch": 2.9561247480137554, "grad_norm": 1.1487314347013853, "learning_rate": 8.410741712423596e-06, "loss": 0.1431, "step": 24929 }, { "epoch": 2.9562433297758806, "grad_norm": 0.8276240532007881, "learning_rate": 8.408946045350006e-06, "loss": 0.1235, "step": 24930 }, { "epoch": 2.9563619115380053, "grad_norm": 0.6186766952924879, "learning_rate": 8.407150531225608e-06, "loss": 0.0792, "step": 24931 }, { "epoch": 2.9564804933001305, "grad_norm": 0.6409551939919713, "learning_rate": 8.405355170066926e-06, "loss": 0.0874, "step": 24932 }, { "epoch": 2.9565990750622553, "grad_norm": 0.6432513905771361, "learning_rate": 8.403559961890528e-06, "loss": 0.0705, "step": 24933 }, { "epoch": 2.9567176568243805, "grad_norm": 0.8751046585981382, "learning_rate": 8.401764906712937e-06, "loss": 0.1084, "step": 24934 }, { "epoch": 2.9568362385865052, "grad_norm": 0.8442325381182473, "learning_rate": 8.399970004550736e-06, "loss": 0.1236, "step": 24935 }, { "epoch": 2.9569548203486304, "grad_norm": 1.0964589189566087, "learning_rate": 8.398175255420448e-06, "loss": 0.095, "step": 24936 }, { "epoch": 2.957073402110755, "grad_norm": 0.587066634022615, "learning_rate": 8.396380659338627e-06, "loss": 0.0738, "step": 24937 }, { "epoch": 2.9571919838728804, "grad_norm": 0.8372759296058759, "learning_rate": 8.394586216321814e-06, "loss": 0.0823, "step": 24938 }, { "epoch": 2.957310565635005, "grad_norm": 0.6830286638055436, "learning_rate": 8.392791926386554e-06, "loss": 0.0767, "step": 24939 }, { "epoch": 2.9574291473971304, "grad_norm": 0.6784927743323133, "learning_rate": 8.390997789549395e-06, "loss": 0.0678, "step": 24940 }, { "epoch": 2.9575477291592556, "grad_norm": 0.6682048388701711, "learning_rate": 8.389203805826853e-06, "loss": 0.0833, "step": 24941 }, { "epoch": 2.9576663109213803, "grad_norm": 0.9032408096846829, "learning_rate": 8.3874099752355e-06, "loss": 0.1319, "step": 24942 }, { "epoch": 2.957784892683505, "grad_norm": 0.5744825164087858, "learning_rate": 8.385616297791846e-06, "loss": 0.0753, "step": 24943 }, { "epoch": 2.9579034744456303, "grad_norm": 1.066853948100581, "learning_rate": 8.383822773512442e-06, "loss": 0.1414, "step": 24944 }, { "epoch": 2.9580220562077555, "grad_norm": 0.7057981815665716, "learning_rate": 8.382029402413801e-06, "loss": 0.099, "step": 24945 }, { "epoch": 2.9581406379698802, "grad_norm": 0.5536071748795899, "learning_rate": 8.380236184512489e-06, "loss": 0.0704, "step": 24946 }, { "epoch": 2.958259219732005, "grad_norm": 0.6924956092841382, "learning_rate": 8.378443119825011e-06, "loss": 0.0762, "step": 24947 }, { "epoch": 2.95837780149413, "grad_norm": 0.8730313336863538, "learning_rate": 8.3766502083679e-06, "loss": 0.1284, "step": 24948 }, { "epoch": 2.9584963832562554, "grad_norm": 0.6459489174827514, "learning_rate": 8.374857450157693e-06, "loss": 0.0868, "step": 24949 }, { "epoch": 2.95861496501838, "grad_norm": 0.3820607941439928, "learning_rate": 8.373064845210913e-06, "loss": 0.0404, "step": 24950 }, { "epoch": 2.958733546780505, "grad_norm": 0.4814439350680984, "learning_rate": 8.371272393544093e-06, "loss": 0.0518, "step": 24951 }, { "epoch": 2.95885212854263, "grad_norm": 0.6389993808306158, "learning_rate": 8.369480095173735e-06, "loss": 0.0835, "step": 24952 }, { "epoch": 2.9589707103047553, "grad_norm": 1.1415060595763102, "learning_rate": 8.36768795011639e-06, "loss": 0.1438, "step": 24953 }, { "epoch": 2.95908929206688, "grad_norm": 0.7000315933670301, "learning_rate": 8.365895958388561e-06, "loss": 0.0913, "step": 24954 }, { "epoch": 2.959207873829005, "grad_norm": 0.566761377535573, "learning_rate": 8.36410412000678e-06, "loss": 0.0638, "step": 24955 }, { "epoch": 2.95932645559113, "grad_norm": 0.5980539438693783, "learning_rate": 8.362312434987543e-06, "loss": 0.0839, "step": 24956 }, { "epoch": 2.9594450373532553, "grad_norm": 0.6694450929582195, "learning_rate": 8.3605209033474e-06, "loss": 0.0787, "step": 24957 }, { "epoch": 2.95956361911538, "grad_norm": 0.523309094179477, "learning_rate": 8.35872952510284e-06, "loss": 0.072, "step": 24958 }, { "epoch": 2.9596822008775048, "grad_norm": 0.6343074793551762, "learning_rate": 8.356938300270386e-06, "loss": 0.0862, "step": 24959 }, { "epoch": 2.95980078263963, "grad_norm": 0.5588065474297204, "learning_rate": 8.355147228866555e-06, "loss": 0.0727, "step": 24960 }, { "epoch": 2.959919364401755, "grad_norm": 0.7615503096170427, "learning_rate": 8.353356310907853e-06, "loss": 0.0846, "step": 24961 }, { "epoch": 2.96003794616388, "grad_norm": 0.5055747846184072, "learning_rate": 8.3515655464108e-06, "loss": 0.0539, "step": 24962 }, { "epoch": 2.960156527926005, "grad_norm": 0.6113934538800171, "learning_rate": 8.34977493539189e-06, "loss": 0.0708, "step": 24963 }, { "epoch": 2.96027510968813, "grad_norm": 0.523179979814414, "learning_rate": 8.347984477867637e-06, "loss": 0.0601, "step": 24964 }, { "epoch": 2.960393691450255, "grad_norm": 0.7051673993579949, "learning_rate": 8.34619417385455e-06, "loss": 0.0889, "step": 24965 }, { "epoch": 2.96051227321238, "grad_norm": 0.7551440718466316, "learning_rate": 8.344404023369126e-06, "loss": 0.1023, "step": 24966 }, { "epoch": 2.960630854974505, "grad_norm": 0.6617571194315479, "learning_rate": 8.342614026427876e-06, "loss": 0.0886, "step": 24967 }, { "epoch": 2.96074943673663, "grad_norm": 0.8119947604468143, "learning_rate": 8.340824183047293e-06, "loss": 0.0934, "step": 24968 }, { "epoch": 2.960868018498755, "grad_norm": 0.6300027997815346, "learning_rate": 8.339034493243894e-06, "loss": 0.0715, "step": 24969 }, { "epoch": 2.96098660026088, "grad_norm": 0.5744852813496583, "learning_rate": 8.337244957034157e-06, "loss": 0.0646, "step": 24970 }, { "epoch": 2.961105182023005, "grad_norm": 0.5950266103382224, "learning_rate": 8.335455574434589e-06, "loss": 0.0747, "step": 24971 }, { "epoch": 2.9612237637851297, "grad_norm": 0.7031356589860767, "learning_rate": 8.333666345461686e-06, "loss": 0.0886, "step": 24972 }, { "epoch": 2.961342345547255, "grad_norm": 0.6283254057240496, "learning_rate": 8.331877270131949e-06, "loss": 0.077, "step": 24973 }, { "epoch": 2.9614609273093797, "grad_norm": 0.7839815011265179, "learning_rate": 8.330088348461854e-06, "loss": 0.0947, "step": 24974 }, { "epoch": 2.961579509071505, "grad_norm": 0.556575998804133, "learning_rate": 8.328299580467905e-06, "loss": 0.0733, "step": 24975 }, { "epoch": 2.9616980908336297, "grad_norm": 0.7212489717964854, "learning_rate": 8.326510966166588e-06, "loss": 0.09, "step": 24976 }, { "epoch": 2.961816672595755, "grad_norm": 0.616468115921763, "learning_rate": 8.324722505574392e-06, "loss": 0.0781, "step": 24977 }, { "epoch": 2.9619352543578796, "grad_norm": 0.7464546269954274, "learning_rate": 8.322934198707818e-06, "loss": 0.1033, "step": 24978 }, { "epoch": 2.962053836120005, "grad_norm": 0.7326200635554021, "learning_rate": 8.32114604558332e-06, "loss": 0.1105, "step": 24979 }, { "epoch": 2.9621724178821296, "grad_norm": 0.6174065793461089, "learning_rate": 8.319358046217421e-06, "loss": 0.092, "step": 24980 }, { "epoch": 2.962290999644255, "grad_norm": 0.825138387094861, "learning_rate": 8.317570200626577e-06, "loss": 0.1058, "step": 24981 }, { "epoch": 2.9624095814063796, "grad_norm": 0.76689809894808, "learning_rate": 8.315782508827277e-06, "loss": 0.09, "step": 24982 }, { "epoch": 2.9625281631685048, "grad_norm": 0.6810528099339241, "learning_rate": 8.313994970836007e-06, "loss": 0.1004, "step": 24983 }, { "epoch": 2.9626467449306295, "grad_norm": 0.595290978873251, "learning_rate": 8.312207586669247e-06, "loss": 0.0802, "step": 24984 }, { "epoch": 2.9627653266927547, "grad_norm": 0.6317833699046512, "learning_rate": 8.31042035634346e-06, "loss": 0.0878, "step": 24985 }, { "epoch": 2.9628839084548795, "grad_norm": 0.4705830497135391, "learning_rate": 8.308633279875131e-06, "loss": 0.0753, "step": 24986 }, { "epoch": 2.9630024902170047, "grad_norm": 0.6474715211364932, "learning_rate": 8.306846357280734e-06, "loss": 0.0928, "step": 24987 }, { "epoch": 2.9631210719791294, "grad_norm": 0.5600371833194989, "learning_rate": 8.305059588576746e-06, "loss": 0.0871, "step": 24988 }, { "epoch": 2.9632396537412546, "grad_norm": 0.6482965053701433, "learning_rate": 8.303272973779645e-06, "loss": 0.098, "step": 24989 }, { "epoch": 2.96335823550338, "grad_norm": 0.7784142344313768, "learning_rate": 8.301486512905873e-06, "loss": 0.1271, "step": 24990 }, { "epoch": 2.9634768172655046, "grad_norm": 0.37296140211835693, "learning_rate": 8.299700205971936e-06, "loss": 0.0545, "step": 24991 }, { "epoch": 2.9635953990276294, "grad_norm": 0.7429509722716806, "learning_rate": 8.297914052994275e-06, "loss": 0.0812, "step": 24992 }, { "epoch": 2.9637139807897546, "grad_norm": 0.5310847473421103, "learning_rate": 8.296128053989372e-06, "loss": 0.0757, "step": 24993 }, { "epoch": 2.9638325625518798, "grad_norm": 0.5524271113748292, "learning_rate": 8.294342208973671e-06, "loss": 0.0868, "step": 24994 }, { "epoch": 2.9639511443140045, "grad_norm": 0.6420537004001788, "learning_rate": 8.292556517963661e-06, "loss": 0.0827, "step": 24995 }, { "epoch": 2.9640697260761293, "grad_norm": 0.7446449060933787, "learning_rate": 8.29077098097579e-06, "loss": 0.09, "step": 24996 }, { "epoch": 2.9641883078382545, "grad_norm": 0.620192476012595, "learning_rate": 8.288985598026517e-06, "loss": 0.0907, "step": 24997 }, { "epoch": 2.9643068896003797, "grad_norm": 0.5121201546230418, "learning_rate": 8.287200369132302e-06, "loss": 0.0622, "step": 24998 }, { "epoch": 2.9644254713625044, "grad_norm": 0.5149099330101119, "learning_rate": 8.285415294309608e-06, "loss": 0.0689, "step": 24999 }, { "epoch": 2.964544053124629, "grad_norm": 0.6606328405294786, "learning_rate": 8.283630373574896e-06, "loss": 0.0828, "step": 25000 }, { "epoch": 2.9646626348867544, "grad_norm": 0.7175444026779345, "learning_rate": 8.281845606944596e-06, "loss": 0.0812, "step": 25001 }, { "epoch": 2.9647812166488796, "grad_norm": 0.5784303993634138, "learning_rate": 8.280060994435196e-06, "loss": 0.0859, "step": 25002 }, { "epoch": 2.9648997984110044, "grad_norm": 0.5397351333180957, "learning_rate": 8.278276536063123e-06, "loss": 0.076, "step": 25003 }, { "epoch": 2.965018380173129, "grad_norm": 0.5455669767193746, "learning_rate": 8.27649223184484e-06, "loss": 0.0583, "step": 25004 }, { "epoch": 2.9651369619352543, "grad_norm": 0.7795770411864749, "learning_rate": 8.274708081796775e-06, "loss": 0.1262, "step": 25005 }, { "epoch": 2.9652555436973795, "grad_norm": 0.5786239306539307, "learning_rate": 8.27292408593541e-06, "loss": 0.0755, "step": 25006 }, { "epoch": 2.9653741254595043, "grad_norm": 0.6720398280172609, "learning_rate": 8.271140244277164e-06, "loss": 0.0742, "step": 25007 }, { "epoch": 2.965492707221629, "grad_norm": 0.6616950366586247, "learning_rate": 8.26935655683849e-06, "loss": 0.099, "step": 25008 }, { "epoch": 2.9656112889837543, "grad_norm": 0.5150329522093849, "learning_rate": 8.267573023635832e-06, "loss": 0.0566, "step": 25009 }, { "epoch": 2.9657298707458795, "grad_norm": 0.4846534428684467, "learning_rate": 8.26578964468563e-06, "loss": 0.0587, "step": 25010 }, { "epoch": 2.965848452508004, "grad_norm": 0.6917207924186327, "learning_rate": 8.264006420004338e-06, "loss": 0.076, "step": 25011 }, { "epoch": 2.965967034270129, "grad_norm": 0.586488191059885, "learning_rate": 8.262223349608366e-06, "loss": 0.0671, "step": 25012 }, { "epoch": 2.966085616032254, "grad_norm": 0.8621787675289279, "learning_rate": 8.260440433514189e-06, "loss": 0.1044, "step": 25013 }, { "epoch": 2.9662041977943794, "grad_norm": 1.151435090955448, "learning_rate": 8.258657671738212e-06, "loss": 0.1375, "step": 25014 }, { "epoch": 2.966322779556504, "grad_norm": 0.8155259989003408, "learning_rate": 8.256875064296882e-06, "loss": 0.1142, "step": 25015 }, { "epoch": 2.9664413613186293, "grad_norm": 0.4618325501879452, "learning_rate": 8.255092611206633e-06, "loss": 0.0503, "step": 25016 }, { "epoch": 2.966559943080754, "grad_norm": 0.5129818091889444, "learning_rate": 8.253310312483897e-06, "loss": 0.0682, "step": 25017 }, { "epoch": 2.9666785248428793, "grad_norm": 0.6090363915865067, "learning_rate": 8.251528168145109e-06, "loss": 0.0837, "step": 25018 }, { "epoch": 2.966797106605004, "grad_norm": 0.6858547679689723, "learning_rate": 8.249746178206688e-06, "loss": 0.0814, "step": 25019 }, { "epoch": 2.9669156883671293, "grad_norm": 0.6904840052808182, "learning_rate": 8.247964342685066e-06, "loss": 0.0893, "step": 25020 }, { "epoch": 2.967034270129254, "grad_norm": 0.7925494334207361, "learning_rate": 8.246182661596669e-06, "loss": 0.1103, "step": 25021 }, { "epoch": 2.9671528518913792, "grad_norm": 1.0122343222670969, "learning_rate": 8.244401134957932e-06, "loss": 0.1413, "step": 25022 }, { "epoch": 2.967271433653504, "grad_norm": 0.6578290043626758, "learning_rate": 8.242619762785253e-06, "loss": 0.0793, "step": 25023 }, { "epoch": 2.967390015415629, "grad_norm": 0.5304652616567476, "learning_rate": 8.240838545095087e-06, "loss": 0.0798, "step": 25024 }, { "epoch": 2.967508597177754, "grad_norm": 0.47449869860404453, "learning_rate": 8.239057481903828e-06, "loss": 0.0637, "step": 25025 }, { "epoch": 2.967627178939879, "grad_norm": 0.6700878788742385, "learning_rate": 8.237276573227906e-06, "loss": 0.0843, "step": 25026 }, { "epoch": 2.967745760702004, "grad_norm": 0.682191247952562, "learning_rate": 8.23549581908374e-06, "loss": 0.0883, "step": 25027 }, { "epoch": 2.967864342464129, "grad_norm": 0.5458445562386511, "learning_rate": 8.233715219487744e-06, "loss": 0.0726, "step": 25028 }, { "epoch": 2.967982924226254, "grad_norm": 0.6453179608862155, "learning_rate": 8.23193477445634e-06, "loss": 0.0942, "step": 25029 }, { "epoch": 2.968101505988379, "grad_norm": 0.6551343656216135, "learning_rate": 8.230154484005931e-06, "loss": 0.0751, "step": 25030 }, { "epoch": 2.968220087750504, "grad_norm": 0.6788283655715286, "learning_rate": 8.228374348152928e-06, "loss": 0.0892, "step": 25031 }, { "epoch": 2.968338669512629, "grad_norm": 0.6485794714592112, "learning_rate": 8.226594366913751e-06, "loss": 0.0804, "step": 25032 }, { "epoch": 2.968457251274754, "grad_norm": 0.628419345756267, "learning_rate": 8.22481454030481e-06, "loss": 0.0858, "step": 25033 }, { "epoch": 2.968575833036879, "grad_norm": 0.8395659290745587, "learning_rate": 8.223034868342503e-06, "loss": 0.1034, "step": 25034 }, { "epoch": 2.9686944147990038, "grad_norm": 0.4864948025161855, "learning_rate": 8.22125535104324e-06, "loss": 0.0577, "step": 25035 }, { "epoch": 2.968812996561129, "grad_norm": 0.31370958234907537, "learning_rate": 8.219475988423425e-06, "loss": 0.044, "step": 25036 }, { "epoch": 2.9689315783232537, "grad_norm": 0.5497591518060113, "learning_rate": 8.217696780499465e-06, "loss": 0.0711, "step": 25037 }, { "epoch": 2.969050160085379, "grad_norm": 0.7402247613853963, "learning_rate": 8.21591772728776e-06, "loss": 0.0824, "step": 25038 }, { "epoch": 2.969168741847504, "grad_norm": 0.8431266282939593, "learning_rate": 8.214138828804712e-06, "loss": 0.0946, "step": 25039 }, { "epoch": 2.969287323609629, "grad_norm": 0.43937806776982236, "learning_rate": 8.212360085066728e-06, "loss": 0.0594, "step": 25040 }, { "epoch": 2.9694059053717536, "grad_norm": 0.6895547736020192, "learning_rate": 8.210581496090192e-06, "loss": 0.0926, "step": 25041 }, { "epoch": 2.969524487133879, "grad_norm": 0.8912721197965394, "learning_rate": 8.208803061891505e-06, "loss": 0.0992, "step": 25042 }, { "epoch": 2.969643068896004, "grad_norm": 0.5135453498500496, "learning_rate": 8.20702478248706e-06, "loss": 0.057, "step": 25043 }, { "epoch": 2.969761650658129, "grad_norm": 1.037444693549099, "learning_rate": 8.205246657893265e-06, "loss": 0.1179, "step": 25044 }, { "epoch": 2.9698802324202536, "grad_norm": 0.9024078168717531, "learning_rate": 8.203468688126493e-06, "loss": 0.0812, "step": 25045 }, { "epoch": 2.9699988141823788, "grad_norm": 0.5554014741807811, "learning_rate": 8.201690873203139e-06, "loss": 0.0688, "step": 25046 }, { "epoch": 2.970117395944504, "grad_norm": 0.6714481478236857, "learning_rate": 8.199913213139598e-06, "loss": 0.0921, "step": 25047 }, { "epoch": 2.9702359777066287, "grad_norm": 0.588650698050504, "learning_rate": 8.198135707952256e-06, "loss": 0.0811, "step": 25048 }, { "epoch": 2.9703545594687535, "grad_norm": 0.6701419682103539, "learning_rate": 8.196358357657506e-06, "loss": 0.0934, "step": 25049 }, { "epoch": 2.9704731412308787, "grad_norm": 0.8381190804391415, "learning_rate": 8.194581162271708e-06, "loss": 0.0985, "step": 25050 }, { "epoch": 2.970591722993004, "grad_norm": 0.8113337098540379, "learning_rate": 8.192804121811285e-06, "loss": 0.0866, "step": 25051 }, { "epoch": 2.9707103047551287, "grad_norm": 0.6823460880288441, "learning_rate": 8.191027236292584e-06, "loss": 0.0913, "step": 25052 }, { "epoch": 2.9708288865172534, "grad_norm": 0.7223242128255606, "learning_rate": 8.189250505732002e-06, "loss": 0.0822, "step": 25053 }, { "epoch": 2.9709474682793786, "grad_norm": 0.5341774446927711, "learning_rate": 8.187473930145914e-06, "loss": 0.0695, "step": 25054 }, { "epoch": 2.971066050041504, "grad_norm": 0.7374500866007058, "learning_rate": 8.18569750955071e-06, "loss": 0.0835, "step": 25055 }, { "epoch": 2.9711846318036286, "grad_norm": 0.5536378343154398, "learning_rate": 8.183921243962747e-06, "loss": 0.0516, "step": 25056 }, { "epoch": 2.9713032135657533, "grad_norm": 0.6965738086151192, "learning_rate": 8.182145133398408e-06, "loss": 0.092, "step": 25057 }, { "epoch": 2.9714217953278785, "grad_norm": 1.1716439432172778, "learning_rate": 8.18036917787407e-06, "loss": 0.1129, "step": 25058 }, { "epoch": 2.9715403770900037, "grad_norm": 0.45483086163496295, "learning_rate": 8.1785933774061e-06, "loss": 0.0525, "step": 25059 }, { "epoch": 2.9716589588521285, "grad_norm": 0.5092475800589117, "learning_rate": 8.17681773201088e-06, "loss": 0.0704, "step": 25060 }, { "epoch": 2.9717775406142533, "grad_norm": 0.6717739268805931, "learning_rate": 8.175042241704753e-06, "loss": 0.0915, "step": 25061 }, { "epoch": 2.9718961223763785, "grad_norm": 0.6178427753006543, "learning_rate": 8.173266906504124e-06, "loss": 0.0829, "step": 25062 }, { "epoch": 2.9720147041385037, "grad_norm": 0.7315092287547226, "learning_rate": 8.171491726425329e-06, "loss": 0.0759, "step": 25063 }, { "epoch": 2.9721332859006284, "grad_norm": 0.5244750610410481, "learning_rate": 8.169716701484744e-06, "loss": 0.0589, "step": 25064 }, { "epoch": 2.9722518676627536, "grad_norm": 0.5256088894048527, "learning_rate": 8.167941831698733e-06, "loss": 0.073, "step": 25065 }, { "epoch": 2.9723704494248784, "grad_norm": 0.5570578480607858, "learning_rate": 8.166167117083656e-06, "loss": 0.0704, "step": 25066 }, { "epoch": 2.9724890311870036, "grad_norm": 0.7344410582613705, "learning_rate": 8.164392557655887e-06, "loss": 0.0919, "step": 25067 }, { "epoch": 2.9726076129491283, "grad_norm": 0.6438672764973817, "learning_rate": 8.162618153431753e-06, "loss": 0.0884, "step": 25068 }, { "epoch": 2.9727261947112535, "grad_norm": 0.6042705331948534, "learning_rate": 8.160843904427652e-06, "loss": 0.0724, "step": 25069 }, { "epoch": 2.9728447764733783, "grad_norm": 0.5890480272489989, "learning_rate": 8.159069810659909e-06, "loss": 0.0828, "step": 25070 }, { "epoch": 2.9729633582355035, "grad_norm": 0.4379957252162962, "learning_rate": 8.157295872144901e-06, "loss": 0.0599, "step": 25071 }, { "epoch": 2.9730819399976283, "grad_norm": 0.4819715993467981, "learning_rate": 8.155522088898954e-06, "loss": 0.0627, "step": 25072 }, { "epoch": 2.9732005217597535, "grad_norm": 0.537008111018313, "learning_rate": 8.153748460938454e-06, "loss": 0.0798, "step": 25073 }, { "epoch": 2.9733191035218782, "grad_norm": 1.0088238905509268, "learning_rate": 8.151974988279728e-06, "loss": 0.1404, "step": 25074 }, { "epoch": 2.9734376852840034, "grad_norm": 1.0400447928137853, "learning_rate": 8.15020167093913e-06, "loss": 0.0955, "step": 25075 }, { "epoch": 2.973556267046128, "grad_norm": 0.9964000230263935, "learning_rate": 8.148428508933012e-06, "loss": 0.1431, "step": 25076 }, { "epoch": 2.9736748488082534, "grad_norm": 0.7717813645370776, "learning_rate": 8.146655502277717e-06, "loss": 0.1109, "step": 25077 }, { "epoch": 2.973793430570378, "grad_norm": 0.9096200466330429, "learning_rate": 8.144882650989599e-06, "loss": 0.0935, "step": 25078 }, { "epoch": 2.9739120123325034, "grad_norm": 0.6179439713576736, "learning_rate": 8.143109955084988e-06, "loss": 0.0716, "step": 25079 }, { "epoch": 2.974030594094628, "grad_norm": 0.6080656266707402, "learning_rate": 8.141337414580231e-06, "loss": 0.0588, "step": 25080 }, { "epoch": 2.9741491758567533, "grad_norm": 0.8512911391406075, "learning_rate": 8.13956502949167e-06, "loss": 0.1092, "step": 25081 }, { "epoch": 2.974267757618878, "grad_norm": 0.7962475539373741, "learning_rate": 8.13779279983565e-06, "loss": 0.1053, "step": 25082 }, { "epoch": 2.9743863393810033, "grad_norm": 0.8183231892099131, "learning_rate": 8.136020725628487e-06, "loss": 0.0845, "step": 25083 }, { "epoch": 2.974504921143128, "grad_norm": 0.626896227545927, "learning_rate": 8.134248806886549e-06, "loss": 0.0738, "step": 25084 }, { "epoch": 2.9746235029052532, "grad_norm": 0.7393767906963233, "learning_rate": 8.132477043626147e-06, "loss": 0.1124, "step": 25085 }, { "epoch": 2.974742084667378, "grad_norm": 0.48056985660529977, "learning_rate": 8.130705435863622e-06, "loss": 0.0503, "step": 25086 }, { "epoch": 2.974860666429503, "grad_norm": 0.5775181147132066, "learning_rate": 8.128933983615309e-06, "loss": 0.0785, "step": 25087 }, { "epoch": 2.9749792481916284, "grad_norm": 0.7809476031598717, "learning_rate": 8.127162686897533e-06, "loss": 0.0883, "step": 25088 }, { "epoch": 2.975097829953753, "grad_norm": 0.8324888258418185, "learning_rate": 8.125391545726635e-06, "loss": 0.1128, "step": 25089 }, { "epoch": 2.975216411715878, "grad_norm": 0.487183895950958, "learning_rate": 8.123620560118928e-06, "loss": 0.0745, "step": 25090 }, { "epoch": 2.975334993478003, "grad_norm": 0.5206158406887362, "learning_rate": 8.121849730090741e-06, "loss": 0.0805, "step": 25091 }, { "epoch": 2.9754535752401283, "grad_norm": 0.6666742379371215, "learning_rate": 8.120079055658402e-06, "loss": 0.0664, "step": 25092 }, { "epoch": 2.975572157002253, "grad_norm": 0.8207468404070949, "learning_rate": 8.118308536838245e-06, "loss": 0.0803, "step": 25093 }, { "epoch": 2.975690738764378, "grad_norm": 0.7380469103614871, "learning_rate": 8.116538173646574e-06, "loss": 0.0754, "step": 25094 }, { "epoch": 2.975809320526503, "grad_norm": 0.7258183859010993, "learning_rate": 8.114767966099715e-06, "loss": 0.0897, "step": 25095 }, { "epoch": 2.9759279022886282, "grad_norm": 0.8576498891058388, "learning_rate": 8.11299791421399e-06, "loss": 0.1104, "step": 25096 }, { "epoch": 2.976046484050753, "grad_norm": 0.5385527161379939, "learning_rate": 8.111228018005718e-06, "loss": 0.0779, "step": 25097 }, { "epoch": 2.9761650658128778, "grad_norm": 0.6299972618197482, "learning_rate": 8.109458277491212e-06, "loss": 0.0679, "step": 25098 }, { "epoch": 2.976283647575003, "grad_norm": 0.6564966006296298, "learning_rate": 8.107688692686786e-06, "loss": 0.0991, "step": 25099 }, { "epoch": 2.976402229337128, "grad_norm": 1.033689108991281, "learning_rate": 8.105919263608766e-06, "loss": 0.1136, "step": 25100 }, { "epoch": 2.976520811099253, "grad_norm": 1.2916528159799927, "learning_rate": 8.104149990273444e-06, "loss": 0.1072, "step": 25101 }, { "epoch": 2.9766393928613777, "grad_norm": 0.49294503347397245, "learning_rate": 8.10238087269714e-06, "loss": 0.0548, "step": 25102 }, { "epoch": 2.976757974623503, "grad_norm": 0.6955175169004751, "learning_rate": 8.100611910896164e-06, "loss": 0.0983, "step": 25103 }, { "epoch": 2.976876556385628, "grad_norm": 0.7190641452825438, "learning_rate": 8.098843104886833e-06, "loss": 0.0832, "step": 25104 }, { "epoch": 2.976995138147753, "grad_norm": 0.5776247333443606, "learning_rate": 8.09707445468543e-06, "loss": 0.0726, "step": 25105 }, { "epoch": 2.9771137199098776, "grad_norm": 0.6973471171558073, "learning_rate": 8.095305960308278e-06, "loss": 0.1049, "step": 25106 }, { "epoch": 2.977232301672003, "grad_norm": 0.5798014429969458, "learning_rate": 8.093537621771671e-06, "loss": 0.0644, "step": 25107 }, { "epoch": 2.977350883434128, "grad_norm": 0.5008700981837714, "learning_rate": 8.091769439091917e-06, "loss": 0.0597, "step": 25108 }, { "epoch": 2.9774694651962528, "grad_norm": 0.6633031513686832, "learning_rate": 8.090001412285315e-06, "loss": 0.087, "step": 25109 }, { "epoch": 2.9775880469583775, "grad_norm": 0.5376947845189397, "learning_rate": 8.088233541368162e-06, "loss": 0.0636, "step": 25110 }, { "epoch": 2.9777066287205027, "grad_norm": 1.2052986175006224, "learning_rate": 8.086465826356765e-06, "loss": 0.1145, "step": 25111 }, { "epoch": 2.977825210482628, "grad_norm": 0.7310029290668759, "learning_rate": 8.084698267267408e-06, "loss": 0.1103, "step": 25112 }, { "epoch": 2.9779437922447527, "grad_norm": 0.538592009460005, "learning_rate": 8.082930864116384e-06, "loss": 0.0693, "step": 25113 }, { "epoch": 2.978062374006878, "grad_norm": 0.536763732153074, "learning_rate": 8.081163616919996e-06, "loss": 0.074, "step": 25114 }, { "epoch": 2.9781809557690027, "grad_norm": 0.482183290540528, "learning_rate": 8.079396525694532e-06, "loss": 0.0472, "step": 25115 }, { "epoch": 2.978299537531128, "grad_norm": 0.4944550722359037, "learning_rate": 8.07762959045629e-06, "loss": 0.0732, "step": 25116 }, { "epoch": 2.9784181192932526, "grad_norm": 0.9080878338910515, "learning_rate": 8.075862811221537e-06, "loss": 0.0944, "step": 25117 }, { "epoch": 2.978536701055378, "grad_norm": 0.893106016034601, "learning_rate": 8.074096188006591e-06, "loss": 0.124, "step": 25118 }, { "epoch": 2.9786552828175026, "grad_norm": 0.7048207009191976, "learning_rate": 8.072329720827711e-06, "loss": 0.0844, "step": 25119 }, { "epoch": 2.978773864579628, "grad_norm": 0.5159845284960578, "learning_rate": 8.070563409701204e-06, "loss": 0.0709, "step": 25120 }, { "epoch": 2.9788924463417525, "grad_norm": 0.9420771758714879, "learning_rate": 8.068797254643327e-06, "loss": 0.1432, "step": 25121 }, { "epoch": 2.9790110281038777, "grad_norm": 0.9578213414064427, "learning_rate": 8.06703125567039e-06, "loss": 0.116, "step": 25122 }, { "epoch": 2.9791296098660025, "grad_norm": 0.6979383618934383, "learning_rate": 8.065265412798654e-06, "loss": 0.0977, "step": 25123 }, { "epoch": 2.9792481916281277, "grad_norm": 0.4313835608468267, "learning_rate": 8.063499726044405e-06, "loss": 0.0673, "step": 25124 }, { "epoch": 2.9793667733902525, "grad_norm": 0.6086859259313473, "learning_rate": 8.061734195423917e-06, "loss": 0.0719, "step": 25125 }, { "epoch": 2.9794853551523777, "grad_norm": 0.5549573498375592, "learning_rate": 8.05996882095347e-06, "loss": 0.0604, "step": 25126 }, { "epoch": 2.9796039369145024, "grad_norm": 0.5883752638881121, "learning_rate": 8.058203602649344e-06, "loss": 0.0802, "step": 25127 }, { "epoch": 2.9797225186766276, "grad_norm": 0.6569459361251455, "learning_rate": 8.056438540527792e-06, "loss": 0.0724, "step": 25128 }, { "epoch": 2.9798411004387524, "grad_norm": 0.49845489120230535, "learning_rate": 8.054673634605114e-06, "loss": 0.0793, "step": 25129 }, { "epoch": 2.9799596822008776, "grad_norm": 0.621818348069744, "learning_rate": 8.052908884897555e-06, "loss": 0.0792, "step": 25130 }, { "epoch": 2.9800782639630023, "grad_norm": 0.6550864695381045, "learning_rate": 8.051144291421406e-06, "loss": 0.0865, "step": 25131 }, { "epoch": 2.9801968457251276, "grad_norm": 0.5804156557730891, "learning_rate": 8.049379854192904e-06, "loss": 0.0634, "step": 25132 }, { "epoch": 2.9803154274872523, "grad_norm": 0.6735563436682396, "learning_rate": 8.047615573228351e-06, "loss": 0.0664, "step": 25133 }, { "epoch": 2.9804340092493775, "grad_norm": 0.6203687160229402, "learning_rate": 8.045851448543986e-06, "loss": 0.0639, "step": 25134 }, { "epoch": 2.9805525910115023, "grad_norm": 0.8118766900975812, "learning_rate": 8.044087480156079e-06, "loss": 0.0817, "step": 25135 }, { "epoch": 2.9806711727736275, "grad_norm": 0.8372651659800823, "learning_rate": 8.042323668080892e-06, "loss": 0.1058, "step": 25136 }, { "epoch": 2.9807897545357527, "grad_norm": 0.8849844320122865, "learning_rate": 8.040560012334688e-06, "loss": 0.1027, "step": 25137 }, { "epoch": 2.9809083362978774, "grad_norm": 0.4896335176102449, "learning_rate": 8.03879651293373e-06, "loss": 0.0555, "step": 25138 }, { "epoch": 2.981026918060002, "grad_norm": 0.6437804681545011, "learning_rate": 8.037033169894253e-06, "loss": 0.0741, "step": 25139 }, { "epoch": 2.9811454998221274, "grad_norm": 0.3382461594304886, "learning_rate": 8.035269983232546e-06, "loss": 0.0481, "step": 25140 }, { "epoch": 2.9812640815842526, "grad_norm": 1.1197995087708081, "learning_rate": 8.03350695296484e-06, "loss": 0.1401, "step": 25141 }, { "epoch": 2.9813826633463774, "grad_norm": 0.6532482121885097, "learning_rate": 8.031744079107397e-06, "loss": 0.08, "step": 25142 }, { "epoch": 2.981501245108502, "grad_norm": 0.9340862915422692, "learning_rate": 8.029981361676456e-06, "loss": 0.1304, "step": 25143 }, { "epoch": 2.9816198268706273, "grad_norm": 0.5424286419443398, "learning_rate": 8.02821880068829e-06, "loss": 0.0758, "step": 25144 }, { "epoch": 2.9817384086327525, "grad_norm": 0.5204117364670016, "learning_rate": 8.026456396159124e-06, "loss": 0.0607, "step": 25145 }, { "epoch": 2.9818569903948773, "grad_norm": 0.8658795909123477, "learning_rate": 8.024694148105217e-06, "loss": 0.101, "step": 25146 }, { "epoch": 2.981975572157002, "grad_norm": 0.8915524426930114, "learning_rate": 8.022932056542815e-06, "loss": 0.1191, "step": 25147 }, { "epoch": 2.9820941539191272, "grad_norm": 0.6900858814950592, "learning_rate": 8.021170121488159e-06, "loss": 0.0851, "step": 25148 }, { "epoch": 2.9822127356812524, "grad_norm": 0.5877577741465561, "learning_rate": 8.019408342957504e-06, "loss": 0.074, "step": 25149 }, { "epoch": 2.982331317443377, "grad_norm": 0.8464906746191311, "learning_rate": 8.01764672096707e-06, "loss": 0.0911, "step": 25150 }, { "epoch": 2.982449899205502, "grad_norm": 0.8797362246964182, "learning_rate": 8.01588525553311e-06, "loss": 0.1213, "step": 25151 }, { "epoch": 2.982568480967627, "grad_norm": 0.7726497814496257, "learning_rate": 8.014123946671862e-06, "loss": 0.1316, "step": 25152 }, { "epoch": 2.9826870627297524, "grad_norm": 0.6008900014386737, "learning_rate": 8.012362794399566e-06, "loss": 0.0838, "step": 25153 }, { "epoch": 2.982805644491877, "grad_norm": 0.5225933285169287, "learning_rate": 8.010601798732439e-06, "loss": 0.0704, "step": 25154 }, { "epoch": 2.982924226254002, "grad_norm": 0.6901202357523094, "learning_rate": 8.008840959686747e-06, "loss": 0.0904, "step": 25155 }, { "epoch": 2.983042808016127, "grad_norm": 0.5416255035636441, "learning_rate": 8.007080277278697e-06, "loss": 0.0845, "step": 25156 }, { "epoch": 2.9831613897782523, "grad_norm": 0.6390979658380151, "learning_rate": 8.005319751524529e-06, "loss": 0.0659, "step": 25157 }, { "epoch": 2.983279971540377, "grad_norm": 0.6161378148701206, "learning_rate": 8.003559382440473e-06, "loss": 0.0776, "step": 25158 }, { "epoch": 2.983398553302502, "grad_norm": 0.6733098218050515, "learning_rate": 8.001799170042756e-06, "loss": 0.0815, "step": 25159 }, { "epoch": 2.983517135064627, "grad_norm": 0.5885467724007992, "learning_rate": 8.000039114347613e-06, "loss": 0.0722, "step": 25160 }, { "epoch": 2.983635716826752, "grad_norm": 0.9101713520277888, "learning_rate": 7.998279215371258e-06, "loss": 0.119, "step": 25161 }, { "epoch": 2.983754298588877, "grad_norm": 0.8201354298683057, "learning_rate": 7.996519473129915e-06, "loss": 0.1159, "step": 25162 }, { "epoch": 2.983872880351002, "grad_norm": 0.9108977375950748, "learning_rate": 7.994759887639816e-06, "loss": 0.111, "step": 25163 }, { "epoch": 2.983991462113127, "grad_norm": 0.6955988415098615, "learning_rate": 7.993000458917175e-06, "loss": 0.1001, "step": 25164 }, { "epoch": 2.984110043875252, "grad_norm": 0.773114947746799, "learning_rate": 7.991241186978221e-06, "loss": 0.0986, "step": 25165 }, { "epoch": 2.984228625637377, "grad_norm": 0.6509424296130524, "learning_rate": 7.989482071839152e-06, "loss": 0.085, "step": 25166 }, { "epoch": 2.984347207399502, "grad_norm": 0.729516847836406, "learning_rate": 7.987723113516216e-06, "loss": 0.074, "step": 25167 }, { "epoch": 2.984465789161627, "grad_norm": 0.6435366613240379, "learning_rate": 7.9859643120256e-06, "loss": 0.0836, "step": 25168 }, { "epoch": 2.984584370923752, "grad_norm": 0.5272618926144214, "learning_rate": 7.984205667383531e-06, "loss": 0.0788, "step": 25169 }, { "epoch": 2.984702952685877, "grad_norm": 0.7230919285159274, "learning_rate": 7.98244717960622e-06, "loss": 0.0786, "step": 25170 }, { "epoch": 2.984821534448002, "grad_norm": 0.6038744499148276, "learning_rate": 7.980688848709886e-06, "loss": 0.0779, "step": 25171 }, { "epoch": 2.984940116210127, "grad_norm": 0.5505003389398774, "learning_rate": 7.978930674710719e-06, "loss": 0.0645, "step": 25172 }, { "epoch": 2.985058697972252, "grad_norm": 0.7174649530263422, "learning_rate": 7.97717265762494e-06, "loss": 0.0978, "step": 25173 }, { "epoch": 2.9851772797343767, "grad_norm": 0.5693701235233923, "learning_rate": 7.975414797468755e-06, "loss": 0.0655, "step": 25174 }, { "epoch": 2.985295861496502, "grad_norm": 0.7920706566572723, "learning_rate": 7.973657094258369e-06, "loss": 0.0809, "step": 25175 }, { "epoch": 2.9854144432586267, "grad_norm": 0.7223894520757056, "learning_rate": 7.971899548009994e-06, "loss": 0.0866, "step": 25176 }, { "epoch": 2.985533025020752, "grad_norm": 0.7473784714485121, "learning_rate": 7.970142158739807e-06, "loss": 0.0977, "step": 25177 }, { "epoch": 2.9856516067828767, "grad_norm": 0.5033799143451254, "learning_rate": 7.968384926464042e-06, "loss": 0.0651, "step": 25178 }, { "epoch": 2.985770188545002, "grad_norm": 0.48212426373074635, "learning_rate": 7.966627851198874e-06, "loss": 0.052, "step": 25179 }, { "epoch": 2.9858887703071266, "grad_norm": 0.5246198623595583, "learning_rate": 7.96487093296052e-06, "loss": 0.0799, "step": 25180 }, { "epoch": 2.986007352069252, "grad_norm": 0.46927330092265485, "learning_rate": 7.963114171765146e-06, "loss": 0.0484, "step": 25181 }, { "epoch": 2.9861259338313766, "grad_norm": 0.7116452570151323, "learning_rate": 7.961357567628986e-06, "loss": 0.0725, "step": 25182 }, { "epoch": 2.986244515593502, "grad_norm": 0.6278910140924329, "learning_rate": 7.959601120568208e-06, "loss": 0.0677, "step": 25183 }, { "epoch": 2.9863630973556266, "grad_norm": 0.5622990782732475, "learning_rate": 7.95784483059901e-06, "loss": 0.0657, "step": 25184 }, { "epoch": 2.9864816791177518, "grad_norm": 0.9172211387297396, "learning_rate": 7.956088697737582e-06, "loss": 0.112, "step": 25185 }, { "epoch": 2.9866002608798765, "grad_norm": 0.5107751513274198, "learning_rate": 7.954332722000119e-06, "loss": 0.0708, "step": 25186 }, { "epoch": 2.9867188426420017, "grad_norm": 1.0070136947658836, "learning_rate": 7.952576903402812e-06, "loss": 0.1242, "step": 25187 }, { "epoch": 2.9868374244041265, "grad_norm": 0.5611844529217258, "learning_rate": 7.950821241961825e-06, "loss": 0.0576, "step": 25188 }, { "epoch": 2.9869560061662517, "grad_norm": 0.7137708571220374, "learning_rate": 7.949065737693376e-06, "loss": 0.1051, "step": 25189 }, { "epoch": 2.987074587928377, "grad_norm": 0.7926122991065726, "learning_rate": 7.947310390613621e-06, "loss": 0.0964, "step": 25190 }, { "epoch": 2.9871931696905016, "grad_norm": 0.5689281747864622, "learning_rate": 7.945555200738764e-06, "loss": 0.0581, "step": 25191 }, { "epoch": 2.9873117514526264, "grad_norm": 0.5423210326666267, "learning_rate": 7.943800168084956e-06, "loss": 0.0672, "step": 25192 }, { "epoch": 2.9874303332147516, "grad_norm": 0.6022002742544891, "learning_rate": 7.942045292668412e-06, "loss": 0.0795, "step": 25193 }, { "epoch": 2.987548914976877, "grad_norm": 0.7056863704625274, "learning_rate": 7.940290574505286e-06, "loss": 0.0796, "step": 25194 }, { "epoch": 2.9876674967390016, "grad_norm": 0.620413613110901, "learning_rate": 7.938536013611759e-06, "loss": 0.0829, "step": 25195 }, { "epoch": 2.9877860785011263, "grad_norm": 0.4261206926600032, "learning_rate": 7.936781610004007e-06, "loss": 0.0451, "step": 25196 }, { "epoch": 2.9879046602632515, "grad_norm": 0.6858231657706492, "learning_rate": 7.935027363698206e-06, "loss": 0.0893, "step": 25197 }, { "epoch": 2.9880232420253767, "grad_norm": 0.4686473568475326, "learning_rate": 7.933273274710534e-06, "loss": 0.0635, "step": 25198 }, { "epoch": 2.9881418237875015, "grad_norm": 0.4551520150346174, "learning_rate": 7.931519343057136e-06, "loss": 0.0636, "step": 25199 }, { "epoch": 2.9882604055496262, "grad_norm": 0.8589423160306479, "learning_rate": 7.929765568754219e-06, "loss": 0.0943, "step": 25200 }, { "epoch": 2.9883789873117514, "grad_norm": 0.9696076472123668, "learning_rate": 7.92801195181792e-06, "loss": 0.1374, "step": 25201 }, { "epoch": 2.9884975690738766, "grad_norm": 0.4102760201324566, "learning_rate": 7.926258492264425e-06, "loss": 0.0454, "step": 25202 }, { "epoch": 2.9886161508360014, "grad_norm": 0.6632497242275227, "learning_rate": 7.924505190109871e-06, "loss": 0.0819, "step": 25203 }, { "epoch": 2.988734732598126, "grad_norm": 0.8474446853171365, "learning_rate": 7.922752045370458e-06, "loss": 0.1036, "step": 25204 }, { "epoch": 2.9888533143602514, "grad_norm": 0.6992645165523089, "learning_rate": 7.92099905806232e-06, "loss": 0.0861, "step": 25205 }, { "epoch": 2.9889718961223766, "grad_norm": 0.7600498511882009, "learning_rate": 7.919246228201626e-06, "loss": 0.1059, "step": 25206 }, { "epoch": 2.9890904778845013, "grad_norm": 0.7897153132120887, "learning_rate": 7.917493555804539e-06, "loss": 0.0901, "step": 25207 }, { "epoch": 2.989209059646626, "grad_norm": 0.9524295993778669, "learning_rate": 7.915741040887212e-06, "loss": 0.1182, "step": 25208 }, { "epoch": 2.9893276414087513, "grad_norm": 1.1557778904548075, "learning_rate": 7.913988683465811e-06, "loss": 0.1113, "step": 25209 }, { "epoch": 2.9894462231708765, "grad_norm": 0.7065524279376227, "learning_rate": 7.912236483556465e-06, "loss": 0.084, "step": 25210 }, { "epoch": 2.9895648049330013, "grad_norm": 0.5736492023046409, "learning_rate": 7.910484441175362e-06, "loss": 0.0585, "step": 25211 }, { "epoch": 2.9896833866951265, "grad_norm": 0.8911983281743301, "learning_rate": 7.908732556338628e-06, "loss": 0.1275, "step": 25212 }, { "epoch": 2.989801968457251, "grad_norm": 0.831958483125377, "learning_rate": 7.906980829062428e-06, "loss": 0.1184, "step": 25213 }, { "epoch": 2.9899205502193764, "grad_norm": 0.819593132040254, "learning_rate": 7.905229259362887e-06, "loss": 0.0827, "step": 25214 }, { "epoch": 2.990039131981501, "grad_norm": 0.6787546838148077, "learning_rate": 7.903477847256185e-06, "loss": 0.0934, "step": 25215 }, { "epoch": 2.9901577137436264, "grad_norm": 0.5760385754448866, "learning_rate": 7.901726592758446e-06, "loss": 0.0837, "step": 25216 }, { "epoch": 2.990276295505751, "grad_norm": 0.5717104254658266, "learning_rate": 7.899975495885819e-06, "loss": 0.0576, "step": 25217 }, { "epoch": 2.9903948772678763, "grad_norm": 0.6747365930417875, "learning_rate": 7.89822455665445e-06, "loss": 0.0681, "step": 25218 }, { "epoch": 2.990513459030001, "grad_norm": 0.8124179686344388, "learning_rate": 7.896473775080476e-06, "loss": 0.1168, "step": 25219 }, { "epoch": 2.9906320407921263, "grad_norm": 0.6586963205968985, "learning_rate": 7.894723151180054e-06, "loss": 0.0659, "step": 25220 }, { "epoch": 2.990750622554251, "grad_norm": 0.598642011208212, "learning_rate": 7.892972684969294e-06, "loss": 0.0749, "step": 25221 }, { "epoch": 2.9908692043163763, "grad_norm": 0.6574014956272434, "learning_rate": 7.891222376464353e-06, "loss": 0.0767, "step": 25222 }, { "epoch": 2.990987786078501, "grad_norm": 0.6631876099562936, "learning_rate": 7.889472225681357e-06, "loss": 0.0822, "step": 25223 }, { "epoch": 2.9911063678406262, "grad_norm": 0.656952448763852, "learning_rate": 7.88772223263645e-06, "loss": 0.0858, "step": 25224 }, { "epoch": 2.991224949602751, "grad_norm": 0.8077323341683675, "learning_rate": 7.885972397345756e-06, "loss": 0.0961, "step": 25225 }, { "epoch": 2.991343531364876, "grad_norm": 0.5285834188856238, "learning_rate": 7.88422271982541e-06, "loss": 0.0707, "step": 25226 }, { "epoch": 2.991462113127001, "grad_norm": 0.7982717677192989, "learning_rate": 7.88247320009155e-06, "loss": 0.1157, "step": 25227 }, { "epoch": 2.991580694889126, "grad_norm": 0.6520966914470229, "learning_rate": 7.88072383816029e-06, "loss": 0.0818, "step": 25228 }, { "epoch": 2.991699276651251, "grad_norm": 0.6322162696009809, "learning_rate": 7.878974634047759e-06, "loss": 0.0757, "step": 25229 }, { "epoch": 2.991817858413376, "grad_norm": 0.4765633115323194, "learning_rate": 7.87722558777009e-06, "loss": 0.0726, "step": 25230 }, { "epoch": 2.991936440175501, "grad_norm": 0.6050242119454192, "learning_rate": 7.875476699343412e-06, "loss": 0.0883, "step": 25231 }, { "epoch": 2.992055021937626, "grad_norm": 0.8694844534459029, "learning_rate": 7.873727968783831e-06, "loss": 0.0859, "step": 25232 }, { "epoch": 2.992173603699751, "grad_norm": 0.521512058807829, "learning_rate": 7.871979396107476e-06, "loss": 0.061, "step": 25233 }, { "epoch": 2.992292185461876, "grad_norm": 0.7138826160336067, "learning_rate": 7.870230981330468e-06, "loss": 0.088, "step": 25234 }, { "epoch": 2.992410767224001, "grad_norm": 0.9295340591340745, "learning_rate": 7.868482724468923e-06, "loss": 0.1061, "step": 25235 }, { "epoch": 2.992529348986126, "grad_norm": 0.5549676758937078, "learning_rate": 7.866734625538966e-06, "loss": 0.0622, "step": 25236 }, { "epoch": 2.9926479307482508, "grad_norm": 0.4951886607993743, "learning_rate": 7.864986684556692e-06, "loss": 0.071, "step": 25237 }, { "epoch": 2.992766512510376, "grad_norm": 0.5509325861015106, "learning_rate": 7.863238901538245e-06, "loss": 0.0777, "step": 25238 }, { "epoch": 2.992885094272501, "grad_norm": 0.6070510956704995, "learning_rate": 7.86149127649971e-06, "loss": 0.068, "step": 25239 }, { "epoch": 2.993003676034626, "grad_norm": 0.5233767665289992, "learning_rate": 7.859743809457212e-06, "loss": 0.0545, "step": 25240 }, { "epoch": 2.9931222577967507, "grad_norm": 0.5705709742330571, "learning_rate": 7.857996500426856e-06, "loss": 0.0699, "step": 25241 }, { "epoch": 2.993240839558876, "grad_norm": 0.7184691218353527, "learning_rate": 7.856249349424757e-06, "loss": 0.1195, "step": 25242 }, { "epoch": 2.993359421321001, "grad_norm": 0.8460818116815699, "learning_rate": 7.85450235646701e-06, "loss": 0.1306, "step": 25243 }, { "epoch": 2.993478003083126, "grad_norm": 0.6231702991214165, "learning_rate": 7.852755521569727e-06, "loss": 0.0799, "step": 25244 }, { "epoch": 2.9935965848452506, "grad_norm": 0.5269989492860602, "learning_rate": 7.851008844749011e-06, "loss": 0.0728, "step": 25245 }, { "epoch": 2.993715166607376, "grad_norm": 0.5950432786858968, "learning_rate": 7.849262326020963e-06, "loss": 0.0715, "step": 25246 }, { "epoch": 2.993833748369501, "grad_norm": 0.7145967738703982, "learning_rate": 7.847515965401692e-06, "loss": 0.0982, "step": 25247 }, { "epoch": 2.9939523301316258, "grad_norm": 0.815379042412025, "learning_rate": 7.845769762907274e-06, "loss": 0.101, "step": 25248 }, { "epoch": 2.9940709118937505, "grad_norm": 0.741114986648285, "learning_rate": 7.844023718553839e-06, "loss": 0.0917, "step": 25249 }, { "epoch": 2.9941894936558757, "grad_norm": 0.690947211112258, "learning_rate": 7.842277832357461e-06, "loss": 0.0886, "step": 25250 }, { "epoch": 2.994308075418001, "grad_norm": 0.8754880215846245, "learning_rate": 7.840532104334247e-06, "loss": 0.1234, "step": 25251 }, { "epoch": 2.9944266571801257, "grad_norm": 0.7607310659269182, "learning_rate": 7.83878653450027e-06, "loss": 0.0824, "step": 25252 }, { "epoch": 2.9945452389422504, "grad_norm": 0.7239485446626567, "learning_rate": 7.837041122871652e-06, "loss": 0.1, "step": 25253 }, { "epoch": 2.9946638207043756, "grad_norm": 1.0981227518580587, "learning_rate": 7.835295869464462e-06, "loss": 0.1331, "step": 25254 }, { "epoch": 2.994782402466501, "grad_norm": 0.8828140819771635, "learning_rate": 7.833550774294793e-06, "loss": 0.1142, "step": 25255 }, { "epoch": 2.9949009842286256, "grad_norm": 0.8095807662199901, "learning_rate": 7.831805837378736e-06, "loss": 0.1085, "step": 25256 }, { "epoch": 2.9950195659907504, "grad_norm": 0.6413380262594447, "learning_rate": 7.830061058732377e-06, "loss": 0.0806, "step": 25257 }, { "epoch": 2.9951381477528756, "grad_norm": 0.5935886533654066, "learning_rate": 7.828316438371805e-06, "loss": 0.0797, "step": 25258 }, { "epoch": 2.9952567295150008, "grad_norm": 0.9397436959122861, "learning_rate": 7.826571976313085e-06, "loss": 0.1404, "step": 25259 }, { "epoch": 2.9953753112771255, "grad_norm": 0.6745401105418741, "learning_rate": 7.824827672572326e-06, "loss": 0.1099, "step": 25260 }, { "epoch": 2.9954938930392503, "grad_norm": 0.6330757498653369, "learning_rate": 7.82308352716559e-06, "loss": 0.0894, "step": 25261 }, { "epoch": 2.9956124748013755, "grad_norm": 0.6003050689912874, "learning_rate": 7.821339540108962e-06, "loss": 0.0808, "step": 25262 }, { "epoch": 2.9957310565635007, "grad_norm": 0.7722834072031718, "learning_rate": 7.819595711418507e-06, "loss": 0.0668, "step": 25263 }, { "epoch": 2.9958496383256255, "grad_norm": 0.5536692675252685, "learning_rate": 7.817852041110324e-06, "loss": 0.0713, "step": 25264 }, { "epoch": 2.9959682200877507, "grad_norm": 0.8908127539836134, "learning_rate": 7.81610852920047e-06, "loss": 0.1049, "step": 25265 }, { "epoch": 2.9960868018498754, "grad_norm": 0.5425158439026335, "learning_rate": 7.814365175705022e-06, "loss": 0.0613, "step": 25266 }, { "epoch": 2.9962053836120006, "grad_norm": 0.7505593778187494, "learning_rate": 7.81262198064005e-06, "loss": 0.0934, "step": 25267 }, { "epoch": 2.9963239653741254, "grad_norm": 0.6088132107322531, "learning_rate": 7.81087894402163e-06, "loss": 0.0727, "step": 25268 }, { "epoch": 2.9964425471362506, "grad_norm": 0.6069165277321645, "learning_rate": 7.809136065865836e-06, "loss": 0.0835, "step": 25269 }, { "epoch": 2.9965611288983753, "grad_norm": 0.5711286678982278, "learning_rate": 7.807393346188706e-06, "loss": 0.0877, "step": 25270 }, { "epoch": 2.9966797106605005, "grad_norm": 1.189273296662366, "learning_rate": 7.805650785006346e-06, "loss": 0.1082, "step": 25271 }, { "epoch": 2.9967982924226253, "grad_norm": 0.4869226958481434, "learning_rate": 7.803908382334792e-06, "loss": 0.0639, "step": 25272 }, { "epoch": 2.9969168741847505, "grad_norm": 1.0287821596449727, "learning_rate": 7.802166138190117e-06, "loss": 0.1257, "step": 25273 }, { "epoch": 2.9970354559468753, "grad_norm": 0.5355696746484441, "learning_rate": 7.800424052588379e-06, "loss": 0.0671, "step": 25274 }, { "epoch": 2.9971540377090005, "grad_norm": 0.5875351473657155, "learning_rate": 7.798682125545642e-06, "loss": 0.0711, "step": 25275 }, { "epoch": 2.997272619471125, "grad_norm": 0.5942348404748906, "learning_rate": 7.796940357077966e-06, "loss": 0.0759, "step": 25276 }, { "epoch": 2.9973912012332504, "grad_norm": 0.4826426034613959, "learning_rate": 7.7951987472014e-06, "loss": 0.0549, "step": 25277 }, { "epoch": 2.997509782995375, "grad_norm": 0.4229056151173487, "learning_rate": 7.793457295932002e-06, "loss": 0.054, "step": 25278 }, { "epoch": 2.9976283647575004, "grad_norm": 0.7746374040415275, "learning_rate": 7.79171600328583e-06, "loss": 0.0824, "step": 25279 }, { "epoch": 2.997746946519625, "grad_norm": 0.5730604645236836, "learning_rate": 7.789974869278938e-06, "loss": 0.0672, "step": 25280 }, { "epoch": 2.9978655282817503, "grad_norm": 0.7358408537569285, "learning_rate": 7.78823389392736e-06, "loss": 0.1132, "step": 25281 }, { "epoch": 2.997984110043875, "grad_norm": 0.4748562226311641, "learning_rate": 7.786493077247174e-06, "loss": 0.058, "step": 25282 }, { "epoch": 2.9981026918060003, "grad_norm": 0.7299297033352722, "learning_rate": 7.784752419254406e-06, "loss": 0.088, "step": 25283 }, { "epoch": 2.998221273568125, "grad_norm": 0.790749497579351, "learning_rate": 7.783011919965108e-06, "loss": 0.1043, "step": 25284 }, { "epoch": 2.9983398553302503, "grad_norm": 0.5135826227828129, "learning_rate": 7.78127157939533e-06, "loss": 0.0872, "step": 25285 }, { "epoch": 2.998458437092375, "grad_norm": 0.4881535141370342, "learning_rate": 7.779531397561113e-06, "loss": 0.054, "step": 25286 }, { "epoch": 2.9985770188545002, "grad_norm": 0.8641967477189936, "learning_rate": 7.777791374478505e-06, "loss": 0.1046, "step": 25287 }, { "epoch": 2.9986956006166254, "grad_norm": 0.39636011073817334, "learning_rate": 7.776051510163534e-06, "loss": 0.0448, "step": 25288 }, { "epoch": 2.99881418237875, "grad_norm": 0.7625473208992781, "learning_rate": 7.774311804632248e-06, "loss": 0.1118, "step": 25289 }, { "epoch": 2.998932764140875, "grad_norm": 0.5668778963932375, "learning_rate": 7.772572257900684e-06, "loss": 0.0687, "step": 25290 }, { "epoch": 2.999051345903, "grad_norm": 0.8637773032695936, "learning_rate": 7.770832869984885e-06, "loss": 0.1227, "step": 25291 }, { "epoch": 2.9991699276651254, "grad_norm": 0.6867367231646458, "learning_rate": 7.76909364090087e-06, "loss": 0.0735, "step": 25292 }, { "epoch": 2.99928850942725, "grad_norm": 0.37403637025196546, "learning_rate": 7.767354570664684e-06, "loss": 0.0518, "step": 25293 }, { "epoch": 2.999407091189375, "grad_norm": 0.6623590080242845, "learning_rate": 7.765615659292355e-06, "loss": 0.0716, "step": 25294 }, { "epoch": 2.9995256729515, "grad_norm": 0.7604372295553489, "learning_rate": 7.763876906799917e-06, "loss": 0.0896, "step": 25295 }, { "epoch": 2.9996442547136253, "grad_norm": 0.6685344707824025, "learning_rate": 7.762138313203396e-06, "loss": 0.0658, "step": 25296 }, { "epoch": 2.99976283647575, "grad_norm": 0.583796315175131, "learning_rate": 7.76039987851882e-06, "loss": 0.0627, "step": 25297 }, { "epoch": 2.999881418237875, "grad_norm": 0.7340032238519414, "learning_rate": 7.758661602762227e-06, "loss": 0.0837, "step": 25298 }, { "epoch": 3.0, "grad_norm": 0.6339409897348451, "learning_rate": 7.756923485949624e-06, "loss": 0.0847, "step": 25299 }, { "epoch": 3.000118581762125, "grad_norm": 0.27818727890102973, "learning_rate": 7.755185528097039e-06, "loss": 0.021, "step": 25300 }, { "epoch": 3.00023716352425, "grad_norm": 0.35217358455840414, "learning_rate": 7.753447729220498e-06, "loss": 0.0347, "step": 25301 }, { "epoch": 3.000355745286375, "grad_norm": 0.27283024111008736, "learning_rate": 7.751710089336028e-06, "loss": 0.0244, "step": 25302 }, { "epoch": 3.0004743270485, "grad_norm": 0.43092502941457056, "learning_rate": 7.749972608459633e-06, "loss": 0.0262, "step": 25303 }, { "epoch": 3.000592908810625, "grad_norm": 0.7553318211907485, "learning_rate": 7.748235286607334e-06, "loss": 0.053, "step": 25304 }, { "epoch": 3.00071149057275, "grad_norm": 0.36197013319953925, "learning_rate": 7.746498123795152e-06, "loss": 0.0305, "step": 25305 }, { "epoch": 3.000830072334875, "grad_norm": 0.3202295039619893, "learning_rate": 7.744761120039098e-06, "loss": 0.034, "step": 25306 }, { "epoch": 3.000948654097, "grad_norm": 0.48235980957807045, "learning_rate": 7.743024275355196e-06, "loss": 0.0558, "step": 25307 }, { "epoch": 3.001067235859125, "grad_norm": 0.3056866212466977, "learning_rate": 7.741287589759433e-06, "loss": 0.0284, "step": 25308 }, { "epoch": 3.00118581762125, "grad_norm": 0.5736939229410981, "learning_rate": 7.73955106326785e-06, "loss": 0.0585, "step": 25309 }, { "epoch": 3.001304399383375, "grad_norm": 0.5367376690072162, "learning_rate": 7.73781469589643e-06, "loss": 0.0422, "step": 25310 }, { "epoch": 3.0014229811454998, "grad_norm": 0.31583627608957515, "learning_rate": 7.736078487661195e-06, "loss": 0.0372, "step": 25311 }, { "epoch": 3.001541562907625, "grad_norm": 0.551794205818285, "learning_rate": 7.73434243857814e-06, "loss": 0.0591, "step": 25312 }, { "epoch": 3.0016601446697497, "grad_norm": 0.38367492631767275, "learning_rate": 7.732606548663287e-06, "loss": 0.0326, "step": 25313 }, { "epoch": 3.001778726431875, "grad_norm": 0.4613275293292097, "learning_rate": 7.730870817932617e-06, "loss": 0.0383, "step": 25314 }, { "epoch": 3.0018973081939997, "grad_norm": 0.5358568882722229, "learning_rate": 7.729135246402145e-06, "loss": 0.0634, "step": 25315 }, { "epoch": 3.002015889956125, "grad_norm": 0.42210067041232824, "learning_rate": 7.727399834087862e-06, "loss": 0.0408, "step": 25316 }, { "epoch": 3.0021344717182497, "grad_norm": 0.3145389464558861, "learning_rate": 7.725664581005773e-06, "loss": 0.0237, "step": 25317 }, { "epoch": 3.002253053480375, "grad_norm": 0.2520551295018644, "learning_rate": 7.723929487171882e-06, "loss": 0.0172, "step": 25318 }, { "epoch": 3.0023716352424996, "grad_norm": 0.37448281255490334, "learning_rate": 7.722194552602161e-06, "loss": 0.0389, "step": 25319 }, { "epoch": 3.002490217004625, "grad_norm": 0.46808233269923394, "learning_rate": 7.720459777312633e-06, "loss": 0.0441, "step": 25320 }, { "epoch": 3.0026087987667496, "grad_norm": 0.34471910179115867, "learning_rate": 7.718725161319271e-06, "loss": 0.0244, "step": 25321 }, { "epoch": 3.002727380528875, "grad_norm": 0.35315788093451483, "learning_rate": 7.71699070463807e-06, "loss": 0.028, "step": 25322 }, { "epoch": 3.0028459622909995, "grad_norm": 0.46816812381872286, "learning_rate": 7.715256407285019e-06, "loss": 0.0428, "step": 25323 }, { "epoch": 3.0029645440531247, "grad_norm": 0.5268345562761996, "learning_rate": 7.713522269276107e-06, "loss": 0.0413, "step": 25324 }, { "epoch": 3.0030831258152495, "grad_norm": 0.36311358433956514, "learning_rate": 7.711788290627333e-06, "loss": 0.036, "step": 25325 }, { "epoch": 3.0032017075773747, "grad_norm": 0.4759122861182023, "learning_rate": 7.710054471354655e-06, "loss": 0.034, "step": 25326 }, { "epoch": 3.0033202893394995, "grad_norm": 0.3904688167882179, "learning_rate": 7.708320811474087e-06, "loss": 0.0339, "step": 25327 }, { "epoch": 3.0034388711016247, "grad_norm": 0.4223150674943986, "learning_rate": 7.706587311001587e-06, "loss": 0.033, "step": 25328 }, { "epoch": 3.0035574528637494, "grad_norm": 0.36237905522696834, "learning_rate": 7.704853969953155e-06, "loss": 0.0348, "step": 25329 }, { "epoch": 3.0036760346258746, "grad_norm": 0.4359669304129758, "learning_rate": 7.703120788344745e-06, "loss": 0.0349, "step": 25330 }, { "epoch": 3.0037946163879994, "grad_norm": 0.3435525988853921, "learning_rate": 7.701387766192367e-06, "loss": 0.0257, "step": 25331 }, { "epoch": 3.0039131981501246, "grad_norm": 0.6073826643054211, "learning_rate": 7.699654903511972e-06, "loss": 0.0459, "step": 25332 }, { "epoch": 3.0040317799122493, "grad_norm": 0.4289672442904183, "learning_rate": 7.697922200319543e-06, "loss": 0.0367, "step": 25333 }, { "epoch": 3.0041503616743745, "grad_norm": 0.8193486560506394, "learning_rate": 7.696189656631056e-06, "loss": 0.0604, "step": 25334 }, { "epoch": 3.0042689434364993, "grad_norm": 0.503661824178371, "learning_rate": 7.694457272462479e-06, "loss": 0.0549, "step": 25335 }, { "epoch": 3.0043875251986245, "grad_norm": 0.5903578466248951, "learning_rate": 7.692725047829794e-06, "loss": 0.0469, "step": 25336 }, { "epoch": 3.0045061069607493, "grad_norm": 0.46457097160583094, "learning_rate": 7.690992982748951e-06, "loss": 0.0306, "step": 25337 }, { "epoch": 3.0046246887228745, "grad_norm": 0.20876976530492458, "learning_rate": 7.68926107723593e-06, "loss": 0.0185, "step": 25338 }, { "epoch": 3.0047432704849992, "grad_norm": 0.4331729389407821, "learning_rate": 7.68752933130669e-06, "loss": 0.0337, "step": 25339 }, { "epoch": 3.0048618522471244, "grad_norm": 0.31807159279801295, "learning_rate": 7.68579774497721e-06, "loss": 0.0265, "step": 25340 }, { "epoch": 3.004980434009249, "grad_norm": 0.42754621474455795, "learning_rate": 7.684066318263425e-06, "loss": 0.0242, "step": 25341 }, { "epoch": 3.0050990157713744, "grad_norm": 0.26186124757045237, "learning_rate": 7.682335051181332e-06, "loss": 0.0193, "step": 25342 }, { "epoch": 3.005217597533499, "grad_norm": 0.5245283633670818, "learning_rate": 7.680603943746866e-06, "loss": 0.0257, "step": 25343 }, { "epoch": 3.0053361792956244, "grad_norm": 0.29416918952890303, "learning_rate": 7.678872995975992e-06, "loss": 0.025, "step": 25344 }, { "epoch": 3.005454761057749, "grad_norm": 0.3433107662747983, "learning_rate": 7.677142207884668e-06, "loss": 0.0256, "step": 25345 }, { "epoch": 3.0055733428198743, "grad_norm": 0.5501222903932953, "learning_rate": 7.67541157948885e-06, "loss": 0.0405, "step": 25346 }, { "epoch": 3.005691924581999, "grad_norm": 0.2768194733225938, "learning_rate": 7.673681110804504e-06, "loss": 0.0231, "step": 25347 }, { "epoch": 3.0058105063441243, "grad_norm": 0.2941069138209204, "learning_rate": 7.671950801847558e-06, "loss": 0.0253, "step": 25348 }, { "epoch": 3.0059290881062495, "grad_norm": 0.6416164977314439, "learning_rate": 7.670220652633981e-06, "loss": 0.0427, "step": 25349 }, { "epoch": 3.0060476698683742, "grad_norm": 0.470782626272427, "learning_rate": 7.668490663179715e-06, "loss": 0.0349, "step": 25350 }, { "epoch": 3.0061662516304994, "grad_norm": 0.5461681129762601, "learning_rate": 7.666760833500722e-06, "loss": 0.0427, "step": 25351 }, { "epoch": 3.006284833392624, "grad_norm": 0.4361119557985114, "learning_rate": 7.665031163612921e-06, "loss": 0.0277, "step": 25352 }, { "epoch": 3.0064034151547494, "grad_norm": 0.4890419971828236, "learning_rate": 7.663301653532293e-06, "loss": 0.0321, "step": 25353 }, { "epoch": 3.006521996916874, "grad_norm": 0.5029343081983291, "learning_rate": 7.661572303274756e-06, "loss": 0.0265, "step": 25354 }, { "epoch": 3.0066405786789994, "grad_norm": 0.44630892821797713, "learning_rate": 7.659843112856258e-06, "loss": 0.0367, "step": 25355 }, { "epoch": 3.006759160441124, "grad_norm": 0.47680585725105595, "learning_rate": 7.658114082292744e-06, "loss": 0.0289, "step": 25356 }, { "epoch": 3.0068777422032493, "grad_norm": 0.3759872146988128, "learning_rate": 7.656385211600154e-06, "loss": 0.0308, "step": 25357 }, { "epoch": 3.006996323965374, "grad_norm": 0.44452476696037296, "learning_rate": 7.65465650079443e-06, "loss": 0.0411, "step": 25358 }, { "epoch": 3.0071149057274993, "grad_norm": 0.4222140817141822, "learning_rate": 7.652927949891495e-06, "loss": 0.0411, "step": 25359 }, { "epoch": 3.007233487489624, "grad_norm": 0.4472191310101325, "learning_rate": 7.651199558907293e-06, "loss": 0.0241, "step": 25360 }, { "epoch": 3.0073520692517492, "grad_norm": 0.5802810185708355, "learning_rate": 7.649471327857754e-06, "loss": 0.0471, "step": 25361 }, { "epoch": 3.007470651013874, "grad_norm": 0.6064955604777108, "learning_rate": 7.647743256758824e-06, "loss": 0.0303, "step": 25362 }, { "epoch": 3.007589232775999, "grad_norm": 0.36335979433933546, "learning_rate": 7.646015345626414e-06, "loss": 0.029, "step": 25363 }, { "epoch": 3.007707814538124, "grad_norm": 0.2531722069430395, "learning_rate": 7.644287594476459e-06, "loss": 0.0155, "step": 25364 }, { "epoch": 3.007826396300249, "grad_norm": 0.46042782393622717, "learning_rate": 7.642560003324892e-06, "loss": 0.0315, "step": 25365 }, { "epoch": 3.007944978062374, "grad_norm": 0.3109669690984699, "learning_rate": 7.640832572187636e-06, "loss": 0.0165, "step": 25366 }, { "epoch": 3.008063559824499, "grad_norm": 0.5457014048185769, "learning_rate": 7.639105301080615e-06, "loss": 0.0299, "step": 25367 }, { "epoch": 3.008182141586624, "grad_norm": 0.3474581289206686, "learning_rate": 7.637378190019757e-06, "loss": 0.0248, "step": 25368 }, { "epoch": 3.008300723348749, "grad_norm": 0.42491363989754444, "learning_rate": 7.635651239020986e-06, "loss": 0.0241, "step": 25369 }, { "epoch": 3.008419305110874, "grad_norm": 0.3587769260082179, "learning_rate": 7.63392444810021e-06, "loss": 0.0286, "step": 25370 }, { "epoch": 3.008537886872999, "grad_norm": 0.5104000025690947, "learning_rate": 7.632197817273354e-06, "loss": 0.0319, "step": 25371 }, { "epoch": 3.008656468635124, "grad_norm": 0.5650411135422209, "learning_rate": 7.630471346556337e-06, "loss": 0.0419, "step": 25372 }, { "epoch": 3.008775050397249, "grad_norm": 0.3713623111155855, "learning_rate": 7.628745035965073e-06, "loss": 0.023, "step": 25373 }, { "epoch": 3.0088936321593738, "grad_norm": 0.5907039796316705, "learning_rate": 7.627018885515489e-06, "loss": 0.0381, "step": 25374 }, { "epoch": 3.009012213921499, "grad_norm": 0.37882622170220515, "learning_rate": 7.625292895223468e-06, "loss": 0.0211, "step": 25375 }, { "epoch": 3.0091307956836237, "grad_norm": 0.39308977428435743, "learning_rate": 7.6235670651049554e-06, "loss": 0.0266, "step": 25376 }, { "epoch": 3.009249377445749, "grad_norm": 0.5816389466022209, "learning_rate": 7.6218413951758414e-06, "loss": 0.0366, "step": 25377 }, { "epoch": 3.0093679592078737, "grad_norm": 0.505493144867611, "learning_rate": 7.6201158854520446e-06, "loss": 0.0353, "step": 25378 }, { "epoch": 3.009486540969999, "grad_norm": 0.43790797403121146, "learning_rate": 7.618390535949452e-06, "loss": 0.0337, "step": 25379 }, { "epoch": 3.0096051227321237, "grad_norm": 0.6844038288853503, "learning_rate": 7.616665346683999e-06, "loss": 0.0408, "step": 25380 }, { "epoch": 3.009723704494249, "grad_norm": 0.5432711097006784, "learning_rate": 7.614940317671568e-06, "loss": 0.0291, "step": 25381 }, { "epoch": 3.0098422862563736, "grad_norm": 0.3267560731312228, "learning_rate": 7.6132154489280656e-06, "loss": 0.0244, "step": 25382 }, { "epoch": 3.009960868018499, "grad_norm": 0.531546862791762, "learning_rate": 7.611490740469398e-06, "loss": 0.0244, "step": 25383 }, { "epoch": 3.0100794497806236, "grad_norm": 0.6619541563963426, "learning_rate": 7.609766192311463e-06, "loss": 0.0385, "step": 25384 }, { "epoch": 3.010198031542749, "grad_norm": 0.519515379490031, "learning_rate": 7.6080418044701646e-06, "loss": 0.033, "step": 25385 }, { "epoch": 3.0103166133048735, "grad_norm": 0.5581110928982194, "learning_rate": 7.606317576961378e-06, "loss": 0.0408, "step": 25386 }, { "epoch": 3.0104351950669987, "grad_norm": 0.5830136000335294, "learning_rate": 7.604593509801031e-06, "loss": 0.0225, "step": 25387 }, { "epoch": 3.0105537768291235, "grad_norm": 0.5932803466550683, "learning_rate": 7.602869603004989e-06, "loss": 0.039, "step": 25388 }, { "epoch": 3.0106723585912487, "grad_norm": 0.3653226217483757, "learning_rate": 7.601145856589168e-06, "loss": 0.0205, "step": 25389 }, { "epoch": 3.0107909403533735, "grad_norm": 0.5335073654554573, "learning_rate": 7.599422270569426e-06, "loss": 0.0439, "step": 25390 }, { "epoch": 3.0109095221154987, "grad_norm": 0.501314571655461, "learning_rate": 7.59769884496169e-06, "loss": 0.0313, "step": 25391 }, { "epoch": 3.0110281038776234, "grad_norm": 0.6311929209988606, "learning_rate": 7.595975579781825e-06, "loss": 0.0293, "step": 25392 }, { "epoch": 3.0111466856397486, "grad_norm": 0.5608401543106221, "learning_rate": 7.594252475045721e-06, "loss": 0.0332, "step": 25393 }, { "epoch": 3.0112652674018734, "grad_norm": 0.7724999945553648, "learning_rate": 7.5925295307692625e-06, "loss": 0.0441, "step": 25394 }, { "epoch": 3.0113838491639986, "grad_norm": 0.3729481528411709, "learning_rate": 7.59080674696834e-06, "loss": 0.0239, "step": 25395 }, { "epoch": 3.0115024309261234, "grad_norm": 0.42519706082795705, "learning_rate": 7.589084123658835e-06, "loss": 0.0244, "step": 25396 }, { "epoch": 3.0116210126882486, "grad_norm": 0.4523489840687546, "learning_rate": 7.587361660856609e-06, "loss": 0.0384, "step": 25397 }, { "epoch": 3.0117395944503733, "grad_norm": 0.5731833973772033, "learning_rate": 7.585639358577573e-06, "loss": 0.0377, "step": 25398 }, { "epoch": 3.0118581762124985, "grad_norm": 0.45622100060080334, "learning_rate": 7.583917216837577e-06, "loss": 0.0236, "step": 25399 }, { "epoch": 3.0119767579746237, "grad_norm": 0.33163745275135703, "learning_rate": 7.5821952356525175e-06, "loss": 0.0151, "step": 25400 }, { "epoch": 3.0120953397367485, "grad_norm": 0.32951217013573225, "learning_rate": 7.580473415038241e-06, "loss": 0.016, "step": 25401 }, { "epoch": 3.0122139214988737, "grad_norm": 0.43511688077262295, "learning_rate": 7.578751755010655e-06, "loss": 0.0307, "step": 25402 }, { "epoch": 3.0123325032609984, "grad_norm": 0.6094388201504408, "learning_rate": 7.577030255585607e-06, "loss": 0.0321, "step": 25403 }, { "epoch": 3.0124510850231236, "grad_norm": 0.34180785539117436, "learning_rate": 7.575308916778973e-06, "loss": 0.0213, "step": 25404 }, { "epoch": 3.0125696667852484, "grad_norm": 0.5030362628932578, "learning_rate": 7.573587738606625e-06, "loss": 0.0348, "step": 25405 }, { "epoch": 3.0126882485473736, "grad_norm": 0.3843773073384051, "learning_rate": 7.571866721084428e-06, "loss": 0.0223, "step": 25406 }, { "epoch": 3.0128068303094984, "grad_norm": 0.3393651335151554, "learning_rate": 7.5701458642282534e-06, "loss": 0.0138, "step": 25407 }, { "epoch": 3.0129254120716236, "grad_norm": 0.4249248547583195, "learning_rate": 7.568425168053955e-06, "loss": 0.0192, "step": 25408 }, { "epoch": 3.0130439938337483, "grad_norm": 0.4269149796282927, "learning_rate": 7.566704632577401e-06, "loss": 0.0291, "step": 25409 }, { "epoch": 3.0131625755958735, "grad_norm": 0.9291809820174342, "learning_rate": 7.5649842578144505e-06, "loss": 0.0408, "step": 25410 }, { "epoch": 3.0132811573579983, "grad_norm": 0.7037282114644994, "learning_rate": 7.563264043780974e-06, "loss": 0.026, "step": 25411 }, { "epoch": 3.0133997391201235, "grad_norm": 0.6498371476007979, "learning_rate": 7.561543990492803e-06, "loss": 0.049, "step": 25412 }, { "epoch": 3.0135183208822482, "grad_norm": 0.5460178927172625, "learning_rate": 7.559824097965829e-06, "loss": 0.0416, "step": 25413 }, { "epoch": 3.0136369026443734, "grad_norm": 0.4557348617868993, "learning_rate": 7.558104366215882e-06, "loss": 0.0349, "step": 25414 }, { "epoch": 3.013755484406498, "grad_norm": 0.5160595846758902, "learning_rate": 7.5563847952588255e-06, "loss": 0.0283, "step": 25415 }, { "epoch": 3.0138740661686234, "grad_norm": 0.6561190285564303, "learning_rate": 7.554665385110507e-06, "loss": 0.0373, "step": 25416 }, { "epoch": 3.013992647930748, "grad_norm": 0.3665816233604149, "learning_rate": 7.552946135786784e-06, "loss": 0.0299, "step": 25417 }, { "epoch": 3.0141112296928734, "grad_norm": 0.5901339208803061, "learning_rate": 7.551227047303511e-06, "loss": 0.0361, "step": 25418 }, { "epoch": 3.014229811454998, "grad_norm": 0.7071965063824553, "learning_rate": 7.549508119676518e-06, "loss": 0.0428, "step": 25419 }, { "epoch": 3.0143483932171233, "grad_norm": 0.5965694740222681, "learning_rate": 7.547789352921661e-06, "loss": 0.0456, "step": 25420 }, { "epoch": 3.014466974979248, "grad_norm": 0.33008458886896436, "learning_rate": 7.546070747054784e-06, "loss": 0.0192, "step": 25421 }, { "epoch": 3.0145855567413733, "grad_norm": 0.6834306122913407, "learning_rate": 7.544352302091731e-06, "loss": 0.0317, "step": 25422 }, { "epoch": 3.014704138503498, "grad_norm": 0.4077458094104322, "learning_rate": 7.542634018048355e-06, "loss": 0.0233, "step": 25423 }, { "epoch": 3.0148227202656233, "grad_norm": 0.2889129402649483, "learning_rate": 7.540915894940467e-06, "loss": 0.014, "step": 25424 }, { "epoch": 3.014941302027748, "grad_norm": 0.4146099568217442, "learning_rate": 7.539197932783942e-06, "loss": 0.0232, "step": 25425 }, { "epoch": 3.015059883789873, "grad_norm": 0.4852870317192876, "learning_rate": 7.537480131594593e-06, "loss": 0.0237, "step": 25426 }, { "epoch": 3.015178465551998, "grad_norm": 0.5029454665275506, "learning_rate": 7.5357624913882624e-06, "loss": 0.0305, "step": 25427 }, { "epoch": 3.015297047314123, "grad_norm": 0.46594863195527386, "learning_rate": 7.534045012180785e-06, "loss": 0.0298, "step": 25428 }, { "epoch": 3.015415629076248, "grad_norm": 0.627448694880681, "learning_rate": 7.532327693988003e-06, "loss": 0.0268, "step": 25429 }, { "epoch": 3.015534210838373, "grad_norm": 0.4082427994379912, "learning_rate": 7.530610536825733e-06, "loss": 0.0246, "step": 25430 }, { "epoch": 3.015652792600498, "grad_norm": 0.5804619848273482, "learning_rate": 7.528893540709808e-06, "loss": 0.0336, "step": 25431 }, { "epoch": 3.015771374362623, "grad_norm": 0.38520930168703293, "learning_rate": 7.527176705656064e-06, "loss": 0.0178, "step": 25432 }, { "epoch": 3.015889956124748, "grad_norm": 0.46814477530008647, "learning_rate": 7.5254600316803205e-06, "loss": 0.0244, "step": 25433 }, { "epoch": 3.016008537886873, "grad_norm": 0.647671379254069, "learning_rate": 7.523743518798418e-06, "loss": 0.0317, "step": 25434 }, { "epoch": 3.016127119648998, "grad_norm": 0.42928796524742885, "learning_rate": 7.52202716702615e-06, "loss": 0.0235, "step": 25435 }, { "epoch": 3.016245701411123, "grad_norm": 0.8138572936687117, "learning_rate": 7.520310976379377e-06, "loss": 0.0365, "step": 25436 }, { "epoch": 3.016364283173248, "grad_norm": 0.5580902746399836, "learning_rate": 7.5185949468738945e-06, "loss": 0.0351, "step": 25437 }, { "epoch": 3.016482864935373, "grad_norm": 0.44280617287973545, "learning_rate": 7.516879078525527e-06, "loss": 0.0262, "step": 25438 }, { "epoch": 3.0166014466974977, "grad_norm": 0.723328104005943, "learning_rate": 7.5151633713500955e-06, "loss": 0.0423, "step": 25439 }, { "epoch": 3.016720028459623, "grad_norm": 0.3132665552190127, "learning_rate": 7.513447825363426e-06, "loss": 0.0177, "step": 25440 }, { "epoch": 3.0168386102217477, "grad_norm": 0.44284573465599325, "learning_rate": 7.511732440581315e-06, "loss": 0.025, "step": 25441 }, { "epoch": 3.016957191983873, "grad_norm": 0.5274997634394825, "learning_rate": 7.510017217019585e-06, "loss": 0.0354, "step": 25442 }, { "epoch": 3.0170757737459977, "grad_norm": 0.4956010103901101, "learning_rate": 7.508302154694049e-06, "loss": 0.0223, "step": 25443 }, { "epoch": 3.017194355508123, "grad_norm": 0.3588426228123138, "learning_rate": 7.50658725362052e-06, "loss": 0.0232, "step": 25444 }, { "epoch": 3.0173129372702476, "grad_norm": 0.5731811465014716, "learning_rate": 7.504872513814809e-06, "loss": 0.0364, "step": 25445 }, { "epoch": 3.017431519032373, "grad_norm": 0.5455588406732829, "learning_rate": 7.503157935292707e-06, "loss": 0.0185, "step": 25446 }, { "epoch": 3.0175501007944976, "grad_norm": 0.5028066201597671, "learning_rate": 7.501443518070045e-06, "loss": 0.0267, "step": 25447 }, { "epoch": 3.017668682556623, "grad_norm": 0.3065145814931162, "learning_rate": 7.499729262162611e-06, "loss": 0.0114, "step": 25448 }, { "epoch": 3.0177872643187476, "grad_norm": 0.5697332413740279, "learning_rate": 7.498015167586217e-06, "loss": 0.0267, "step": 25449 }, { "epoch": 3.0179058460808728, "grad_norm": 0.4971967592931499, "learning_rate": 7.496301234356648e-06, "loss": 0.0249, "step": 25450 }, { "epoch": 3.018024427842998, "grad_norm": 0.5962900140790216, "learning_rate": 7.494587462489733e-06, "loss": 0.0311, "step": 25451 }, { "epoch": 3.0181430096051227, "grad_norm": 0.5537780303139198, "learning_rate": 7.492873852001245e-06, "loss": 0.0335, "step": 25452 }, { "epoch": 3.018261591367248, "grad_norm": 0.4272505611817636, "learning_rate": 7.491160402906994e-06, "loss": 0.031, "step": 25453 }, { "epoch": 3.0183801731293727, "grad_norm": 0.39656564031427666, "learning_rate": 7.489447115222773e-06, "loss": 0.0268, "step": 25454 }, { "epoch": 3.018498754891498, "grad_norm": 0.3603879682030226, "learning_rate": 7.487733988964374e-06, "loss": 0.0216, "step": 25455 }, { "epoch": 3.0186173366536226, "grad_norm": 0.6655804326461775, "learning_rate": 7.486021024147602e-06, "loss": 0.0282, "step": 25456 }, { "epoch": 3.018735918415748, "grad_norm": 0.2865516950031116, "learning_rate": 7.4843082207882245e-06, "loss": 0.0133, "step": 25457 }, { "epoch": 3.0188545001778726, "grad_norm": 0.5542351168382047, "learning_rate": 7.4825955789020604e-06, "loss": 0.0265, "step": 25458 }, { "epoch": 3.018973081939998, "grad_norm": 0.5509018829834003, "learning_rate": 7.480883098504876e-06, "loss": 0.0227, "step": 25459 }, { "epoch": 3.0190916637021226, "grad_norm": 0.46691607209142694, "learning_rate": 7.479170779612476e-06, "loss": 0.0239, "step": 25460 }, { "epoch": 3.0192102454642478, "grad_norm": 0.49743638913125854, "learning_rate": 7.477458622240618e-06, "loss": 0.0186, "step": 25461 }, { "epoch": 3.0193288272263725, "grad_norm": 0.539179207255222, "learning_rate": 7.475746626405122e-06, "loss": 0.0372, "step": 25462 }, { "epoch": 3.0194474089884977, "grad_norm": 0.5734367213765001, "learning_rate": 7.474034792121742e-06, "loss": 0.0278, "step": 25463 }, { "epoch": 3.0195659907506225, "grad_norm": 0.44320008915197917, "learning_rate": 7.472323119406272e-06, "loss": 0.0235, "step": 25464 }, { "epoch": 3.0196845725127477, "grad_norm": 0.543820303240899, "learning_rate": 7.470611608274486e-06, "loss": 0.0279, "step": 25465 }, { "epoch": 3.0198031542748724, "grad_norm": 0.3452124879199205, "learning_rate": 7.468900258742167e-06, "loss": 0.0206, "step": 25466 }, { "epoch": 3.0199217360369977, "grad_norm": 0.6951963660685124, "learning_rate": 7.4671890708250954e-06, "loss": 0.0311, "step": 25467 }, { "epoch": 3.0200403177991224, "grad_norm": 0.3528550892162062, "learning_rate": 7.465478044539026e-06, "loss": 0.0204, "step": 25468 }, { "epoch": 3.0201588995612476, "grad_norm": 0.34822199429530587, "learning_rate": 7.463767179899764e-06, "loss": 0.0201, "step": 25469 }, { "epoch": 3.0202774813233724, "grad_norm": 0.5058397970584119, "learning_rate": 7.462056476923054e-06, "loss": 0.024, "step": 25470 }, { "epoch": 3.0203960630854976, "grad_norm": 0.6135611477534872, "learning_rate": 7.460345935624685e-06, "loss": 0.0297, "step": 25471 }, { "epoch": 3.0205146448476223, "grad_norm": 0.407701367865082, "learning_rate": 7.458635556020405e-06, "loss": 0.0206, "step": 25472 }, { "epoch": 3.0206332266097475, "grad_norm": 0.5893773354722381, "learning_rate": 7.456925338126009e-06, "loss": 0.0366, "step": 25473 }, { "epoch": 3.0207518083718723, "grad_norm": 0.40468513353752417, "learning_rate": 7.45521528195724e-06, "loss": 0.0237, "step": 25474 }, { "epoch": 3.0208703901339975, "grad_norm": 0.506267146506876, "learning_rate": 7.453505387529869e-06, "loss": 0.0234, "step": 25475 }, { "epoch": 3.0209889718961223, "grad_norm": 0.46690684453358017, "learning_rate": 7.451795654859664e-06, "loss": 0.0211, "step": 25476 }, { "epoch": 3.0211075536582475, "grad_norm": 0.38917389295522664, "learning_rate": 7.450086083962385e-06, "loss": 0.019, "step": 25477 }, { "epoch": 3.021226135420372, "grad_norm": 0.9520255297834674, "learning_rate": 7.448376674853796e-06, "loss": 0.0567, "step": 25478 }, { "epoch": 3.0213447171824974, "grad_norm": 0.6491049203808418, "learning_rate": 7.446667427549645e-06, "loss": 0.0348, "step": 25479 }, { "epoch": 3.021463298944622, "grad_norm": 0.5401192613479493, "learning_rate": 7.444958342065694e-06, "loss": 0.0348, "step": 25480 }, { "epoch": 3.0215818807067474, "grad_norm": 0.3978183287733996, "learning_rate": 7.4432494184176995e-06, "loss": 0.0255, "step": 25481 }, { "epoch": 3.021700462468872, "grad_norm": 0.6458013935391205, "learning_rate": 7.441540656621418e-06, "loss": 0.0371, "step": 25482 }, { "epoch": 3.0218190442309973, "grad_norm": 0.46580433101473323, "learning_rate": 7.4398320566926e-06, "loss": 0.0211, "step": 25483 }, { "epoch": 3.021937625993122, "grad_norm": 0.5672087461617801, "learning_rate": 7.438123618646994e-06, "loss": 0.0287, "step": 25484 }, { "epoch": 3.0220562077552473, "grad_norm": 0.23006123234189121, "learning_rate": 7.436415342500363e-06, "loss": 0.0141, "step": 25485 }, { "epoch": 3.022174789517372, "grad_norm": 0.4669247616101962, "learning_rate": 7.434707228268434e-06, "loss": 0.0193, "step": 25486 }, { "epoch": 3.0222933712794973, "grad_norm": 0.5390706610589175, "learning_rate": 7.4329992759669676e-06, "loss": 0.0285, "step": 25487 }, { "epoch": 3.022411953041622, "grad_norm": 0.45281783674892206, "learning_rate": 7.4312914856117025e-06, "loss": 0.0199, "step": 25488 }, { "epoch": 3.0225305348037472, "grad_norm": 0.6043914276354383, "learning_rate": 7.429583857218395e-06, "loss": 0.0311, "step": 25489 }, { "epoch": 3.022649116565872, "grad_norm": 0.4681167595521451, "learning_rate": 7.427876390802771e-06, "loss": 0.0242, "step": 25490 }, { "epoch": 3.022767698327997, "grad_norm": 0.5553074656442504, "learning_rate": 7.426169086380577e-06, "loss": 0.0229, "step": 25491 }, { "epoch": 3.022886280090122, "grad_norm": 0.4984089144414229, "learning_rate": 7.424461943967556e-06, "loss": 0.0242, "step": 25492 }, { "epoch": 3.023004861852247, "grad_norm": 0.3980506446070038, "learning_rate": 7.42275496357944e-06, "loss": 0.0202, "step": 25493 }, { "epoch": 3.023123443614372, "grad_norm": 0.4487572316534197, "learning_rate": 7.421048145231979e-06, "loss": 0.0223, "step": 25494 }, { "epoch": 3.023242025376497, "grad_norm": 0.4599336378042825, "learning_rate": 7.419341488940882e-06, "loss": 0.0191, "step": 25495 }, { "epoch": 3.023360607138622, "grad_norm": 0.6497118454685568, "learning_rate": 7.417634994721911e-06, "loss": 0.0306, "step": 25496 }, { "epoch": 3.023479188900747, "grad_norm": 0.6240516743373182, "learning_rate": 7.415928662590776e-06, "loss": 0.0341, "step": 25497 }, { "epoch": 3.023597770662872, "grad_norm": 0.6396505858844798, "learning_rate": 7.414222492563219e-06, "loss": 0.03, "step": 25498 }, { "epoch": 3.023716352424997, "grad_norm": 0.49692055649460204, "learning_rate": 7.412516484654963e-06, "loss": 0.0251, "step": 25499 }, { "epoch": 3.0238349341871222, "grad_norm": 0.38333478252429115, "learning_rate": 7.410810638881746e-06, "loss": 0.0221, "step": 25500 }, { "epoch": 3.023953515949247, "grad_norm": 0.5292921259006801, "learning_rate": 7.40910495525928e-06, "loss": 0.0292, "step": 25501 }, { "epoch": 3.024072097711372, "grad_norm": 0.7714009184534717, "learning_rate": 7.407399433803294e-06, "loss": 0.0394, "step": 25502 }, { "epoch": 3.024190679473497, "grad_norm": 0.4984785947562804, "learning_rate": 7.4056940745295095e-06, "loss": 0.0287, "step": 25503 }, { "epoch": 3.024309261235622, "grad_norm": 0.3162949668172879, "learning_rate": 7.403988877453652e-06, "loss": 0.0192, "step": 25504 }, { "epoch": 3.024427842997747, "grad_norm": 0.39241984280731035, "learning_rate": 7.402283842591448e-06, "loss": 0.0215, "step": 25505 }, { "epoch": 3.024546424759872, "grad_norm": 0.4095016754355888, "learning_rate": 7.400578969958588e-06, "loss": 0.0217, "step": 25506 }, { "epoch": 3.024665006521997, "grad_norm": 0.5149680771165915, "learning_rate": 7.398874259570826e-06, "loss": 0.0338, "step": 25507 }, { "epoch": 3.024783588284122, "grad_norm": 0.3419044233024707, "learning_rate": 7.397169711443852e-06, "loss": 0.016, "step": 25508 }, { "epoch": 3.024902170046247, "grad_norm": 0.682506261179491, "learning_rate": 7.395465325593395e-06, "loss": 0.0396, "step": 25509 }, { "epoch": 3.025020751808372, "grad_norm": 0.693481942071081, "learning_rate": 7.393761102035143e-06, "loss": 0.0564, "step": 25510 }, { "epoch": 3.025139333570497, "grad_norm": 0.8032710234003713, "learning_rate": 7.392057040784842e-06, "loss": 0.0346, "step": 25511 }, { "epoch": 3.025257915332622, "grad_norm": 0.3852059827950563, "learning_rate": 7.390353141858172e-06, "loss": 0.0197, "step": 25512 }, { "epoch": 3.0253764970947468, "grad_norm": 0.64323019124524, "learning_rate": 7.3886494052708534e-06, "loss": 0.0315, "step": 25513 }, { "epoch": 3.025495078856872, "grad_norm": 0.4890779997762815, "learning_rate": 7.3869458310385886e-06, "loss": 0.0234, "step": 25514 }, { "epoch": 3.0256136606189967, "grad_norm": 0.40555980939821606, "learning_rate": 7.385242419177086e-06, "loss": 0.0273, "step": 25515 }, { "epoch": 3.025732242381122, "grad_norm": 0.44748269893771897, "learning_rate": 7.3835391697020554e-06, "loss": 0.0321, "step": 25516 }, { "epoch": 3.0258508241432467, "grad_norm": 0.4395189246817603, "learning_rate": 7.381836082629176e-06, "loss": 0.0242, "step": 25517 }, { "epoch": 3.025969405905372, "grad_norm": 0.6776546167948023, "learning_rate": 7.380133157974178e-06, "loss": 0.0296, "step": 25518 }, { "epoch": 3.0260879876674966, "grad_norm": 0.2925616888456235, "learning_rate": 7.378430395752736e-06, "loss": 0.0137, "step": 25519 }, { "epoch": 3.026206569429622, "grad_norm": 0.4032751310903437, "learning_rate": 7.376727795980568e-06, "loss": 0.0182, "step": 25520 }, { "epoch": 3.0263251511917466, "grad_norm": 0.47261942660661505, "learning_rate": 7.37502535867334e-06, "loss": 0.0297, "step": 25521 }, { "epoch": 3.026443732953872, "grad_norm": 0.3904915060181876, "learning_rate": 7.373323083846781e-06, "loss": 0.0218, "step": 25522 }, { "epoch": 3.0265623147159966, "grad_norm": 0.4027048592966785, "learning_rate": 7.371620971516563e-06, "loss": 0.0248, "step": 25523 }, { "epoch": 3.0266808964781218, "grad_norm": 0.7468072613979407, "learning_rate": 7.369919021698379e-06, "loss": 0.0299, "step": 25524 }, { "epoch": 3.0267994782402465, "grad_norm": 0.9586876626137988, "learning_rate": 7.368217234407923e-06, "loss": 0.0348, "step": 25525 }, { "epoch": 3.0269180600023717, "grad_norm": 0.6120987427317511, "learning_rate": 7.366515609660885e-06, "loss": 0.031, "step": 25526 }, { "epoch": 3.0270366417644965, "grad_norm": 0.5589865555166775, "learning_rate": 7.3648141474729535e-06, "loss": 0.0316, "step": 25527 }, { "epoch": 3.0271552235266217, "grad_norm": 0.5701176187389578, "learning_rate": 7.3631128478597964e-06, "loss": 0.0274, "step": 25528 }, { "epoch": 3.0272738052887465, "grad_norm": 0.3921430539882623, "learning_rate": 7.361411710837127e-06, "loss": 0.0226, "step": 25529 }, { "epoch": 3.0273923870508717, "grad_norm": 0.7863232964293633, "learning_rate": 7.359710736420603e-06, "loss": 0.0398, "step": 25530 }, { "epoch": 3.0275109688129964, "grad_norm": 0.41143809000056625, "learning_rate": 7.358009924625914e-06, "loss": 0.0217, "step": 25531 }, { "epoch": 3.0276295505751216, "grad_norm": 0.6986414696695976, "learning_rate": 7.35630927546874e-06, "loss": 0.0351, "step": 25532 }, { "epoch": 3.0277481323372464, "grad_norm": 0.35532763353248, "learning_rate": 7.354608788964756e-06, "loss": 0.0155, "step": 25533 }, { "epoch": 3.0278667140993716, "grad_norm": 0.6338524528490971, "learning_rate": 7.352908465129651e-06, "loss": 0.0333, "step": 25534 }, { "epoch": 3.0279852958614963, "grad_norm": 0.5969539212927654, "learning_rate": 7.351208303979082e-06, "loss": 0.0324, "step": 25535 }, { "epoch": 3.0281038776236215, "grad_norm": 0.7062939375641806, "learning_rate": 7.3495083055287295e-06, "loss": 0.0466, "step": 25536 }, { "epoch": 3.0282224593857463, "grad_norm": 0.6272453158278343, "learning_rate": 7.347808469794265e-06, "loss": 0.0433, "step": 25537 }, { "epoch": 3.0283410411478715, "grad_norm": 0.4463708884863511, "learning_rate": 7.34610879679137e-06, "loss": 0.0233, "step": 25538 }, { "epoch": 3.0284596229099963, "grad_norm": 0.5352132734665388, "learning_rate": 7.344409286535686e-06, "loss": 0.0295, "step": 25539 }, { "epoch": 3.0285782046721215, "grad_norm": 0.44037882877644996, "learning_rate": 7.342709939042913e-06, "loss": 0.0234, "step": 25540 }, { "epoch": 3.0286967864342462, "grad_norm": 0.3895129966720545, "learning_rate": 7.341010754328695e-06, "loss": 0.0227, "step": 25541 }, { "epoch": 3.0288153681963714, "grad_norm": 0.513295188817292, "learning_rate": 7.339311732408702e-06, "loss": 0.0251, "step": 25542 }, { "epoch": 3.028933949958496, "grad_norm": 0.5039763841980448, "learning_rate": 7.3376128732986e-06, "loss": 0.0275, "step": 25543 }, { "epoch": 3.0290525317206214, "grad_norm": 0.7012567311969867, "learning_rate": 7.335914177014044e-06, "loss": 0.0505, "step": 25544 }, { "epoch": 3.029171113482746, "grad_norm": 0.2812554849044171, "learning_rate": 7.334215643570711e-06, "loss": 0.0181, "step": 25545 }, { "epoch": 3.0292896952448713, "grad_norm": 0.3976174913121928, "learning_rate": 7.332517272984235e-06, "loss": 0.0185, "step": 25546 }, { "epoch": 3.029408277006996, "grad_norm": 0.3518922581715751, "learning_rate": 7.330819065270286e-06, "loss": 0.019, "step": 25547 }, { "epoch": 3.0295268587691213, "grad_norm": 0.6428847040917856, "learning_rate": 7.3291210204445156e-06, "loss": 0.037, "step": 25548 }, { "epoch": 3.0296454405312465, "grad_norm": 0.37447333136607486, "learning_rate": 7.327423138522591e-06, "loss": 0.0198, "step": 25549 }, { "epoch": 3.0297640222933713, "grad_norm": 0.7737848156820663, "learning_rate": 7.3257254195201434e-06, "loss": 0.0454, "step": 25550 }, { "epoch": 3.0298826040554965, "grad_norm": 0.5464934604177705, "learning_rate": 7.3240278634528345e-06, "loss": 0.0229, "step": 25551 }, { "epoch": 3.0300011858176212, "grad_norm": 0.4770666812463229, "learning_rate": 7.3223304703363135e-06, "loss": 0.0283, "step": 25552 }, { "epoch": 3.0301197675797464, "grad_norm": 0.3675954774533955, "learning_rate": 7.320633240186228e-06, "loss": 0.0214, "step": 25553 }, { "epoch": 3.030238349341871, "grad_norm": 0.4034747377498527, "learning_rate": 7.318936173018223e-06, "loss": 0.0192, "step": 25554 }, { "epoch": 3.0303569311039964, "grad_norm": 0.4266273690376254, "learning_rate": 7.317239268847945e-06, "loss": 0.0234, "step": 25555 }, { "epoch": 3.030475512866121, "grad_norm": 1.0435956703356837, "learning_rate": 7.315542527691047e-06, "loss": 0.0562, "step": 25556 }, { "epoch": 3.0305940946282464, "grad_norm": 0.4538907691931695, "learning_rate": 7.31384594956315e-06, "loss": 0.0299, "step": 25557 }, { "epoch": 3.030712676390371, "grad_norm": 0.35267055838517186, "learning_rate": 7.312149534479906e-06, "loss": 0.0181, "step": 25558 }, { "epoch": 3.0308312581524963, "grad_norm": 0.4189697192965319, "learning_rate": 7.3104532824569525e-06, "loss": 0.0254, "step": 25559 }, { "epoch": 3.030949839914621, "grad_norm": 0.678255412314605, "learning_rate": 7.308757193509935e-06, "loss": 0.03, "step": 25560 }, { "epoch": 3.0310684216767463, "grad_norm": 0.48959123466019977, "learning_rate": 7.3070612676544745e-06, "loss": 0.0253, "step": 25561 }, { "epoch": 3.031187003438871, "grad_norm": 0.39231716559149554, "learning_rate": 7.305365504906214e-06, "loss": 0.0209, "step": 25562 }, { "epoch": 3.0313055852009962, "grad_norm": 0.5500229471722172, "learning_rate": 7.3036699052807826e-06, "loss": 0.0275, "step": 25563 }, { "epoch": 3.031424166963121, "grad_norm": 0.5094475728346692, "learning_rate": 7.301974468793815e-06, "loss": 0.0243, "step": 25564 }, { "epoch": 3.031542748725246, "grad_norm": 0.5467733286973911, "learning_rate": 7.300279195460949e-06, "loss": 0.0288, "step": 25565 }, { "epoch": 3.031661330487371, "grad_norm": 0.46525147750966833, "learning_rate": 7.298584085297785e-06, "loss": 0.0304, "step": 25566 }, { "epoch": 3.031779912249496, "grad_norm": 0.5371142021514349, "learning_rate": 7.296889138319988e-06, "loss": 0.0334, "step": 25567 }, { "epoch": 3.031898494011621, "grad_norm": 0.36629876386139965, "learning_rate": 7.295194354543155e-06, "loss": 0.0176, "step": 25568 }, { "epoch": 3.032017075773746, "grad_norm": 0.4126924088140684, "learning_rate": 7.293499733982919e-06, "loss": 0.0166, "step": 25569 }, { "epoch": 3.032135657535871, "grad_norm": 0.5572112703695168, "learning_rate": 7.291805276654903e-06, "loss": 0.0415, "step": 25570 }, { "epoch": 3.032254239297996, "grad_norm": 0.5998509453378863, "learning_rate": 7.290110982574732e-06, "loss": 0.0303, "step": 25571 }, { "epoch": 3.032372821060121, "grad_norm": 0.41347250991397716, "learning_rate": 7.288416851758017e-06, "loss": 0.0233, "step": 25572 }, { "epoch": 3.032491402822246, "grad_norm": 0.7657516581507618, "learning_rate": 7.286722884220376e-06, "loss": 0.0288, "step": 25573 }, { "epoch": 3.032609984584371, "grad_norm": 0.6774773212085557, "learning_rate": 7.285029079977432e-06, "loss": 0.0432, "step": 25574 }, { "epoch": 3.032728566346496, "grad_norm": 0.46274927790559184, "learning_rate": 7.283335439044792e-06, "loss": 0.0361, "step": 25575 }, { "epoch": 3.0328471481086208, "grad_norm": 0.5144320941415619, "learning_rate": 7.2816419614380845e-06, "loss": 0.0359, "step": 25576 }, { "epoch": 3.032965729870746, "grad_norm": 0.42129238380731465, "learning_rate": 7.279948647172893e-06, "loss": 0.0247, "step": 25577 }, { "epoch": 3.0330843116328707, "grad_norm": 0.45349360367703695, "learning_rate": 7.278255496264863e-06, "loss": 0.0238, "step": 25578 }, { "epoch": 3.033202893394996, "grad_norm": 0.4589787893878855, "learning_rate": 7.276562508729576e-06, "loss": 0.0273, "step": 25579 }, { "epoch": 3.0333214751571207, "grad_norm": 0.5910752154588953, "learning_rate": 7.2748696845826496e-06, "loss": 0.0321, "step": 25580 }, { "epoch": 3.033440056919246, "grad_norm": 0.2870225492228098, "learning_rate": 7.273177023839686e-06, "loss": 0.0191, "step": 25581 }, { "epoch": 3.0335586386813707, "grad_norm": 0.5123756153332801, "learning_rate": 7.271484526516295e-06, "loss": 0.023, "step": 25582 }, { "epoch": 3.033677220443496, "grad_norm": 0.5223266134086794, "learning_rate": 7.269792192628083e-06, "loss": 0.0278, "step": 25583 }, { "epoch": 3.0337958022056206, "grad_norm": 0.5712989809555142, "learning_rate": 7.26810002219063e-06, "loss": 0.0363, "step": 25584 }, { "epoch": 3.033914383967746, "grad_norm": 0.6214327098928413, "learning_rate": 7.2664080152195666e-06, "loss": 0.0308, "step": 25585 }, { "epoch": 3.0340329657298706, "grad_norm": 0.6214361828820734, "learning_rate": 7.264716171730468e-06, "loss": 0.0252, "step": 25586 }, { "epoch": 3.034151547491996, "grad_norm": 0.6398108586930592, "learning_rate": 7.263024491738943e-06, "loss": 0.0335, "step": 25587 }, { "epoch": 3.0342701292541205, "grad_norm": 0.559035698901699, "learning_rate": 7.261332975260568e-06, "loss": 0.0289, "step": 25588 }, { "epoch": 3.0343887110162457, "grad_norm": 0.27383549985859873, "learning_rate": 7.259641622310964e-06, "loss": 0.0139, "step": 25589 }, { "epoch": 3.0345072927783705, "grad_norm": 0.9987368935432386, "learning_rate": 7.257950432905702e-06, "loss": 0.0452, "step": 25590 }, { "epoch": 3.0346258745404957, "grad_norm": 0.6360451708718425, "learning_rate": 7.256259407060384e-06, "loss": 0.0404, "step": 25591 }, { "epoch": 3.0347444563026205, "grad_norm": 0.611533712873175, "learning_rate": 7.254568544790591e-06, "loss": 0.0356, "step": 25592 }, { "epoch": 3.0348630380647457, "grad_norm": 0.3582516646364771, "learning_rate": 7.252877846111916e-06, "loss": 0.017, "step": 25593 }, { "epoch": 3.0349816198268704, "grad_norm": 0.5173962866184124, "learning_rate": 7.251187311039953e-06, "loss": 0.0262, "step": 25594 }, { "epoch": 3.0351002015889956, "grad_norm": 0.6078580629437235, "learning_rate": 7.249496939590264e-06, "loss": 0.0337, "step": 25595 }, { "epoch": 3.0352187833511204, "grad_norm": 1.1519081016826567, "learning_rate": 7.247806731778459e-06, "loss": 0.0565, "step": 25596 }, { "epoch": 3.0353373651132456, "grad_norm": 0.2680548843384091, "learning_rate": 7.2461166876201e-06, "loss": 0.0144, "step": 25597 }, { "epoch": 3.035455946875371, "grad_norm": 0.3589625551513458, "learning_rate": 7.244426807130783e-06, "loss": 0.0189, "step": 25598 }, { "epoch": 3.0355745286374956, "grad_norm": 0.5832286055227365, "learning_rate": 7.24273709032606e-06, "loss": 0.0324, "step": 25599 }, { "epoch": 3.0356931103996208, "grad_norm": 0.5794606940623375, "learning_rate": 7.24104753722154e-06, "loss": 0.0264, "step": 25600 }, { "epoch": 3.0358116921617455, "grad_norm": 0.4667255672821096, "learning_rate": 7.239358147832781e-06, "loss": 0.0194, "step": 25601 }, { "epoch": 3.0359302739238707, "grad_norm": 0.5811785598811245, "learning_rate": 7.237668922175356e-06, "loss": 0.0358, "step": 25602 }, { "epoch": 3.0360488556859955, "grad_norm": 0.8183454791301821, "learning_rate": 7.2359798602648435e-06, "loss": 0.0463, "step": 25603 }, { "epoch": 3.0361674374481207, "grad_norm": 0.5252439107218659, "learning_rate": 7.234290962116813e-06, "loss": 0.0256, "step": 25604 }, { "epoch": 3.0362860192102454, "grad_norm": 0.538037475437534, "learning_rate": 7.232602227746843e-06, "loss": 0.0233, "step": 25605 }, { "epoch": 3.0364046009723706, "grad_norm": 0.47562896272659233, "learning_rate": 7.230913657170482e-06, "loss": 0.0254, "step": 25606 }, { "epoch": 3.0365231827344954, "grad_norm": 0.5927918563434063, "learning_rate": 7.22922525040331e-06, "loss": 0.0255, "step": 25607 }, { "epoch": 3.0366417644966206, "grad_norm": 0.48024653702667325, "learning_rate": 7.227537007460888e-06, "loss": 0.0272, "step": 25608 }, { "epoch": 3.0367603462587454, "grad_norm": 0.5013196079392649, "learning_rate": 7.225848928358789e-06, "loss": 0.0312, "step": 25609 }, { "epoch": 3.0368789280208706, "grad_norm": 0.3844052038060899, "learning_rate": 7.224161013112551e-06, "loss": 0.0207, "step": 25610 }, { "epoch": 3.0369975097829953, "grad_norm": 0.29359167870184716, "learning_rate": 7.222473261737767e-06, "loss": 0.0132, "step": 25611 }, { "epoch": 3.0371160915451205, "grad_norm": 0.4355699414648827, "learning_rate": 7.2207856742499695e-06, "loss": 0.0257, "step": 25612 }, { "epoch": 3.0372346733072453, "grad_norm": 0.628120709711594, "learning_rate": 7.219098250664727e-06, "loss": 0.0276, "step": 25613 }, { "epoch": 3.0373532550693705, "grad_norm": 0.40491959263471494, "learning_rate": 7.217410990997592e-06, "loss": 0.0235, "step": 25614 }, { "epoch": 3.0374718368314952, "grad_norm": 0.46840720876292546, "learning_rate": 7.215723895264123e-06, "loss": 0.0226, "step": 25615 }, { "epoch": 3.0375904185936204, "grad_norm": 0.4422022466468966, "learning_rate": 7.214036963479878e-06, "loss": 0.021, "step": 25616 }, { "epoch": 3.037709000355745, "grad_norm": 0.5858133075757084, "learning_rate": 7.212350195660397e-06, "loss": 0.0215, "step": 25617 }, { "epoch": 3.0378275821178704, "grad_norm": 0.3569306643013583, "learning_rate": 7.2106635918212315e-06, "loss": 0.0194, "step": 25618 }, { "epoch": 3.037946163879995, "grad_norm": 0.5904604738461098, "learning_rate": 7.208977151977933e-06, "loss": 0.0315, "step": 25619 }, { "epoch": 3.0380647456421204, "grad_norm": 0.6580836070541726, "learning_rate": 7.207290876146058e-06, "loss": 0.0292, "step": 25620 }, { "epoch": 3.038183327404245, "grad_norm": 0.5284492394292898, "learning_rate": 7.205604764341134e-06, "loss": 0.0243, "step": 25621 }, { "epoch": 3.0383019091663703, "grad_norm": 0.47705860229871694, "learning_rate": 7.203918816578712e-06, "loss": 0.0296, "step": 25622 }, { "epoch": 3.038420490928495, "grad_norm": 0.7113063512925267, "learning_rate": 7.202233032874337e-06, "loss": 0.0369, "step": 25623 }, { "epoch": 3.0385390726906203, "grad_norm": 0.4558707316789305, "learning_rate": 7.200547413243547e-06, "loss": 0.0235, "step": 25624 }, { "epoch": 3.038657654452745, "grad_norm": 0.7429285270477268, "learning_rate": 7.198861957701883e-06, "loss": 0.0466, "step": 25625 }, { "epoch": 3.0387762362148703, "grad_norm": 0.38822655752650936, "learning_rate": 7.197176666264882e-06, "loss": 0.0227, "step": 25626 }, { "epoch": 3.038894817976995, "grad_norm": 0.7322494642077816, "learning_rate": 7.19549153894809e-06, "loss": 0.0266, "step": 25627 }, { "epoch": 3.03901339973912, "grad_norm": 0.6333058803843093, "learning_rate": 7.193806575767023e-06, "loss": 0.0441, "step": 25628 }, { "epoch": 3.039131981501245, "grad_norm": 0.5950155366955171, "learning_rate": 7.1921217767372265e-06, "loss": 0.034, "step": 25629 }, { "epoch": 3.03925056326337, "grad_norm": 0.6807040934951091, "learning_rate": 7.190437141874229e-06, "loss": 0.0381, "step": 25630 }, { "epoch": 3.039369145025495, "grad_norm": 0.48061921916731054, "learning_rate": 7.18875267119356e-06, "loss": 0.0261, "step": 25631 }, { "epoch": 3.03948772678762, "grad_norm": 0.5642687452334069, "learning_rate": 7.187068364710758e-06, "loss": 0.0233, "step": 25632 }, { "epoch": 3.039606308549745, "grad_norm": 0.33438174643318763, "learning_rate": 7.185384222441327e-06, "loss": 0.0228, "step": 25633 }, { "epoch": 3.03972489031187, "grad_norm": 0.7844095089304419, "learning_rate": 7.183700244400824e-06, "loss": 0.044, "step": 25634 }, { "epoch": 3.039843472073995, "grad_norm": 0.9068355884035788, "learning_rate": 7.182016430604749e-06, "loss": 0.0389, "step": 25635 }, { "epoch": 3.03996205383612, "grad_norm": 1.0305473409151047, "learning_rate": 7.180332781068638e-06, "loss": 0.0415, "step": 25636 }, { "epoch": 3.040080635598245, "grad_norm": 0.4780893321237964, "learning_rate": 7.178649295807993e-06, "loss": 0.0353, "step": 25637 }, { "epoch": 3.04019921736037, "grad_norm": 0.6278386441443284, "learning_rate": 7.176965974838365e-06, "loss": 0.0206, "step": 25638 }, { "epoch": 3.040317799122495, "grad_norm": 0.33118724491277135, "learning_rate": 7.175282818175244e-06, "loss": 0.0169, "step": 25639 }, { "epoch": 3.04043638088462, "grad_norm": 0.7962534475520948, "learning_rate": 7.17359982583416e-06, "loss": 0.0301, "step": 25640 }, { "epoch": 3.0405549626467447, "grad_norm": 0.5965613816626361, "learning_rate": 7.171916997830621e-06, "loss": 0.0267, "step": 25641 }, { "epoch": 3.04067354440887, "grad_norm": 0.47351196431390985, "learning_rate": 7.170234334180151e-06, "loss": 0.0266, "step": 25642 }, { "epoch": 3.0407921261709947, "grad_norm": 0.7394377050117784, "learning_rate": 7.16855183489826e-06, "loss": 0.0343, "step": 25643 }, { "epoch": 3.04091070793312, "grad_norm": 0.3860249704907154, "learning_rate": 7.1668695000004394e-06, "loss": 0.0212, "step": 25644 }, { "epoch": 3.0410292896952447, "grad_norm": 0.6495034400487649, "learning_rate": 7.165187329502229e-06, "loss": 0.0405, "step": 25645 }, { "epoch": 3.04114787145737, "grad_norm": 0.4841623476857207, "learning_rate": 7.1635053234191145e-06, "loss": 0.0224, "step": 25646 }, { "epoch": 3.0412664532194946, "grad_norm": 0.4468764228883938, "learning_rate": 7.161823481766616e-06, "loss": 0.0261, "step": 25647 }, { "epoch": 3.04138503498162, "grad_norm": 0.42925567022030775, "learning_rate": 7.160141804560214e-06, "loss": 0.0217, "step": 25648 }, { "epoch": 3.0415036167437446, "grad_norm": 0.41470415343212974, "learning_rate": 7.158460291815444e-06, "loss": 0.0243, "step": 25649 }, { "epoch": 3.04162219850587, "grad_norm": 0.38713412142802606, "learning_rate": 7.156778943547784e-06, "loss": 0.0182, "step": 25650 }, { "epoch": 3.041740780267995, "grad_norm": 0.4204874935196305, "learning_rate": 7.155097759772742e-06, "loss": 0.0237, "step": 25651 }, { "epoch": 3.0418593620301198, "grad_norm": 0.494062708097531, "learning_rate": 7.1534167405058135e-06, "loss": 0.0177, "step": 25652 }, { "epoch": 3.041977943792245, "grad_norm": 0.5859597347144514, "learning_rate": 7.1517358857624975e-06, "loss": 0.0313, "step": 25653 }, { "epoch": 3.0420965255543697, "grad_norm": 0.4789678932838485, "learning_rate": 7.1500551955583e-06, "loss": 0.0212, "step": 25654 }, { "epoch": 3.042215107316495, "grad_norm": 0.5971098139122184, "learning_rate": 7.148374669908686e-06, "loss": 0.035, "step": 25655 }, { "epoch": 3.0423336890786197, "grad_norm": 0.28853330880716554, "learning_rate": 7.146694308829186e-06, "loss": 0.0138, "step": 25656 }, { "epoch": 3.042452270840745, "grad_norm": 0.4588879212859405, "learning_rate": 7.1450141123352604e-06, "loss": 0.0297, "step": 25657 }, { "epoch": 3.0425708526028696, "grad_norm": 0.6459429196558409, "learning_rate": 7.143334080442418e-06, "loss": 0.0316, "step": 25658 }, { "epoch": 3.042689434364995, "grad_norm": 0.8413861988211172, "learning_rate": 7.141654213166121e-06, "loss": 0.055, "step": 25659 }, { "epoch": 3.0428080161271196, "grad_norm": 0.5371077179959387, "learning_rate": 7.13997451052189e-06, "loss": 0.0294, "step": 25660 }, { "epoch": 3.042926597889245, "grad_norm": 0.4895304295407183, "learning_rate": 7.1382949725251866e-06, "loss": 0.0334, "step": 25661 }, { "epoch": 3.0430451796513696, "grad_norm": 0.3788876305901472, "learning_rate": 7.1366155991914954e-06, "loss": 0.0187, "step": 25662 }, { "epoch": 3.0431637614134948, "grad_norm": 0.9246530848047887, "learning_rate": 7.134936390536307e-06, "loss": 0.0438, "step": 25663 }, { "epoch": 3.0432823431756195, "grad_norm": 0.6251586299501516, "learning_rate": 7.133257346575095e-06, "loss": 0.0428, "step": 25664 }, { "epoch": 3.0434009249377447, "grad_norm": 0.34853761414836393, "learning_rate": 7.131578467323352e-06, "loss": 0.0169, "step": 25665 }, { "epoch": 3.0435195066998695, "grad_norm": 0.6259589930523619, "learning_rate": 7.129899752796532e-06, "loss": 0.0304, "step": 25666 }, { "epoch": 3.0436380884619947, "grad_norm": 0.3609650028266282, "learning_rate": 7.128221203010124e-06, "loss": 0.0164, "step": 25667 }, { "epoch": 3.0437566702241194, "grad_norm": 0.761655446487549, "learning_rate": 7.1265428179796005e-06, "loss": 0.0419, "step": 25668 }, { "epoch": 3.0438752519862446, "grad_norm": 0.41876560421631387, "learning_rate": 7.124864597720443e-06, "loss": 0.0183, "step": 25669 }, { "epoch": 3.0439938337483694, "grad_norm": 0.45494253828785697, "learning_rate": 7.123186542248097e-06, "loss": 0.0211, "step": 25670 }, { "epoch": 3.0441124155104946, "grad_norm": 0.5332898704893814, "learning_rate": 7.121508651578066e-06, "loss": 0.0274, "step": 25671 }, { "epoch": 3.0442309972726194, "grad_norm": 0.7185109375180969, "learning_rate": 7.119830925725793e-06, "loss": 0.0336, "step": 25672 }, { "epoch": 3.0443495790347446, "grad_norm": 0.7519227798063824, "learning_rate": 7.118153364706753e-06, "loss": 0.0434, "step": 25673 }, { "epoch": 3.0444681607968693, "grad_norm": 0.5716813863219283, "learning_rate": 7.1164759685364095e-06, "loss": 0.032, "step": 25674 }, { "epoch": 3.0445867425589945, "grad_norm": 0.5518543688276104, "learning_rate": 7.11479873723023e-06, "loss": 0.0305, "step": 25675 }, { "epoch": 3.0447053243211193, "grad_norm": 0.4662502434311144, "learning_rate": 7.11312167080368e-06, "loss": 0.0246, "step": 25676 }, { "epoch": 3.0448239060832445, "grad_norm": 0.3986982511856967, "learning_rate": 7.1114447692722045e-06, "loss": 0.0202, "step": 25677 }, { "epoch": 3.0449424878453693, "grad_norm": 0.5059119381185545, "learning_rate": 7.109768032651274e-06, "loss": 0.0226, "step": 25678 }, { "epoch": 3.0450610696074945, "grad_norm": 0.31519470986580544, "learning_rate": 7.108091460956342e-06, "loss": 0.0194, "step": 25679 }, { "epoch": 3.045179651369619, "grad_norm": 0.46108403407734533, "learning_rate": 7.1064150542028754e-06, "loss": 0.0207, "step": 25680 }, { "epoch": 3.0452982331317444, "grad_norm": 0.6638182047951712, "learning_rate": 7.104738812406303e-06, "loss": 0.0337, "step": 25681 }, { "epoch": 3.045416814893869, "grad_norm": 0.4737633590561371, "learning_rate": 7.103062735582111e-06, "loss": 0.0268, "step": 25682 }, { "epoch": 3.0455353966559944, "grad_norm": 0.6502132978790345, "learning_rate": 7.101386823745726e-06, "loss": 0.0425, "step": 25683 }, { "epoch": 3.045653978418119, "grad_norm": 0.7390798809762883, "learning_rate": 7.099711076912604e-06, "loss": 0.0479, "step": 25684 }, { "epoch": 3.0457725601802443, "grad_norm": 0.5982526814922612, "learning_rate": 7.098035495098196e-06, "loss": 0.0322, "step": 25685 }, { "epoch": 3.045891141942369, "grad_norm": 0.5172581241521865, "learning_rate": 7.096360078317949e-06, "loss": 0.023, "step": 25686 }, { "epoch": 3.0460097237044943, "grad_norm": 0.8477402082824489, "learning_rate": 7.094684826587314e-06, "loss": 0.052, "step": 25687 }, { "epoch": 3.046128305466619, "grad_norm": 0.6491577025051294, "learning_rate": 7.093009739921721e-06, "loss": 0.0401, "step": 25688 }, { "epoch": 3.0462468872287443, "grad_norm": 0.40525738123351823, "learning_rate": 7.091334818336618e-06, "loss": 0.0183, "step": 25689 }, { "epoch": 3.046365468990869, "grad_norm": 0.8061535335764177, "learning_rate": 7.089660061847447e-06, "loss": 0.0404, "step": 25690 }, { "epoch": 3.046484050752994, "grad_norm": 0.47579761689992567, "learning_rate": 7.087985470469649e-06, "loss": 0.0203, "step": 25691 }, { "epoch": 3.046602632515119, "grad_norm": 0.6117218637725415, "learning_rate": 7.086311044218666e-06, "loss": 0.0325, "step": 25692 }, { "epoch": 3.046721214277244, "grad_norm": 0.3542142086518, "learning_rate": 7.08463678310991e-06, "loss": 0.0162, "step": 25693 }, { "epoch": 3.046839796039369, "grad_norm": 0.5798577308061234, "learning_rate": 7.082962687158853e-06, "loss": 0.0311, "step": 25694 }, { "epoch": 3.046958377801494, "grad_norm": 0.49872348877988787, "learning_rate": 7.0812887563808994e-06, "loss": 0.0271, "step": 25695 }, { "epoch": 3.047076959563619, "grad_norm": 0.3739921578097023, "learning_rate": 7.079614990791492e-06, "loss": 0.0175, "step": 25696 }, { "epoch": 3.047195541325744, "grad_norm": 0.4226222232699488, "learning_rate": 7.077941390406057e-06, "loss": 0.0206, "step": 25697 }, { "epoch": 3.047314123087869, "grad_norm": 0.6009363403687604, "learning_rate": 7.076267955240032e-06, "loss": 0.0442, "step": 25698 }, { "epoch": 3.047432704849994, "grad_norm": 0.5989263077232984, "learning_rate": 7.07459468530883e-06, "loss": 0.0275, "step": 25699 }, { "epoch": 3.0475512866121193, "grad_norm": 0.521859375519805, "learning_rate": 7.072921580627884e-06, "loss": 0.0288, "step": 25700 }, { "epoch": 3.047669868374244, "grad_norm": 0.8005361329865595, "learning_rate": 7.071248641212616e-06, "loss": 0.0482, "step": 25701 }, { "epoch": 3.0477884501363692, "grad_norm": 0.42749046281241404, "learning_rate": 7.069575867078451e-06, "loss": 0.0291, "step": 25702 }, { "epoch": 3.047907031898494, "grad_norm": 0.3656719829091871, "learning_rate": 7.067903258240815e-06, "loss": 0.0196, "step": 25703 }, { "epoch": 3.048025613660619, "grad_norm": 0.5069086389646187, "learning_rate": 7.066230814715108e-06, "loss": 0.026, "step": 25704 }, { "epoch": 3.048144195422744, "grad_norm": 0.4673833707724831, "learning_rate": 7.064558536516774e-06, "loss": 0.0238, "step": 25705 }, { "epoch": 3.048262777184869, "grad_norm": 0.3814666583443058, "learning_rate": 7.062886423661211e-06, "loss": 0.0224, "step": 25706 }, { "epoch": 3.048381358946994, "grad_norm": 0.6554430138251789, "learning_rate": 7.0612144761638465e-06, "loss": 0.0347, "step": 25707 }, { "epoch": 3.048499940709119, "grad_norm": 0.43196383761449986, "learning_rate": 7.05954269404007e-06, "loss": 0.0282, "step": 25708 }, { "epoch": 3.048618522471244, "grad_norm": 0.428741652526595, "learning_rate": 7.0578710773053255e-06, "loss": 0.0285, "step": 25709 }, { "epoch": 3.048737104233369, "grad_norm": 0.4870265560148238, "learning_rate": 7.056199625974999e-06, "loss": 0.024, "step": 25710 }, { "epoch": 3.048855685995494, "grad_norm": 0.43395603041345515, "learning_rate": 7.054528340064512e-06, "loss": 0.024, "step": 25711 }, { "epoch": 3.048974267757619, "grad_norm": 0.7441802764430697, "learning_rate": 7.052857219589262e-06, "loss": 0.031, "step": 25712 }, { "epoch": 3.049092849519744, "grad_norm": 0.40220964181567764, "learning_rate": 7.051186264564663e-06, "loss": 0.0206, "step": 25713 }, { "epoch": 3.049211431281869, "grad_norm": 0.5984120999457562, "learning_rate": 7.049515475006124e-06, "loss": 0.0467, "step": 25714 }, { "epoch": 3.0493300130439938, "grad_norm": 0.4316491936542684, "learning_rate": 7.047844850929025e-06, "loss": 0.0301, "step": 25715 }, { "epoch": 3.049448594806119, "grad_norm": 0.6744264712630901, "learning_rate": 7.046174392348798e-06, "loss": 0.0376, "step": 25716 }, { "epoch": 3.0495671765682437, "grad_norm": 0.3040634034093401, "learning_rate": 7.044504099280816e-06, "loss": 0.0177, "step": 25717 }, { "epoch": 3.049685758330369, "grad_norm": 0.5858214965749343, "learning_rate": 7.042833971740498e-06, "loss": 0.0342, "step": 25718 }, { "epoch": 3.0498043400924937, "grad_norm": 0.5895239442776448, "learning_rate": 7.0411640097432145e-06, "loss": 0.0221, "step": 25719 }, { "epoch": 3.049922921854619, "grad_norm": 0.4729320149091077, "learning_rate": 7.03949421330439e-06, "loss": 0.0226, "step": 25720 }, { "epoch": 3.0500415036167436, "grad_norm": 0.7000018816006867, "learning_rate": 7.037824582439398e-06, "loss": 0.0238, "step": 25721 }, { "epoch": 3.050160085378869, "grad_norm": 0.8953740458699684, "learning_rate": 7.036155117163637e-06, "loss": 0.0523, "step": 25722 }, { "epoch": 3.0502786671409936, "grad_norm": 0.4719396906997645, "learning_rate": 7.034485817492498e-06, "loss": 0.0329, "step": 25723 }, { "epoch": 3.050397248903119, "grad_norm": 0.7087747650690799, "learning_rate": 7.032816683441365e-06, "loss": 0.0359, "step": 25724 }, { "epoch": 3.0505158306652436, "grad_norm": 0.3070650430381829, "learning_rate": 7.03114771502564e-06, "loss": 0.0137, "step": 25725 }, { "epoch": 3.0506344124273688, "grad_norm": 0.6384980543805574, "learning_rate": 7.0294789122606845e-06, "loss": 0.0335, "step": 25726 }, { "epoch": 3.0507529941894935, "grad_norm": 0.46817391892082816, "learning_rate": 7.027810275161909e-06, "loss": 0.0279, "step": 25727 }, { "epoch": 3.0508715759516187, "grad_norm": 0.3960503637737531, "learning_rate": 7.026141803744676e-06, "loss": 0.0217, "step": 25728 }, { "epoch": 3.0509901577137435, "grad_norm": 0.6453767347375313, "learning_rate": 7.024473498024384e-06, "loss": 0.0434, "step": 25729 }, { "epoch": 3.0511087394758687, "grad_norm": 0.4620642846709467, "learning_rate": 7.022805358016388e-06, "loss": 0.0202, "step": 25730 }, { "epoch": 3.0512273212379935, "grad_norm": 0.6660174823081271, "learning_rate": 7.0211373837360955e-06, "loss": 0.0403, "step": 25731 }, { "epoch": 3.0513459030001187, "grad_norm": 0.3305803551715751, "learning_rate": 7.019469575198862e-06, "loss": 0.0207, "step": 25732 }, { "epoch": 3.0514644847622434, "grad_norm": 0.4749702304505118, "learning_rate": 7.017801932420068e-06, "loss": 0.0189, "step": 25733 }, { "epoch": 3.0515830665243686, "grad_norm": 0.679584337044462, "learning_rate": 7.016134455415091e-06, "loss": 0.0284, "step": 25734 }, { "epoch": 3.0517016482864934, "grad_norm": 0.5547645119487171, "learning_rate": 7.014467144199302e-06, "loss": 0.0251, "step": 25735 }, { "epoch": 3.0518202300486186, "grad_norm": 0.5072465378202821, "learning_rate": 7.012799998788078e-06, "loss": 0.0204, "step": 25736 }, { "epoch": 3.0519388118107433, "grad_norm": 0.782930441826833, "learning_rate": 7.011133019196769e-06, "loss": 0.0399, "step": 25737 }, { "epoch": 3.0520573935728685, "grad_norm": 0.5044744566223648, "learning_rate": 7.009466205440759e-06, "loss": 0.0253, "step": 25738 }, { "epoch": 3.0521759753349933, "grad_norm": 1.0103959765221462, "learning_rate": 7.007799557535408e-06, "loss": 0.055, "step": 25739 }, { "epoch": 3.0522945570971185, "grad_norm": 0.4586430689230486, "learning_rate": 7.00613307549608e-06, "loss": 0.0231, "step": 25740 }, { "epoch": 3.0524131388592433, "grad_norm": 0.5228517753026451, "learning_rate": 7.0044667593381405e-06, "loss": 0.0192, "step": 25741 }, { "epoch": 3.0525317206213685, "grad_norm": 0.7544336612109552, "learning_rate": 7.002800609076951e-06, "loss": 0.043, "step": 25742 }, { "epoch": 3.052650302383493, "grad_norm": 0.7253538468794315, "learning_rate": 7.001134624727876e-06, "loss": 0.022, "step": 25743 }, { "epoch": 3.0527688841456184, "grad_norm": 0.42820433679385855, "learning_rate": 6.999468806306261e-06, "loss": 0.0169, "step": 25744 }, { "epoch": 3.052887465907743, "grad_norm": 0.68101123444321, "learning_rate": 6.99780315382747e-06, "loss": 0.0362, "step": 25745 }, { "epoch": 3.0530060476698684, "grad_norm": 0.5631980186546279, "learning_rate": 6.99613766730686e-06, "loss": 0.0275, "step": 25746 }, { "epoch": 3.053124629431993, "grad_norm": 0.33209260441437727, "learning_rate": 6.994472346759787e-06, "loss": 0.0148, "step": 25747 }, { "epoch": 3.0532432111941183, "grad_norm": 0.6085522096962065, "learning_rate": 6.992807192201595e-06, "loss": 0.046, "step": 25748 }, { "epoch": 3.0533617929562435, "grad_norm": 0.4694148274477506, "learning_rate": 6.991142203647635e-06, "loss": 0.0305, "step": 25749 }, { "epoch": 3.0534803747183683, "grad_norm": 0.43401706184643485, "learning_rate": 6.989477381113263e-06, "loss": 0.0222, "step": 25750 }, { "epoch": 3.0535989564804935, "grad_norm": 0.5959435707723132, "learning_rate": 6.987812724613824e-06, "loss": 0.0302, "step": 25751 }, { "epoch": 3.0537175382426183, "grad_norm": 0.7734547484918611, "learning_rate": 6.986148234164669e-06, "loss": 0.0377, "step": 25752 }, { "epoch": 3.0538361200047435, "grad_norm": 0.5260675031197611, "learning_rate": 6.9844839097811225e-06, "loss": 0.0288, "step": 25753 }, { "epoch": 3.0539547017668682, "grad_norm": 0.7853168920531363, "learning_rate": 6.982819751478559e-06, "loss": 0.0276, "step": 25754 }, { "epoch": 3.0540732835289934, "grad_norm": 0.5514933068128056, "learning_rate": 6.981155759272293e-06, "loss": 0.0269, "step": 25755 }, { "epoch": 3.054191865291118, "grad_norm": 0.3780481239459865, "learning_rate": 6.979491933177676e-06, "loss": 0.0226, "step": 25756 }, { "epoch": 3.0543104470532434, "grad_norm": 1.0214054934816537, "learning_rate": 6.977828273210046e-06, "loss": 0.0305, "step": 25757 }, { "epoch": 3.054429028815368, "grad_norm": 0.4436511779882615, "learning_rate": 6.976164779384747e-06, "loss": 0.0267, "step": 25758 }, { "epoch": 3.0545476105774934, "grad_norm": 0.38840819588017933, "learning_rate": 6.974501451717097e-06, "loss": 0.0237, "step": 25759 }, { "epoch": 3.054666192339618, "grad_norm": 0.5090109943146182, "learning_rate": 6.972838290222441e-06, "loss": 0.0309, "step": 25760 }, { "epoch": 3.0547847741017433, "grad_norm": 0.7082551377553794, "learning_rate": 6.97117529491611e-06, "loss": 0.0378, "step": 25761 }, { "epoch": 3.054903355863868, "grad_norm": 0.6171946508172774, "learning_rate": 6.9695124658134326e-06, "loss": 0.0338, "step": 25762 }, { "epoch": 3.0550219376259933, "grad_norm": 0.544922771491039, "learning_rate": 6.967849802929749e-06, "loss": 0.0293, "step": 25763 }, { "epoch": 3.055140519388118, "grad_norm": 0.5255244655327902, "learning_rate": 6.966187306280361e-06, "loss": 0.0313, "step": 25764 }, { "epoch": 3.0552591011502432, "grad_norm": 0.6548627026355068, "learning_rate": 6.964524975880627e-06, "loss": 0.031, "step": 25765 }, { "epoch": 3.055377682912368, "grad_norm": 0.4451188408935132, "learning_rate": 6.962862811745849e-06, "loss": 0.0239, "step": 25766 }, { "epoch": 3.055496264674493, "grad_norm": 0.4355291471565756, "learning_rate": 6.961200813891358e-06, "loss": 0.024, "step": 25767 }, { "epoch": 3.055614846436618, "grad_norm": 0.466256907231154, "learning_rate": 6.9595389823324725e-06, "loss": 0.0301, "step": 25768 }, { "epoch": 3.055733428198743, "grad_norm": 0.38401684296510374, "learning_rate": 6.957877317084524e-06, "loss": 0.0189, "step": 25769 }, { "epoch": 3.055852009960868, "grad_norm": 0.5919805023892334, "learning_rate": 6.956215818162814e-06, "loss": 0.0235, "step": 25770 }, { "epoch": 3.055970591722993, "grad_norm": 0.37052701137189514, "learning_rate": 6.954554485582671e-06, "loss": 0.0215, "step": 25771 }, { "epoch": 3.056089173485118, "grad_norm": 0.45827125056667717, "learning_rate": 6.952893319359402e-06, "loss": 0.0221, "step": 25772 }, { "epoch": 3.056207755247243, "grad_norm": 0.49035394111056885, "learning_rate": 6.951232319508327e-06, "loss": 0.0207, "step": 25773 }, { "epoch": 3.056326337009368, "grad_norm": 0.719291418845679, "learning_rate": 6.9495714860447645e-06, "loss": 0.0339, "step": 25774 }, { "epoch": 3.056444918771493, "grad_norm": 0.568831438226503, "learning_rate": 6.947910818984005e-06, "loss": 0.0415, "step": 25775 }, { "epoch": 3.056563500533618, "grad_norm": 0.5568137102820796, "learning_rate": 6.946250318341385e-06, "loss": 0.0278, "step": 25776 }, { "epoch": 3.056682082295743, "grad_norm": 0.35188762122798484, "learning_rate": 6.9445899841321884e-06, "loss": 0.016, "step": 25777 }, { "epoch": 3.0568006640578678, "grad_norm": 0.34386535001292284, "learning_rate": 6.9429298163717405e-06, "loss": 0.015, "step": 25778 }, { "epoch": 3.056919245819993, "grad_norm": 0.5714147326626452, "learning_rate": 6.941269815075322e-06, "loss": 0.0301, "step": 25779 }, { "epoch": 3.0570378275821177, "grad_norm": 0.5411813796549171, "learning_rate": 6.939609980258266e-06, "loss": 0.0272, "step": 25780 }, { "epoch": 3.057156409344243, "grad_norm": 0.607024756412698, "learning_rate": 6.9379503119358505e-06, "loss": 0.0266, "step": 25781 }, { "epoch": 3.0572749911063677, "grad_norm": 0.4330951119300781, "learning_rate": 6.936290810123383e-06, "loss": 0.0253, "step": 25782 }, { "epoch": 3.057393572868493, "grad_norm": 0.5591926917613065, "learning_rate": 6.934631474836165e-06, "loss": 0.0259, "step": 25783 }, { "epoch": 3.0575121546306177, "grad_norm": 0.5674151046967072, "learning_rate": 6.932972306089491e-06, "loss": 0.0213, "step": 25784 }, { "epoch": 3.057630736392743, "grad_norm": 0.6008829734802543, "learning_rate": 6.931313303898662e-06, "loss": 0.0214, "step": 25785 }, { "epoch": 3.0577493181548676, "grad_norm": 0.5691489084671257, "learning_rate": 6.929654468278956e-06, "loss": 0.0271, "step": 25786 }, { "epoch": 3.057867899916993, "grad_norm": 0.45693182015021594, "learning_rate": 6.9279957992456905e-06, "loss": 0.0305, "step": 25787 }, { "epoch": 3.0579864816791176, "grad_norm": 0.5430877288018532, "learning_rate": 6.926337296814134e-06, "loss": 0.0296, "step": 25788 }, { "epoch": 3.0581050634412428, "grad_norm": 0.6787097291722429, "learning_rate": 6.9246789609995834e-06, "loss": 0.0451, "step": 25789 }, { "epoch": 3.0582236452033675, "grad_norm": 0.37606153381369456, "learning_rate": 6.923020791817328e-06, "loss": 0.0204, "step": 25790 }, { "epoch": 3.0583422269654927, "grad_norm": 0.8022909081912585, "learning_rate": 6.921362789282654e-06, "loss": 0.0461, "step": 25791 }, { "epoch": 3.0584608087276175, "grad_norm": 0.47664877278163165, "learning_rate": 6.919704953410852e-06, "loss": 0.0249, "step": 25792 }, { "epoch": 3.0585793904897427, "grad_norm": 0.4774977809235436, "learning_rate": 6.918047284217194e-06, "loss": 0.0264, "step": 25793 }, { "epoch": 3.0586979722518675, "grad_norm": 0.32735948785500757, "learning_rate": 6.916389781716964e-06, "loss": 0.0159, "step": 25794 }, { "epoch": 3.0588165540139927, "grad_norm": 0.7060606687930043, "learning_rate": 6.914732445925445e-06, "loss": 0.0444, "step": 25795 }, { "epoch": 3.0589351357761174, "grad_norm": 0.4268015467240367, "learning_rate": 6.913075276857922e-06, "loss": 0.0209, "step": 25796 }, { "epoch": 3.0590537175382426, "grad_norm": 0.6203091626563271, "learning_rate": 6.911418274529652e-06, "loss": 0.0317, "step": 25797 }, { "epoch": 3.059172299300368, "grad_norm": 0.4838970179708307, "learning_rate": 6.909761438955939e-06, "loss": 0.0246, "step": 25798 }, { "epoch": 3.0592908810624926, "grad_norm": 0.7978607484712558, "learning_rate": 6.908104770152032e-06, "loss": 0.0382, "step": 25799 }, { "epoch": 3.059409462824618, "grad_norm": 0.5507959003195486, "learning_rate": 6.906448268133214e-06, "loss": 0.0246, "step": 25800 }, { "epoch": 3.0595280445867425, "grad_norm": 0.5857655908451135, "learning_rate": 6.904791932914759e-06, "loss": 0.0252, "step": 25801 }, { "epoch": 3.0596466263488677, "grad_norm": 1.1184656339710428, "learning_rate": 6.903135764511928e-06, "loss": 0.0517, "step": 25802 }, { "epoch": 3.0597652081109925, "grad_norm": 0.4865340630688182, "learning_rate": 6.901479762940002e-06, "loss": 0.0235, "step": 25803 }, { "epoch": 3.0598837898731177, "grad_norm": 0.6597280550054939, "learning_rate": 6.899823928214233e-06, "loss": 0.0337, "step": 25804 }, { "epoch": 3.0600023716352425, "grad_norm": 0.4900448895763137, "learning_rate": 6.898168260349888e-06, "loss": 0.0279, "step": 25805 }, { "epoch": 3.0601209533973677, "grad_norm": 0.3485483320855055, "learning_rate": 6.896512759362236e-06, "loss": 0.0182, "step": 25806 }, { "epoch": 3.0602395351594924, "grad_norm": 0.5881307469099415, "learning_rate": 6.894857425266543e-06, "loss": 0.0384, "step": 25807 }, { "epoch": 3.0603581169216176, "grad_norm": 0.40650897000557606, "learning_rate": 6.893202258078057e-06, "loss": 0.0215, "step": 25808 }, { "epoch": 3.0604766986837424, "grad_norm": 0.36216889734036656, "learning_rate": 6.891547257812042e-06, "loss": 0.0183, "step": 25809 }, { "epoch": 3.0605952804458676, "grad_norm": 0.3756832185446683, "learning_rate": 6.889892424483754e-06, "loss": 0.0151, "step": 25810 }, { "epoch": 3.0607138622079924, "grad_norm": 0.42426682647770025, "learning_rate": 6.8882377581084485e-06, "loss": 0.0265, "step": 25811 }, { "epoch": 3.0608324439701176, "grad_norm": 0.4794668841413802, "learning_rate": 6.886583258701382e-06, "loss": 0.0212, "step": 25812 }, { "epoch": 3.0609510257322423, "grad_norm": 0.4846359900405756, "learning_rate": 6.884928926277806e-06, "loss": 0.0255, "step": 25813 }, { "epoch": 3.0610696074943675, "grad_norm": 0.4309565563567072, "learning_rate": 6.883274760852979e-06, "loss": 0.0199, "step": 25814 }, { "epoch": 3.0611881892564923, "grad_norm": 0.5380942434254913, "learning_rate": 6.881620762442134e-06, "loss": 0.034, "step": 25815 }, { "epoch": 3.0613067710186175, "grad_norm": 0.5093015654017277, "learning_rate": 6.879966931060527e-06, "loss": 0.0263, "step": 25816 }, { "epoch": 3.0614253527807422, "grad_norm": 0.42525135528409663, "learning_rate": 6.878313266723407e-06, "loss": 0.0216, "step": 25817 }, { "epoch": 3.0615439345428674, "grad_norm": 0.5517435099452616, "learning_rate": 6.876659769446023e-06, "loss": 0.0252, "step": 25818 }, { "epoch": 3.061662516304992, "grad_norm": 0.9779500253573901, "learning_rate": 6.875006439243603e-06, "loss": 0.0617, "step": 25819 }, { "epoch": 3.0617810980671174, "grad_norm": 0.5819855514862882, "learning_rate": 6.873353276131397e-06, "loss": 0.038, "step": 25820 }, { "epoch": 3.061899679829242, "grad_norm": 0.47633736339051425, "learning_rate": 6.871700280124646e-06, "loss": 0.0191, "step": 25821 }, { "epoch": 3.0620182615913674, "grad_norm": 0.6353145236188867, "learning_rate": 6.8700474512385894e-06, "loss": 0.0329, "step": 25822 }, { "epoch": 3.062136843353492, "grad_norm": 0.42993114419843786, "learning_rate": 6.868394789488469e-06, "loss": 0.0148, "step": 25823 }, { "epoch": 3.0622554251156173, "grad_norm": 0.49620444191909197, "learning_rate": 6.8667422948895e-06, "loss": 0.0255, "step": 25824 }, { "epoch": 3.062374006877742, "grad_norm": 0.7351365217697372, "learning_rate": 6.865089967456945e-06, "loss": 0.0455, "step": 25825 }, { "epoch": 3.0624925886398673, "grad_norm": 0.6639661622594325, "learning_rate": 6.8634378072060135e-06, "loss": 0.0414, "step": 25826 }, { "epoch": 3.062611170401992, "grad_norm": 0.6958009562140369, "learning_rate": 6.861785814151947e-06, "loss": 0.0389, "step": 25827 }, { "epoch": 3.0627297521641172, "grad_norm": 0.6615669799424448, "learning_rate": 6.8601339883099715e-06, "loss": 0.0372, "step": 25828 }, { "epoch": 3.062848333926242, "grad_norm": 0.423052211877331, "learning_rate": 6.858482329695324e-06, "loss": 0.019, "step": 25829 }, { "epoch": 3.062966915688367, "grad_norm": 0.5918559671098086, "learning_rate": 6.856830838323214e-06, "loss": 0.0299, "step": 25830 }, { "epoch": 3.063085497450492, "grad_norm": 0.7177482042210066, "learning_rate": 6.855179514208876e-06, "loss": 0.0307, "step": 25831 }, { "epoch": 3.063204079212617, "grad_norm": 0.7414089321718169, "learning_rate": 6.8535283573675335e-06, "loss": 0.0318, "step": 25832 }, { "epoch": 3.063322660974742, "grad_norm": 0.42536487059598616, "learning_rate": 6.8518773678144045e-06, "loss": 0.0248, "step": 25833 }, { "epoch": 3.063441242736867, "grad_norm": 0.9257222276313929, "learning_rate": 6.8502265455647195e-06, "loss": 0.039, "step": 25834 }, { "epoch": 3.063559824498992, "grad_norm": 0.7426316904198073, "learning_rate": 6.848575890633674e-06, "loss": 0.0296, "step": 25835 }, { "epoch": 3.063678406261117, "grad_norm": 0.5042090417220441, "learning_rate": 6.846925403036517e-06, "loss": 0.0308, "step": 25836 }, { "epoch": 3.063796988023242, "grad_norm": 0.6049909276258166, "learning_rate": 6.845275082788438e-06, "loss": 0.0291, "step": 25837 }, { "epoch": 3.063915569785367, "grad_norm": 0.6169826222956333, "learning_rate": 6.84362492990466e-06, "loss": 0.0295, "step": 25838 }, { "epoch": 3.064034151547492, "grad_norm": 0.5844787027949316, "learning_rate": 6.841974944400395e-06, "loss": 0.0324, "step": 25839 }, { "epoch": 3.064152733309617, "grad_norm": 0.6078471162182267, "learning_rate": 6.840325126290856e-06, "loss": 0.0346, "step": 25840 }, { "epoch": 3.0642713150717418, "grad_norm": 0.5051496136165976, "learning_rate": 6.838675475591256e-06, "loss": 0.026, "step": 25841 }, { "epoch": 3.064389896833867, "grad_norm": 0.5062291150583061, "learning_rate": 6.837025992316784e-06, "loss": 0.0225, "step": 25842 }, { "epoch": 3.0645084785959917, "grad_norm": 0.43092542008669, "learning_rate": 6.835376676482672e-06, "loss": 0.0229, "step": 25843 }, { "epoch": 3.064627060358117, "grad_norm": 0.4431354798462679, "learning_rate": 6.833727528104106e-06, "loss": 0.0218, "step": 25844 }, { "epoch": 3.0647456421202417, "grad_norm": 0.639235035690723, "learning_rate": 6.832078547196302e-06, "loss": 0.0285, "step": 25845 }, { "epoch": 3.064864223882367, "grad_norm": 0.915774658134227, "learning_rate": 6.8304297337744406e-06, "loss": 0.0479, "step": 25846 }, { "epoch": 3.064982805644492, "grad_norm": 0.557791260046144, "learning_rate": 6.828781087853753e-06, "loss": 0.0343, "step": 25847 }, { "epoch": 3.065101387406617, "grad_norm": 0.5048933344371328, "learning_rate": 6.8271326094494105e-06, "loss": 0.0272, "step": 25848 }, { "epoch": 3.0652199691687416, "grad_norm": 0.5339461983290035, "learning_rate": 6.825484298576621e-06, "loss": 0.0315, "step": 25849 }, { "epoch": 3.065338550930867, "grad_norm": 0.48765067927091527, "learning_rate": 6.823836155250579e-06, "loss": 0.0162, "step": 25850 }, { "epoch": 3.065457132692992, "grad_norm": 0.7186481360532898, "learning_rate": 6.822188179486477e-06, "loss": 0.0415, "step": 25851 }, { "epoch": 3.065575714455117, "grad_norm": 0.49809011870022457, "learning_rate": 6.8205403712995195e-06, "loss": 0.0301, "step": 25852 }, { "epoch": 3.065694296217242, "grad_norm": 0.45164947745730993, "learning_rate": 6.818892730704871e-06, "loss": 0.0211, "step": 25853 }, { "epoch": 3.0658128779793667, "grad_norm": 0.5880306453973525, "learning_rate": 6.81724525771775e-06, "loss": 0.0376, "step": 25854 }, { "epoch": 3.065931459741492, "grad_norm": 0.44198185050182615, "learning_rate": 6.815597952353323e-06, "loss": 0.016, "step": 25855 }, { "epoch": 3.0660500415036167, "grad_norm": 0.46574299328846847, "learning_rate": 6.813950814626793e-06, "loss": 0.0264, "step": 25856 }, { "epoch": 3.066168623265742, "grad_norm": 0.49415000395660835, "learning_rate": 6.812303844553319e-06, "loss": 0.0188, "step": 25857 }, { "epoch": 3.0662872050278667, "grad_norm": 0.8737964975475574, "learning_rate": 6.8106570421481135e-06, "loss": 0.0421, "step": 25858 }, { "epoch": 3.066405786789992, "grad_norm": 0.7576365459302427, "learning_rate": 6.809010407426342e-06, "loss": 0.0339, "step": 25859 }, { "epoch": 3.0665243685521166, "grad_norm": 0.5442203235355177, "learning_rate": 6.807363940403183e-06, "loss": 0.0295, "step": 25860 }, { "epoch": 3.066642950314242, "grad_norm": 0.5790749516284069, "learning_rate": 6.80571764109382e-06, "loss": 0.0306, "step": 25861 }, { "epoch": 3.0667615320763666, "grad_norm": 0.604843369059809, "learning_rate": 6.804071509513432e-06, "loss": 0.0368, "step": 25862 }, { "epoch": 3.066880113838492, "grad_norm": 0.3822210337800399, "learning_rate": 6.802425545677196e-06, "loss": 0.0207, "step": 25863 }, { "epoch": 3.0669986956006166, "grad_norm": 0.48186421846021993, "learning_rate": 6.800779749600275e-06, "loss": 0.0303, "step": 25864 }, { "epoch": 3.0671172773627418, "grad_norm": 0.33738222726826406, "learning_rate": 6.799134121297846e-06, "loss": 0.0166, "step": 25865 }, { "epoch": 3.0672358591248665, "grad_norm": 0.4726914493630969, "learning_rate": 6.7974886607850815e-06, "loss": 0.0255, "step": 25866 }, { "epoch": 3.0673544408869917, "grad_norm": 0.425367214463027, "learning_rate": 6.795843368077156e-06, "loss": 0.0215, "step": 25867 }, { "epoch": 3.0674730226491165, "grad_norm": 0.40042809549233377, "learning_rate": 6.79419824318922e-06, "loss": 0.0223, "step": 25868 }, { "epoch": 3.0675916044112417, "grad_norm": 0.5356514329832545, "learning_rate": 6.792553286136463e-06, "loss": 0.029, "step": 25869 }, { "epoch": 3.0677101861733664, "grad_norm": 0.21396593341027106, "learning_rate": 6.790908496934032e-06, "loss": 0.0091, "step": 25870 }, { "epoch": 3.0678287679354916, "grad_norm": 0.5688104277766369, "learning_rate": 6.789263875597094e-06, "loss": 0.034, "step": 25871 }, { "epoch": 3.0679473496976164, "grad_norm": 0.5458570857043821, "learning_rate": 6.787619422140812e-06, "loss": 0.0291, "step": 25872 }, { "epoch": 3.0680659314597416, "grad_norm": 0.32012408333736764, "learning_rate": 6.785975136580344e-06, "loss": 0.0177, "step": 25873 }, { "epoch": 3.0681845132218664, "grad_norm": 0.4727060662961032, "learning_rate": 6.784331018930856e-06, "loss": 0.0193, "step": 25874 }, { "epoch": 3.0683030949839916, "grad_norm": 0.6464418134210175, "learning_rate": 6.782687069207494e-06, "loss": 0.0306, "step": 25875 }, { "epoch": 3.0684216767461163, "grad_norm": 0.5787655379751475, "learning_rate": 6.781043287425418e-06, "loss": 0.0299, "step": 25876 }, { "epoch": 3.0685402585082415, "grad_norm": 0.3658910380590534, "learning_rate": 6.779399673599779e-06, "loss": 0.0161, "step": 25877 }, { "epoch": 3.0686588402703663, "grad_norm": 0.5293957086092779, "learning_rate": 6.777756227745741e-06, "loss": 0.0343, "step": 25878 }, { "epoch": 3.0687774220324915, "grad_norm": 0.5562311714225917, "learning_rate": 6.7761129498784355e-06, "loss": 0.0259, "step": 25879 }, { "epoch": 3.0688960037946162, "grad_norm": 0.6656504298263599, "learning_rate": 6.774469840013023e-06, "loss": 0.0463, "step": 25880 }, { "epoch": 3.0690145855567414, "grad_norm": 0.5021857512383711, "learning_rate": 6.772826898164647e-06, "loss": 0.027, "step": 25881 }, { "epoch": 3.069133167318866, "grad_norm": 0.5302041301119917, "learning_rate": 6.771184124348457e-06, "loss": 0.0294, "step": 25882 }, { "epoch": 3.0692517490809914, "grad_norm": 0.7264072103536825, "learning_rate": 6.769541518579594e-06, "loss": 0.0397, "step": 25883 }, { "epoch": 3.069370330843116, "grad_norm": 0.7163393501755547, "learning_rate": 6.767899080873202e-06, "loss": 0.0369, "step": 25884 }, { "epoch": 3.0694889126052414, "grad_norm": 0.6041071071269902, "learning_rate": 6.766256811244434e-06, "loss": 0.0257, "step": 25885 }, { "epoch": 3.069607494367366, "grad_norm": 0.6160479698774085, "learning_rate": 6.764614709708409e-06, "loss": 0.0327, "step": 25886 }, { "epoch": 3.0697260761294913, "grad_norm": 0.7277326951784558, "learning_rate": 6.7629727762802745e-06, "loss": 0.0395, "step": 25887 }, { "epoch": 3.069844657891616, "grad_norm": 0.5503031520352925, "learning_rate": 6.761331010975167e-06, "loss": 0.0274, "step": 25888 }, { "epoch": 3.0699632396537413, "grad_norm": 0.539842881824107, "learning_rate": 6.7596894138082205e-06, "loss": 0.0335, "step": 25889 }, { "epoch": 3.070081821415866, "grad_norm": 0.3794459420552934, "learning_rate": 6.7580479847945795e-06, "loss": 0.0165, "step": 25890 }, { "epoch": 3.0702004031779913, "grad_norm": 0.433417860380022, "learning_rate": 6.756406723949351e-06, "loss": 0.0214, "step": 25891 }, { "epoch": 3.070318984940116, "grad_norm": 0.4504631558960605, "learning_rate": 6.754765631287696e-06, "loss": 0.018, "step": 25892 }, { "epoch": 3.070437566702241, "grad_norm": 0.45073397316565667, "learning_rate": 6.753124706824721e-06, "loss": 0.0241, "step": 25893 }, { "epoch": 3.070556148464366, "grad_norm": 0.5830727702341724, "learning_rate": 6.751483950575566e-06, "loss": 0.025, "step": 25894 }, { "epoch": 3.070674730226491, "grad_norm": 0.5957239029171869, "learning_rate": 6.749843362555339e-06, "loss": 0.0302, "step": 25895 }, { "epoch": 3.070793311988616, "grad_norm": 0.565179982150426, "learning_rate": 6.748202942779189e-06, "loss": 0.0286, "step": 25896 }, { "epoch": 3.070911893750741, "grad_norm": 0.5249380185163606, "learning_rate": 6.746562691262218e-06, "loss": 0.0287, "step": 25897 }, { "epoch": 3.071030475512866, "grad_norm": 0.6442152819803039, "learning_rate": 6.744922608019557e-06, "loss": 0.0335, "step": 25898 }, { "epoch": 3.071149057274991, "grad_norm": 0.5158825609294077, "learning_rate": 6.743282693066322e-06, "loss": 0.0323, "step": 25899 }, { "epoch": 3.0712676390371163, "grad_norm": 0.7804451032898571, "learning_rate": 6.741642946417634e-06, "loss": 0.039, "step": 25900 }, { "epoch": 3.071386220799241, "grad_norm": 0.331957877931185, "learning_rate": 6.740003368088615e-06, "loss": 0.0155, "step": 25901 }, { "epoch": 3.0715048025613663, "grad_norm": 0.5732318369015555, "learning_rate": 6.73836395809436e-06, "loss": 0.0369, "step": 25902 }, { "epoch": 3.071623384323491, "grad_norm": 0.5571725632372766, "learning_rate": 6.736724716450007e-06, "loss": 0.0229, "step": 25903 }, { "epoch": 3.0717419660856162, "grad_norm": 0.6046886985022194, "learning_rate": 6.735085643170652e-06, "loss": 0.0246, "step": 25904 }, { "epoch": 3.071860547847741, "grad_norm": 0.6789798219781968, "learning_rate": 6.733446738271415e-06, "loss": 0.0268, "step": 25905 }, { "epoch": 3.071979129609866, "grad_norm": 0.4999417015898718, "learning_rate": 6.731808001767384e-06, "loss": 0.0263, "step": 25906 }, { "epoch": 3.072097711371991, "grad_norm": 0.44104058731017104, "learning_rate": 6.730169433673697e-06, "loss": 0.0216, "step": 25907 }, { "epoch": 3.072216293134116, "grad_norm": 0.47351955317250677, "learning_rate": 6.728531034005436e-06, "loss": 0.0209, "step": 25908 }, { "epoch": 3.072334874896241, "grad_norm": 0.5262492090055991, "learning_rate": 6.7268928027777125e-06, "loss": 0.0342, "step": 25909 }, { "epoch": 3.072453456658366, "grad_norm": 0.4591880761273801, "learning_rate": 6.725254740005632e-06, "loss": 0.0322, "step": 25910 }, { "epoch": 3.072572038420491, "grad_norm": 0.6395357646933344, "learning_rate": 6.72361684570429e-06, "loss": 0.0268, "step": 25911 }, { "epoch": 3.072690620182616, "grad_norm": 0.60017315930188, "learning_rate": 6.721979119888796e-06, "loss": 0.0294, "step": 25912 }, { "epoch": 3.072809201944741, "grad_norm": 0.45477161757698537, "learning_rate": 6.720341562574229e-06, "loss": 0.0172, "step": 25913 }, { "epoch": 3.072927783706866, "grad_norm": 0.5052483364994256, "learning_rate": 6.718704173775708e-06, "loss": 0.0276, "step": 25914 }, { "epoch": 3.073046365468991, "grad_norm": 0.5143902325757742, "learning_rate": 6.717066953508311e-06, "loss": 0.0251, "step": 25915 }, { "epoch": 3.073164947231116, "grad_norm": 0.5186563203319153, "learning_rate": 6.715429901787143e-06, "loss": 0.0307, "step": 25916 }, { "epoch": 3.0732835289932408, "grad_norm": 0.5447876411100923, "learning_rate": 6.7137930186272735e-06, "loss": 0.0272, "step": 25917 }, { "epoch": 3.073402110755366, "grad_norm": 0.37968710528348865, "learning_rate": 6.712156304043826e-06, "loss": 0.0221, "step": 25918 }, { "epoch": 3.0735206925174907, "grad_norm": 0.4072395098457812, "learning_rate": 6.7105197580518616e-06, "loss": 0.0271, "step": 25919 }, { "epoch": 3.073639274279616, "grad_norm": 0.5667311751760737, "learning_rate": 6.708883380666478e-06, "loss": 0.0386, "step": 25920 }, { "epoch": 3.0737578560417407, "grad_norm": 0.46432565369581397, "learning_rate": 6.707247171902761e-06, "loss": 0.025, "step": 25921 }, { "epoch": 3.073876437803866, "grad_norm": 0.948709046899295, "learning_rate": 6.705611131775791e-06, "loss": 0.0684, "step": 25922 }, { "epoch": 3.0739950195659906, "grad_norm": 0.7328721994866092, "learning_rate": 6.70397526030066e-06, "loss": 0.0203, "step": 25923 }, { "epoch": 3.074113601328116, "grad_norm": 0.7437448381255534, "learning_rate": 6.702339557492426e-06, "loss": 0.033, "step": 25924 }, { "epoch": 3.0742321830902406, "grad_norm": 0.2928563639643633, "learning_rate": 6.7007040233662e-06, "loss": 0.0158, "step": 25925 }, { "epoch": 3.074350764852366, "grad_norm": 0.4004930120455854, "learning_rate": 6.699068657937033e-06, "loss": 0.0223, "step": 25926 }, { "epoch": 3.0744693466144906, "grad_norm": 0.35397344748753046, "learning_rate": 6.697433461220021e-06, "loss": 0.0194, "step": 25927 }, { "epoch": 3.0745879283766158, "grad_norm": 0.6660547468923519, "learning_rate": 6.69579843323021e-06, "loss": 0.0451, "step": 25928 }, { "epoch": 3.0747065101387405, "grad_norm": 0.3983931657422914, "learning_rate": 6.694163573982709e-06, "loss": 0.0159, "step": 25929 }, { "epoch": 3.0748250919008657, "grad_norm": 0.7248030601621892, "learning_rate": 6.692528883492563e-06, "loss": 0.043, "step": 25930 }, { "epoch": 3.0749436736629905, "grad_norm": 0.29697685819447456, "learning_rate": 6.69089436177485e-06, "loss": 0.0169, "step": 25931 }, { "epoch": 3.0750622554251157, "grad_norm": 0.47128071031494084, "learning_rate": 6.689260008844642e-06, "loss": 0.0245, "step": 25932 }, { "epoch": 3.0751808371872404, "grad_norm": 0.5045354158244515, "learning_rate": 6.687625824716998e-06, "loss": 0.0296, "step": 25933 }, { "epoch": 3.0752994189493656, "grad_norm": 0.47521115053413315, "learning_rate": 6.685991809407e-06, "loss": 0.0267, "step": 25934 }, { "epoch": 3.0754180007114904, "grad_norm": 0.5427210469182296, "learning_rate": 6.684357962929688e-06, "loss": 0.0284, "step": 25935 }, { "epoch": 3.0755365824736156, "grad_norm": 0.4680366271340309, "learning_rate": 6.682724285300138e-06, "loss": 0.0256, "step": 25936 }, { "epoch": 3.0756551642357404, "grad_norm": 0.5073478344346518, "learning_rate": 6.681090776533405e-06, "loss": 0.0185, "step": 25937 }, { "epoch": 3.0757737459978656, "grad_norm": 0.707258533804839, "learning_rate": 6.679457436644562e-06, "loss": 0.0481, "step": 25938 }, { "epoch": 3.0758923277599903, "grad_norm": 0.5655719063524077, "learning_rate": 6.6778242656486384e-06, "loss": 0.0356, "step": 25939 }, { "epoch": 3.0760109095221155, "grad_norm": 0.3338516124342359, "learning_rate": 6.676191263560721e-06, "loss": 0.0197, "step": 25940 }, { "epoch": 3.0761294912842403, "grad_norm": 0.7008272852628471, "learning_rate": 6.6745584303958445e-06, "loss": 0.0385, "step": 25941 }, { "epoch": 3.0762480730463655, "grad_norm": 0.5813366625513711, "learning_rate": 6.672925766169067e-06, "loss": 0.0336, "step": 25942 }, { "epoch": 3.0763666548084903, "grad_norm": 0.33185583763258386, "learning_rate": 6.671293270895437e-06, "loss": 0.0183, "step": 25943 }, { "epoch": 3.0764852365706155, "grad_norm": 0.5645713815139736, "learning_rate": 6.6696609445900114e-06, "loss": 0.0276, "step": 25944 }, { "epoch": 3.07660381833274, "grad_norm": 0.6200641791451986, "learning_rate": 6.6680287872678385e-06, "loss": 0.033, "step": 25945 }, { "epoch": 3.0767224000948654, "grad_norm": 0.5217965316192931, "learning_rate": 6.666396798943952e-06, "loss": 0.0313, "step": 25946 }, { "epoch": 3.07684098185699, "grad_norm": 0.44747791484559546, "learning_rate": 6.664764979633406e-06, "loss": 0.0277, "step": 25947 }, { "epoch": 3.0769595636191154, "grad_norm": 0.605239895008461, "learning_rate": 6.663133329351242e-06, "loss": 0.0334, "step": 25948 }, { "epoch": 3.0770781453812406, "grad_norm": 0.3575997759549617, "learning_rate": 6.661501848112503e-06, "loss": 0.0149, "step": 25949 }, { "epoch": 3.0771967271433653, "grad_norm": 0.5300955645364865, "learning_rate": 6.659870535932236e-06, "loss": 0.0256, "step": 25950 }, { "epoch": 3.0773153089054905, "grad_norm": 0.4404148374999319, "learning_rate": 6.658239392825458e-06, "loss": 0.0251, "step": 25951 }, { "epoch": 3.0774338906676153, "grad_norm": 0.454633415464408, "learning_rate": 6.6566084188072366e-06, "loss": 0.0254, "step": 25952 }, { "epoch": 3.0775524724297405, "grad_norm": 0.4190149943582766, "learning_rate": 6.654977613892582e-06, "loss": 0.0218, "step": 25953 }, { "epoch": 3.0776710541918653, "grad_norm": 0.5675263024464763, "learning_rate": 6.653346978096539e-06, "loss": 0.0234, "step": 25954 }, { "epoch": 3.0777896359539905, "grad_norm": 0.4828041754269332, "learning_rate": 6.651716511434139e-06, "loss": 0.0388, "step": 25955 }, { "epoch": 3.0779082177161152, "grad_norm": 0.7056754517121647, "learning_rate": 6.65008621392042e-06, "loss": 0.03, "step": 25956 }, { "epoch": 3.0780267994782404, "grad_norm": 0.7171374270320224, "learning_rate": 6.648456085570395e-06, "loss": 0.0353, "step": 25957 }, { "epoch": 3.078145381240365, "grad_norm": 0.48563659580290375, "learning_rate": 6.6468261263991e-06, "loss": 0.0208, "step": 25958 }, { "epoch": 3.0782639630024904, "grad_norm": 0.5130931344452003, "learning_rate": 6.645196336421564e-06, "loss": 0.0259, "step": 25959 }, { "epoch": 3.078382544764615, "grad_norm": 0.3749734399854593, "learning_rate": 6.643566715652811e-06, "loss": 0.0243, "step": 25960 }, { "epoch": 3.0785011265267404, "grad_norm": 0.48067106995466385, "learning_rate": 6.641937264107867e-06, "loss": 0.0275, "step": 25961 }, { "epoch": 3.078619708288865, "grad_norm": 0.40427296122172973, "learning_rate": 6.640307981801735e-06, "loss": 0.0216, "step": 25962 }, { "epoch": 3.0787382900509903, "grad_norm": 0.5519668107864291, "learning_rate": 6.638678868749465e-06, "loss": 0.0359, "step": 25963 }, { "epoch": 3.078856871813115, "grad_norm": 0.4789663619443339, "learning_rate": 6.637049924966052e-06, "loss": 0.0277, "step": 25964 }, { "epoch": 3.0789754535752403, "grad_norm": 0.8152120851929396, "learning_rate": 6.635421150466528e-06, "loss": 0.0418, "step": 25965 }, { "epoch": 3.079094035337365, "grad_norm": 0.48097894293493126, "learning_rate": 6.633792545265888e-06, "loss": 0.0213, "step": 25966 }, { "epoch": 3.0792126170994902, "grad_norm": 0.879799524835373, "learning_rate": 6.632164109379172e-06, "loss": 0.0538, "step": 25967 }, { "epoch": 3.079331198861615, "grad_norm": 0.4976087320992294, "learning_rate": 6.63053584282137e-06, "loss": 0.0226, "step": 25968 }, { "epoch": 3.07944978062374, "grad_norm": 0.5124747792340648, "learning_rate": 6.628907745607502e-06, "loss": 0.0335, "step": 25969 }, { "epoch": 3.079568362385865, "grad_norm": 0.6426188538542209, "learning_rate": 6.627279817752577e-06, "loss": 0.0343, "step": 25970 }, { "epoch": 3.07968694414799, "grad_norm": 0.5484701421463061, "learning_rate": 6.6256520592716e-06, "loss": 0.0253, "step": 25971 }, { "epoch": 3.079805525910115, "grad_norm": 0.6540499677653276, "learning_rate": 6.624024470179591e-06, "loss": 0.0323, "step": 25972 }, { "epoch": 3.07992410767224, "grad_norm": 0.5288305571702047, "learning_rate": 6.622397050491522e-06, "loss": 0.017, "step": 25973 }, { "epoch": 3.080042689434365, "grad_norm": 0.4656192330169103, "learning_rate": 6.620769800222434e-06, "loss": 0.0274, "step": 25974 }, { "epoch": 3.08016127119649, "grad_norm": 0.58610074477054, "learning_rate": 6.619142719387303e-06, "loss": 0.0287, "step": 25975 }, { "epoch": 3.080279852958615, "grad_norm": 0.5001379414832273, "learning_rate": 6.617515808001143e-06, "loss": 0.0343, "step": 25976 }, { "epoch": 3.08039843472074, "grad_norm": 0.4225576013570139, "learning_rate": 6.615889066078929e-06, "loss": 0.025, "step": 25977 }, { "epoch": 3.080517016482865, "grad_norm": 0.7017133954642204, "learning_rate": 6.6142624936356895e-06, "loss": 0.031, "step": 25978 }, { "epoch": 3.08063559824499, "grad_norm": 0.3715763857088239, "learning_rate": 6.612636090686397e-06, "loss": 0.0224, "step": 25979 }, { "epoch": 3.0807541800071148, "grad_norm": 0.5764439137282856, "learning_rate": 6.611009857246051e-06, "loss": 0.0282, "step": 25980 }, { "epoch": 3.08087276176924, "grad_norm": 0.534889794221993, "learning_rate": 6.609383793329646e-06, "loss": 0.0375, "step": 25981 }, { "epoch": 3.0809913435313647, "grad_norm": 0.5414781577732999, "learning_rate": 6.607757898952166e-06, "loss": 0.033, "step": 25982 }, { "epoch": 3.08110992529349, "grad_norm": 0.4242410781553771, "learning_rate": 6.606132174128615e-06, "loss": 0.0174, "step": 25983 }, { "epoch": 3.0812285070556147, "grad_norm": 0.7079930818340444, "learning_rate": 6.604506618873954e-06, "loss": 0.0314, "step": 25984 }, { "epoch": 3.08134708881774, "grad_norm": 0.45773454886821563, "learning_rate": 6.602881233203198e-06, "loss": 0.0317, "step": 25985 }, { "epoch": 3.0814656705798646, "grad_norm": 0.7240846407864003, "learning_rate": 6.60125601713131e-06, "loss": 0.0375, "step": 25986 }, { "epoch": 3.08158425234199, "grad_norm": 0.31766103810571283, "learning_rate": 6.599630970673288e-06, "loss": 0.0161, "step": 25987 }, { "epoch": 3.0817028341041146, "grad_norm": 0.3293169853157706, "learning_rate": 6.598006093844086e-06, "loss": 0.0174, "step": 25988 }, { "epoch": 3.08182141586624, "grad_norm": 0.6611321989966826, "learning_rate": 6.596381386658721e-06, "loss": 0.0327, "step": 25989 }, { "epoch": 3.0819399976283646, "grad_norm": 0.48905176181743343, "learning_rate": 6.594756849132142e-06, "loss": 0.0197, "step": 25990 }, { "epoch": 3.0820585793904898, "grad_norm": 0.7123950419147906, "learning_rate": 6.5931324812793345e-06, "loss": 0.0407, "step": 25991 }, { "epoch": 3.0821771611526145, "grad_norm": 0.6289870448804806, "learning_rate": 6.591508283115274e-06, "loss": 0.0377, "step": 25992 }, { "epoch": 3.0822957429147397, "grad_norm": 0.4311515109705671, "learning_rate": 6.589884254654932e-06, "loss": 0.0262, "step": 25993 }, { "epoch": 3.0824143246768645, "grad_norm": 0.8784319018096395, "learning_rate": 6.588260395913293e-06, "loss": 0.051, "step": 25994 }, { "epoch": 3.0825329064389897, "grad_norm": 0.49729826456178705, "learning_rate": 6.586636706905303e-06, "loss": 0.0231, "step": 25995 }, { "epoch": 3.0826514882011145, "grad_norm": 0.717239423927012, "learning_rate": 6.585013187645944e-06, "loss": 0.0459, "step": 25996 }, { "epoch": 3.0827700699632397, "grad_norm": 0.565218427395357, "learning_rate": 6.583389838150181e-06, "loss": 0.0242, "step": 25997 }, { "epoch": 3.082888651725365, "grad_norm": 0.7997870949802006, "learning_rate": 6.581766658432981e-06, "loss": 0.0499, "step": 25998 }, { "epoch": 3.0830072334874896, "grad_norm": 0.4324894770740627, "learning_rate": 6.580143648509307e-06, "loss": 0.0231, "step": 25999 }, { "epoch": 3.083125815249615, "grad_norm": 0.6268098730406804, "learning_rate": 6.57852080839412e-06, "loss": 0.0299, "step": 26000 }, { "epoch": 3.0832443970117396, "grad_norm": 0.4833575874483491, "learning_rate": 6.576898138102386e-06, "loss": 0.0248, "step": 26001 }, { "epoch": 3.083362978773865, "grad_norm": 0.4922517181288967, "learning_rate": 6.575275637649056e-06, "loss": 0.025, "step": 26002 }, { "epoch": 3.0834815605359895, "grad_norm": 0.7073501529210374, "learning_rate": 6.573653307049088e-06, "loss": 0.0293, "step": 26003 }, { "epoch": 3.0836001422981147, "grad_norm": 0.6919452446827044, "learning_rate": 6.572031146317443e-06, "loss": 0.0318, "step": 26004 }, { "epoch": 3.0837187240602395, "grad_norm": 0.6100910706523739, "learning_rate": 6.570409155469076e-06, "loss": 0.0369, "step": 26005 }, { "epoch": 3.0838373058223647, "grad_norm": 0.3088219258351878, "learning_rate": 6.568787334518934e-06, "loss": 0.0161, "step": 26006 }, { "epoch": 3.0839558875844895, "grad_norm": 0.7093426330780543, "learning_rate": 6.567165683481968e-06, "loss": 0.0503, "step": 26007 }, { "epoch": 3.0840744693466147, "grad_norm": 0.6919160496538017, "learning_rate": 6.565544202373133e-06, "loss": 0.0335, "step": 26008 }, { "epoch": 3.0841930511087394, "grad_norm": 0.5167383835095553, "learning_rate": 6.5639228912073715e-06, "loss": 0.0273, "step": 26009 }, { "epoch": 3.0843116328708646, "grad_norm": 0.6214526479294019, "learning_rate": 6.562301749999636e-06, "loss": 0.0261, "step": 26010 }, { "epoch": 3.0844302146329894, "grad_norm": 0.44531902237942017, "learning_rate": 6.560680778764866e-06, "loss": 0.0134, "step": 26011 }, { "epoch": 3.0845487963951146, "grad_norm": 0.4863516764959551, "learning_rate": 6.559059977518017e-06, "loss": 0.0274, "step": 26012 }, { "epoch": 3.0846673781572393, "grad_norm": 0.7104606924491721, "learning_rate": 6.557439346274014e-06, "loss": 0.0281, "step": 26013 }, { "epoch": 3.0847859599193646, "grad_norm": 0.333014670373954, "learning_rate": 6.555818885047804e-06, "loss": 0.0155, "step": 26014 }, { "epoch": 3.0849045416814893, "grad_norm": 0.436665404637552, "learning_rate": 6.554198593854324e-06, "loss": 0.0218, "step": 26015 }, { "epoch": 3.0850231234436145, "grad_norm": 0.3905988278179602, "learning_rate": 6.552578472708523e-06, "loss": 0.0169, "step": 26016 }, { "epoch": 3.0851417052057393, "grad_norm": 0.5594439728415496, "learning_rate": 6.55095852162532e-06, "loss": 0.0328, "step": 26017 }, { "epoch": 3.0852602869678645, "grad_norm": 0.6152464330328824, "learning_rate": 6.549338740619654e-06, "loss": 0.03, "step": 26018 }, { "epoch": 3.0853788687299892, "grad_norm": 0.3864452028102754, "learning_rate": 6.547719129706459e-06, "loss": 0.022, "step": 26019 }, { "epoch": 3.0854974504921144, "grad_norm": 0.5161766484415163, "learning_rate": 6.546099688900667e-06, "loss": 0.0215, "step": 26020 }, { "epoch": 3.085616032254239, "grad_norm": 0.6192341118768159, "learning_rate": 6.544480418217214e-06, "loss": 0.0396, "step": 26021 }, { "epoch": 3.0857346140163644, "grad_norm": 0.4950366101394761, "learning_rate": 6.542861317671003e-06, "loss": 0.0253, "step": 26022 }, { "epoch": 3.085853195778489, "grad_norm": 0.4434046054761947, "learning_rate": 6.541242387276994e-06, "loss": 0.0232, "step": 26023 }, { "epoch": 3.0859717775406144, "grad_norm": 0.7428801192215463, "learning_rate": 6.5396236270500845e-06, "loss": 0.0346, "step": 26024 }, { "epoch": 3.086090359302739, "grad_norm": 0.5405437648916125, "learning_rate": 6.5380050370052095e-06, "loss": 0.0339, "step": 26025 }, { "epoch": 3.0862089410648643, "grad_norm": 0.6394170193230403, "learning_rate": 6.536386617157289e-06, "loss": 0.0447, "step": 26026 }, { "epoch": 3.086327522826989, "grad_norm": 0.4450824035829191, "learning_rate": 6.534768367521249e-06, "loss": 0.0236, "step": 26027 }, { "epoch": 3.0864461045891143, "grad_norm": 0.3928210764250835, "learning_rate": 6.533150288111992e-06, "loss": 0.0123, "step": 26028 }, { "epoch": 3.086564686351239, "grad_norm": 0.7628377051536036, "learning_rate": 6.531532378944446e-06, "loss": 0.0538, "step": 26029 }, { "epoch": 3.0866832681133642, "grad_norm": 0.476141190421601, "learning_rate": 6.529914640033524e-06, "loss": 0.0237, "step": 26030 }, { "epoch": 3.086801849875489, "grad_norm": 0.3992736288780532, "learning_rate": 6.528297071394138e-06, "loss": 0.0224, "step": 26031 }, { "epoch": 3.086920431637614, "grad_norm": 0.39158701755428715, "learning_rate": 6.526679673041211e-06, "loss": 0.02, "step": 26032 }, { "epoch": 3.087039013399739, "grad_norm": 0.4995681483269818, "learning_rate": 6.525062444989627e-06, "loss": 0.0309, "step": 26033 }, { "epoch": 3.087157595161864, "grad_norm": 0.47622242371285595, "learning_rate": 6.523445387254326e-06, "loss": 0.024, "step": 26034 }, { "epoch": 3.087276176923989, "grad_norm": 0.4822889715952465, "learning_rate": 6.521828499850197e-06, "loss": 0.0218, "step": 26035 }, { "epoch": 3.087394758686114, "grad_norm": 0.3947165075191613, "learning_rate": 6.520211782792154e-06, "loss": 0.0249, "step": 26036 }, { "epoch": 3.087513340448239, "grad_norm": 0.7401827631733047, "learning_rate": 6.518595236095084e-06, "loss": 0.0245, "step": 26037 }, { "epoch": 3.087631922210364, "grad_norm": 0.5925232133644974, "learning_rate": 6.516978859773917e-06, "loss": 0.0249, "step": 26038 }, { "epoch": 3.087750503972489, "grad_norm": 0.5927016230377626, "learning_rate": 6.515362653843532e-06, "loss": 0.0317, "step": 26039 }, { "epoch": 3.087869085734614, "grad_norm": 0.53365322631287, "learning_rate": 6.513746618318836e-06, "loss": 0.0293, "step": 26040 }, { "epoch": 3.087987667496739, "grad_norm": 0.4297130706179819, "learning_rate": 6.512130753214726e-06, "loss": 0.0259, "step": 26041 }, { "epoch": 3.088106249258864, "grad_norm": 0.6590294629012833, "learning_rate": 6.510515058546099e-06, "loss": 0.0315, "step": 26042 }, { "epoch": 3.0882248310209888, "grad_norm": 0.3969071013258093, "learning_rate": 6.508899534327858e-06, "loss": 0.0223, "step": 26043 }, { "epoch": 3.088343412783114, "grad_norm": 0.6890257396124768, "learning_rate": 6.507284180574874e-06, "loss": 0.0328, "step": 26044 }, { "epoch": 3.0884619945452387, "grad_norm": 0.39502478958012394, "learning_rate": 6.505668997302067e-06, "loss": 0.0244, "step": 26045 }, { "epoch": 3.088580576307364, "grad_norm": 0.49492961933499946, "learning_rate": 6.504053984524305e-06, "loss": 0.0229, "step": 26046 }, { "epoch": 3.088699158069489, "grad_norm": 0.5379565912596361, "learning_rate": 6.502439142256484e-06, "loss": 0.0267, "step": 26047 }, { "epoch": 3.088817739831614, "grad_norm": 0.4276569010260428, "learning_rate": 6.500824470513492e-06, "loss": 0.0218, "step": 26048 }, { "epoch": 3.0889363215937387, "grad_norm": 0.7430565957769454, "learning_rate": 6.49920996931021e-06, "loss": 0.0306, "step": 26049 }, { "epoch": 3.089054903355864, "grad_norm": 0.40178050499033524, "learning_rate": 6.497595638661535e-06, "loss": 0.0243, "step": 26050 }, { "epoch": 3.089173485117989, "grad_norm": 0.3567049333889741, "learning_rate": 6.49598147858233e-06, "loss": 0.0215, "step": 26051 }, { "epoch": 3.089292066880114, "grad_norm": 0.48087754570985825, "learning_rate": 6.4943674890874885e-06, "loss": 0.025, "step": 26052 }, { "epoch": 3.089410648642239, "grad_norm": 0.46757723379112104, "learning_rate": 6.4927536701918815e-06, "loss": 0.0261, "step": 26053 }, { "epoch": 3.089529230404364, "grad_norm": 0.7440723165720353, "learning_rate": 6.491140021910399e-06, "loss": 0.0297, "step": 26054 }, { "epoch": 3.089647812166489, "grad_norm": 0.4924604747150375, "learning_rate": 6.489526544257893e-06, "loss": 0.0326, "step": 26055 }, { "epoch": 3.0897663939286137, "grad_norm": 0.6617313019587737, "learning_rate": 6.4879132372492714e-06, "loss": 0.0395, "step": 26056 }, { "epoch": 3.089884975690739, "grad_norm": 0.5708943102887274, "learning_rate": 6.486300100899379e-06, "loss": 0.0317, "step": 26057 }, { "epoch": 3.0900035574528637, "grad_norm": 0.5943811059048867, "learning_rate": 6.4846871352230945e-06, "loss": 0.0299, "step": 26058 }, { "epoch": 3.090122139214989, "grad_norm": 0.6235873308204609, "learning_rate": 6.4830743402352925e-06, "loss": 0.0356, "step": 26059 }, { "epoch": 3.0902407209771137, "grad_norm": 0.6517355605667484, "learning_rate": 6.4814617159508375e-06, "loss": 0.0327, "step": 26060 }, { "epoch": 3.090359302739239, "grad_norm": 0.6607459360935296, "learning_rate": 6.479849262384605e-06, "loss": 0.0293, "step": 26061 }, { "epoch": 3.0904778845013636, "grad_norm": 0.34996653675546363, "learning_rate": 6.478236979551441e-06, "loss": 0.0184, "step": 26062 }, { "epoch": 3.090596466263489, "grad_norm": 0.5253416480052093, "learning_rate": 6.476624867466222e-06, "loss": 0.0407, "step": 26063 }, { "epoch": 3.0907150480256136, "grad_norm": 0.46496221764541534, "learning_rate": 6.475012926143806e-06, "loss": 0.024, "step": 26064 }, { "epoch": 3.090833629787739, "grad_norm": 0.6791902826723922, "learning_rate": 6.47340115559906e-06, "loss": 0.0359, "step": 26065 }, { "epoch": 3.0909522115498635, "grad_norm": 0.435439009503598, "learning_rate": 6.47178955584683e-06, "loss": 0.023, "step": 26066 }, { "epoch": 3.0910707933119888, "grad_norm": 0.4099788133572411, "learning_rate": 6.47017812690198e-06, "loss": 0.02, "step": 26067 }, { "epoch": 3.0911893750741135, "grad_norm": 0.46143631459436923, "learning_rate": 6.468566868779366e-06, "loss": 0.0196, "step": 26068 }, { "epoch": 3.0913079568362387, "grad_norm": 0.5754176118179641, "learning_rate": 6.4669557814938385e-06, "loss": 0.0259, "step": 26069 }, { "epoch": 3.0914265385983635, "grad_norm": 0.35391677572652575, "learning_rate": 6.465344865060252e-06, "loss": 0.0165, "step": 26070 }, { "epoch": 3.0915451203604887, "grad_norm": 0.4467724976243358, "learning_rate": 6.463734119493459e-06, "loss": 0.025, "step": 26071 }, { "epoch": 3.0916637021226134, "grad_norm": 0.34964913588438584, "learning_rate": 6.462123544808313e-06, "loss": 0.0184, "step": 26072 }, { "epoch": 3.0917822838847386, "grad_norm": 0.6823587938764629, "learning_rate": 6.460513141019647e-06, "loss": 0.0322, "step": 26073 }, { "epoch": 3.0919008656468634, "grad_norm": 0.74573373887155, "learning_rate": 6.458902908142317e-06, "loss": 0.039, "step": 26074 }, { "epoch": 3.0920194474089886, "grad_norm": 0.40816573136992246, "learning_rate": 6.457292846191165e-06, "loss": 0.0208, "step": 26075 }, { "epoch": 3.0921380291711134, "grad_norm": 0.6587267117361991, "learning_rate": 6.455682955181041e-06, "loss": 0.0362, "step": 26076 }, { "epoch": 3.0922566109332386, "grad_norm": 0.8188778750560314, "learning_rate": 6.454073235126773e-06, "loss": 0.0568, "step": 26077 }, { "epoch": 3.0923751926953633, "grad_norm": 0.652906726667297, "learning_rate": 6.452463686043208e-06, "loss": 0.0324, "step": 26078 }, { "epoch": 3.0924937744574885, "grad_norm": 0.48402914168977423, "learning_rate": 6.450854307945181e-06, "loss": 0.0258, "step": 26079 }, { "epoch": 3.0926123562196133, "grad_norm": 0.8139689697659419, "learning_rate": 6.449245100847534e-06, "loss": 0.0398, "step": 26080 }, { "epoch": 3.0927309379817385, "grad_norm": 0.5386061958055983, "learning_rate": 6.447636064765103e-06, "loss": 0.0278, "step": 26081 }, { "epoch": 3.0928495197438632, "grad_norm": 0.8552742057520043, "learning_rate": 6.4460271997127036e-06, "loss": 0.0411, "step": 26082 }, { "epoch": 3.0929681015059884, "grad_norm": 0.5608867718886952, "learning_rate": 6.444418505705197e-06, "loss": 0.0247, "step": 26083 }, { "epoch": 3.093086683268113, "grad_norm": 0.4739128024999165, "learning_rate": 6.442809982757389e-06, "loss": 0.0252, "step": 26084 }, { "epoch": 3.0932052650302384, "grad_norm": 0.7288042162777812, "learning_rate": 6.4412016308841165e-06, "loss": 0.0348, "step": 26085 }, { "epoch": 3.093323846792363, "grad_norm": 0.5094538540042433, "learning_rate": 6.4395934501002064e-06, "loss": 0.026, "step": 26086 }, { "epoch": 3.0934424285544884, "grad_norm": 0.465996766650947, "learning_rate": 6.437985440420491e-06, "loss": 0.025, "step": 26087 }, { "epoch": 3.093561010316613, "grad_norm": 0.517471552499677, "learning_rate": 6.436377601859784e-06, "loss": 0.0332, "step": 26088 }, { "epoch": 3.0936795920787383, "grad_norm": 0.5555464629530564, "learning_rate": 6.434769934432908e-06, "loss": 0.0272, "step": 26089 }, { "epoch": 3.093798173840863, "grad_norm": 0.5764550458155953, "learning_rate": 6.433162438154686e-06, "loss": 0.0329, "step": 26090 }, { "epoch": 3.0939167556029883, "grad_norm": 0.6175872489853209, "learning_rate": 6.4315551130399415e-06, "loss": 0.0397, "step": 26091 }, { "epoch": 3.094035337365113, "grad_norm": 0.575816710613068, "learning_rate": 6.429947959103494e-06, "loss": 0.0237, "step": 26092 }, { "epoch": 3.0941539191272383, "grad_norm": 0.6492289104129336, "learning_rate": 6.428340976360139e-06, "loss": 0.0293, "step": 26093 }, { "epoch": 3.094272500889363, "grad_norm": 0.6265035535082848, "learning_rate": 6.426734164824722e-06, "loss": 0.0304, "step": 26094 }, { "epoch": 3.094391082651488, "grad_norm": 0.4358127569980879, "learning_rate": 6.4251275245120314e-06, "loss": 0.0214, "step": 26095 }, { "epoch": 3.0945096644136134, "grad_norm": 0.5054357215257627, "learning_rate": 6.423521055436887e-06, "loss": 0.0196, "step": 26096 }, { "epoch": 3.094628246175738, "grad_norm": 0.5745817211277335, "learning_rate": 6.4219147576141e-06, "loss": 0.0319, "step": 26097 }, { "epoch": 3.094746827937863, "grad_norm": 0.3563434163305405, "learning_rate": 6.420308631058477e-06, "loss": 0.0125, "step": 26098 }, { "epoch": 3.094865409699988, "grad_norm": 0.7071026399693929, "learning_rate": 6.4187026757848325e-06, "loss": 0.0319, "step": 26099 }, { "epoch": 3.0949839914621133, "grad_norm": 0.7004558454359763, "learning_rate": 6.417096891807947e-06, "loss": 0.0353, "step": 26100 }, { "epoch": 3.095102573224238, "grad_norm": 0.7292360374990524, "learning_rate": 6.415491279142655e-06, "loss": 0.0413, "step": 26101 }, { "epoch": 3.0952211549863633, "grad_norm": 0.7670420474174356, "learning_rate": 6.413885837803738e-06, "loss": 0.0445, "step": 26102 }, { "epoch": 3.095339736748488, "grad_norm": 0.4713770252526953, "learning_rate": 6.412280567806006e-06, "loss": 0.0271, "step": 26103 }, { "epoch": 3.0954583185106133, "grad_norm": 0.4099736588742806, "learning_rate": 6.41067546916424e-06, "loss": 0.0187, "step": 26104 }, { "epoch": 3.095576900272738, "grad_norm": 0.3314777783927386, "learning_rate": 6.4090705418932665e-06, "loss": 0.0205, "step": 26105 }, { "epoch": 3.095695482034863, "grad_norm": 0.4781650328230959, "learning_rate": 6.407465786007857e-06, "loss": 0.0204, "step": 26106 }, { "epoch": 3.095814063796988, "grad_norm": 0.6459632347641338, "learning_rate": 6.405861201522812e-06, "loss": 0.0515, "step": 26107 }, { "epoch": 3.095932645559113, "grad_norm": 0.6143532582629664, "learning_rate": 6.404256788452928e-06, "loss": 0.0295, "step": 26108 }, { "epoch": 3.096051227321238, "grad_norm": 0.37624946306459617, "learning_rate": 6.40265254681299e-06, "loss": 0.0167, "step": 26109 }, { "epoch": 3.096169809083363, "grad_norm": 0.47942195552259165, "learning_rate": 6.401048476617799e-06, "loss": 0.0311, "step": 26110 }, { "epoch": 3.096288390845488, "grad_norm": 0.46734469051664285, "learning_rate": 6.399444577882116e-06, "loss": 0.0248, "step": 26111 }, { "epoch": 3.096406972607613, "grad_norm": 0.4174875944450662, "learning_rate": 6.397840850620762e-06, "loss": 0.0166, "step": 26112 }, { "epoch": 3.096525554369738, "grad_norm": 0.7021774639070125, "learning_rate": 6.396237294848495e-06, "loss": 0.0368, "step": 26113 }, { "epoch": 3.096644136131863, "grad_norm": 0.5650737757798722, "learning_rate": 6.394633910580117e-06, "loss": 0.0347, "step": 26114 }, { "epoch": 3.096762717893988, "grad_norm": 0.42941695748394354, "learning_rate": 6.39303069783038e-06, "loss": 0.0249, "step": 26115 }, { "epoch": 3.096881299656113, "grad_norm": 0.3815517075895673, "learning_rate": 6.391427656614099e-06, "loss": 0.0227, "step": 26116 }, { "epoch": 3.096999881418238, "grad_norm": 0.42865949722174695, "learning_rate": 6.38982478694603e-06, "loss": 0.0237, "step": 26117 }, { "epoch": 3.097118463180363, "grad_norm": 0.6148091098283479, "learning_rate": 6.3882220888409535e-06, "loss": 0.0352, "step": 26118 }, { "epoch": 3.0972370449424877, "grad_norm": 0.9776604886746922, "learning_rate": 6.386619562313645e-06, "loss": 0.0252, "step": 26119 }, { "epoch": 3.097355626704613, "grad_norm": 0.5186746240473057, "learning_rate": 6.385017207378882e-06, "loss": 0.0289, "step": 26120 }, { "epoch": 3.0974742084667377, "grad_norm": 0.35456380628504547, "learning_rate": 6.383415024051437e-06, "loss": 0.017, "step": 26121 }, { "epoch": 3.097592790228863, "grad_norm": 0.4721376671801722, "learning_rate": 6.381813012346072e-06, "loss": 0.0187, "step": 26122 }, { "epoch": 3.0977113719909877, "grad_norm": 0.500040686280097, "learning_rate": 6.380211172277559e-06, "loss": 0.0206, "step": 26123 }, { "epoch": 3.097829953753113, "grad_norm": 0.5783981179098923, "learning_rate": 6.378609503860664e-06, "loss": 0.0257, "step": 26124 }, { "epoch": 3.0979485355152376, "grad_norm": 0.718243938403343, "learning_rate": 6.3770080071101634e-06, "loss": 0.032, "step": 26125 }, { "epoch": 3.098067117277363, "grad_norm": 0.46007860692600205, "learning_rate": 6.375406682040797e-06, "loss": 0.0259, "step": 26126 }, { "epoch": 3.0981856990394876, "grad_norm": 0.2682857679339162, "learning_rate": 6.3738055286673565e-06, "loss": 0.013, "step": 26127 }, { "epoch": 3.098304280801613, "grad_norm": 0.5876134087215134, "learning_rate": 6.372204547004582e-06, "loss": 0.0393, "step": 26128 }, { "epoch": 3.0984228625637376, "grad_norm": 0.3860610685957886, "learning_rate": 6.370603737067237e-06, "loss": 0.023, "step": 26129 }, { "epoch": 3.0985414443258628, "grad_norm": 0.5742058485515639, "learning_rate": 6.36900309887008e-06, "loss": 0.0201, "step": 26130 }, { "epoch": 3.0986600260879875, "grad_norm": 0.5404164781061723, "learning_rate": 6.3674026324278715e-06, "loss": 0.0353, "step": 26131 }, { "epoch": 3.0987786078501127, "grad_norm": 0.6375048028861412, "learning_rate": 6.365802337755364e-06, "loss": 0.0339, "step": 26132 }, { "epoch": 3.0988971896122375, "grad_norm": 0.4719265436184998, "learning_rate": 6.364202214867304e-06, "loss": 0.0369, "step": 26133 }, { "epoch": 3.0990157713743627, "grad_norm": 0.5649501941399752, "learning_rate": 6.362602263778447e-06, "loss": 0.0301, "step": 26134 }, { "epoch": 3.0991343531364874, "grad_norm": 0.5069996707479983, "learning_rate": 6.361002484503542e-06, "loss": 0.0229, "step": 26135 }, { "epoch": 3.0992529348986126, "grad_norm": 0.4811692753328815, "learning_rate": 6.359402877057344e-06, "loss": 0.0253, "step": 26136 }, { "epoch": 3.0993715166607374, "grad_norm": 0.4527373067829245, "learning_rate": 6.357803441454585e-06, "loss": 0.0214, "step": 26137 }, { "epoch": 3.0994900984228626, "grad_norm": 0.543717208047477, "learning_rate": 6.356204177710021e-06, "loss": 0.0256, "step": 26138 }, { "epoch": 3.0996086801849874, "grad_norm": 0.29689793949334753, "learning_rate": 6.354605085838389e-06, "loss": 0.015, "step": 26139 }, { "epoch": 3.0997272619471126, "grad_norm": 0.4255895091790392, "learning_rate": 6.353006165854433e-06, "loss": 0.0256, "step": 26140 }, { "epoch": 3.0998458437092373, "grad_norm": 0.4924913820572335, "learning_rate": 6.351407417772895e-06, "loss": 0.023, "step": 26141 }, { "epoch": 3.0999644254713625, "grad_norm": 0.2611360846122386, "learning_rate": 6.349808841608512e-06, "loss": 0.0132, "step": 26142 }, { "epoch": 3.1000830072334873, "grad_norm": 0.6231538052656527, "learning_rate": 6.3482104373760285e-06, "loss": 0.0268, "step": 26143 }, { "epoch": 3.1002015889956125, "grad_norm": 0.5581988894052708, "learning_rate": 6.346612205090166e-06, "loss": 0.0343, "step": 26144 }, { "epoch": 3.1003201707577372, "grad_norm": 0.467943269195353, "learning_rate": 6.345014144765665e-06, "loss": 0.0246, "step": 26145 }, { "epoch": 3.1004387525198625, "grad_norm": 0.39490299166866516, "learning_rate": 6.343416256417256e-06, "loss": 0.0216, "step": 26146 }, { "epoch": 3.100557334281987, "grad_norm": 0.40526310950422917, "learning_rate": 6.341818540059672e-06, "loss": 0.0189, "step": 26147 }, { "epoch": 3.1006759160441124, "grad_norm": 0.9096834403711268, "learning_rate": 6.340220995707646e-06, "loss": 0.0494, "step": 26148 }, { "epoch": 3.1007944978062376, "grad_norm": 0.7194759164932298, "learning_rate": 6.338623623375886e-06, "loss": 0.0315, "step": 26149 }, { "epoch": 3.1009130795683624, "grad_norm": 0.9069395760537126, "learning_rate": 6.337026423079148e-06, "loss": 0.0345, "step": 26150 }, { "epoch": 3.1010316613304876, "grad_norm": 0.2881014049473789, "learning_rate": 6.3354293948321345e-06, "loss": 0.0153, "step": 26151 }, { "epoch": 3.1011502430926123, "grad_norm": 0.412915766422724, "learning_rate": 6.333832538649578e-06, "loss": 0.0199, "step": 26152 }, { "epoch": 3.1012688248547375, "grad_norm": 0.7464201383240343, "learning_rate": 6.332235854546184e-06, "loss": 0.0355, "step": 26153 }, { "epoch": 3.1013874066168623, "grad_norm": 0.4052856316928445, "learning_rate": 6.330639342536696e-06, "loss": 0.0219, "step": 26154 }, { "epoch": 3.1015059883789875, "grad_norm": 0.46503804348169514, "learning_rate": 6.329043002635812e-06, "loss": 0.021, "step": 26155 }, { "epoch": 3.1016245701411123, "grad_norm": 0.6772933924026678, "learning_rate": 6.327446834858259e-06, "loss": 0.0301, "step": 26156 }, { "epoch": 3.1017431519032375, "grad_norm": 0.4369348388843227, "learning_rate": 6.3258508392187445e-06, "loss": 0.0206, "step": 26157 }, { "epoch": 3.101861733665362, "grad_norm": 0.4568157068083222, "learning_rate": 6.324255015731986e-06, "loss": 0.0272, "step": 26158 }, { "epoch": 3.1019803154274874, "grad_norm": 0.7929178633046083, "learning_rate": 6.322659364412703e-06, "loss": 0.044, "step": 26159 }, { "epoch": 3.102098897189612, "grad_norm": 0.5468994257523234, "learning_rate": 6.321063885275583e-06, "loss": 0.0265, "step": 26160 }, { "epoch": 3.1022174789517374, "grad_norm": 0.3919872481637659, "learning_rate": 6.319468578335361e-06, "loss": 0.0197, "step": 26161 }, { "epoch": 3.102336060713862, "grad_norm": 0.5259115291583194, "learning_rate": 6.317873443606726e-06, "loss": 0.0293, "step": 26162 }, { "epoch": 3.1024546424759873, "grad_norm": 0.45732702986419904, "learning_rate": 6.316278481104393e-06, "loss": 0.0198, "step": 26163 }, { "epoch": 3.102573224238112, "grad_norm": 0.49014440440116797, "learning_rate": 6.314683690843048e-06, "loss": 0.0294, "step": 26164 }, { "epoch": 3.1026918060002373, "grad_norm": 0.6621076537610031, "learning_rate": 6.313089072837419e-06, "loss": 0.0485, "step": 26165 }, { "epoch": 3.102810387762362, "grad_norm": 0.4798318940313289, "learning_rate": 6.311494627102188e-06, "loss": 0.0167, "step": 26166 }, { "epoch": 3.1029289695244873, "grad_norm": 0.7725896287506294, "learning_rate": 6.309900353652056e-06, "loss": 0.0272, "step": 26167 }, { "epoch": 3.103047551286612, "grad_norm": 0.4707758511473212, "learning_rate": 6.308306252501728e-06, "loss": 0.0237, "step": 26168 }, { "epoch": 3.1031661330487372, "grad_norm": 0.47674213614228134, "learning_rate": 6.3067123236658924e-06, "loss": 0.0214, "step": 26169 }, { "epoch": 3.103284714810862, "grad_norm": 0.4040571008650443, "learning_rate": 6.305118567159254e-06, "loss": 0.015, "step": 26170 }, { "epoch": 3.103403296572987, "grad_norm": 0.5013733217426917, "learning_rate": 6.303524982996481e-06, "loss": 0.0211, "step": 26171 }, { "epoch": 3.103521878335112, "grad_norm": 0.5708457959477325, "learning_rate": 6.3019315711922985e-06, "loss": 0.0297, "step": 26172 }, { "epoch": 3.103640460097237, "grad_norm": 0.4303491387378306, "learning_rate": 6.300338331761368e-06, "loss": 0.0257, "step": 26173 }, { "epoch": 3.103759041859362, "grad_norm": 0.5712810381636688, "learning_rate": 6.298745264718395e-06, "loss": 0.0238, "step": 26174 }, { "epoch": 3.103877623621487, "grad_norm": 0.6337820052975662, "learning_rate": 6.297152370078044e-06, "loss": 0.0252, "step": 26175 }, { "epoch": 3.103996205383612, "grad_norm": 0.3697059540460847, "learning_rate": 6.295559647855026e-06, "loss": 0.023, "step": 26176 }, { "epoch": 3.104114787145737, "grad_norm": 0.3406037064297466, "learning_rate": 6.2939670980640044e-06, "loss": 0.0166, "step": 26177 }, { "epoch": 3.104233368907862, "grad_norm": 0.3465394481968439, "learning_rate": 6.292374720719668e-06, "loss": 0.0151, "step": 26178 }, { "epoch": 3.104351950669987, "grad_norm": 0.5471383595162395, "learning_rate": 6.290782515836693e-06, "loss": 0.0253, "step": 26179 }, { "epoch": 3.104470532432112, "grad_norm": 0.5977196001023531, "learning_rate": 6.2891904834297635e-06, "loss": 0.034, "step": 26180 }, { "epoch": 3.104589114194237, "grad_norm": 0.322087546047683, "learning_rate": 6.287598623513561e-06, "loss": 0.0157, "step": 26181 }, { "epoch": 3.1047076959563618, "grad_norm": 0.515620873152897, "learning_rate": 6.286006936102737e-06, "loss": 0.0241, "step": 26182 }, { "epoch": 3.104826277718487, "grad_norm": 1.105780492860416, "learning_rate": 6.284415421211995e-06, "loss": 0.0373, "step": 26183 }, { "epoch": 3.1049448594806117, "grad_norm": 0.6671472947546507, "learning_rate": 6.282824078855984e-06, "loss": 0.0366, "step": 26184 }, { "epoch": 3.105063441242737, "grad_norm": 0.3657149944095711, "learning_rate": 6.28123290904939e-06, "loss": 0.0148, "step": 26185 }, { "epoch": 3.1051820230048617, "grad_norm": 0.4677163757661089, "learning_rate": 6.279641911806861e-06, "loss": 0.0263, "step": 26186 }, { "epoch": 3.105300604766987, "grad_norm": 0.6634729342814972, "learning_rate": 6.278051087143089e-06, "loss": 0.0358, "step": 26187 }, { "epoch": 3.1054191865291116, "grad_norm": 0.6650557226389416, "learning_rate": 6.2764604350727226e-06, "loss": 0.0269, "step": 26188 }, { "epoch": 3.105537768291237, "grad_norm": 0.5158551401085014, "learning_rate": 6.27486995561043e-06, "loss": 0.0277, "step": 26189 }, { "epoch": 3.1056563500533616, "grad_norm": 0.3636550865915899, "learning_rate": 6.273279648770874e-06, "loss": 0.0126, "step": 26190 }, { "epoch": 3.105774931815487, "grad_norm": 0.548194025131478, "learning_rate": 6.271689514568715e-06, "loss": 0.027, "step": 26191 }, { "epoch": 3.1058935135776116, "grad_norm": 0.7543154328423592, "learning_rate": 6.270099553018621e-06, "loss": 0.0362, "step": 26192 }, { "epoch": 3.1060120953397368, "grad_norm": 0.8585245432987427, "learning_rate": 6.268509764135233e-06, "loss": 0.0384, "step": 26193 }, { "epoch": 3.1061306771018615, "grad_norm": 0.5438979068703628, "learning_rate": 6.266920147933214e-06, "loss": 0.0361, "step": 26194 }, { "epoch": 3.1062492588639867, "grad_norm": 0.47608983359420165, "learning_rate": 6.265330704427219e-06, "loss": 0.0221, "step": 26195 }, { "epoch": 3.1063678406261115, "grad_norm": 0.5851248257257475, "learning_rate": 6.263741433631912e-06, "loss": 0.0334, "step": 26196 }, { "epoch": 3.1064864223882367, "grad_norm": 0.7180837176882194, "learning_rate": 6.2621523355619136e-06, "loss": 0.0403, "step": 26197 }, { "epoch": 3.106605004150362, "grad_norm": 0.5668140545060149, "learning_rate": 6.260563410231909e-06, "loss": 0.0258, "step": 26198 }, { "epoch": 3.1067235859124867, "grad_norm": 0.49279006628514976, "learning_rate": 6.258974657656525e-06, "loss": 0.0239, "step": 26199 }, { "epoch": 3.106842167674612, "grad_norm": 0.3602721722995767, "learning_rate": 6.257386077850411e-06, "loss": 0.017, "step": 26200 }, { "epoch": 3.1069607494367366, "grad_norm": 0.43464324767864015, "learning_rate": 6.255797670828215e-06, "loss": 0.014, "step": 26201 }, { "epoch": 3.107079331198862, "grad_norm": 0.7187527548542603, "learning_rate": 6.2542094366045775e-06, "loss": 0.031, "step": 26202 }, { "epoch": 3.1071979129609866, "grad_norm": 0.5209400554885527, "learning_rate": 6.252621375194148e-06, "loss": 0.0224, "step": 26203 }, { "epoch": 3.1073164947231118, "grad_norm": 0.8959580971168821, "learning_rate": 6.251033486611554e-06, "loss": 0.0462, "step": 26204 }, { "epoch": 3.1074350764852365, "grad_norm": 0.6117897944961294, "learning_rate": 6.249445770871437e-06, "loss": 0.0331, "step": 26205 }, { "epoch": 3.1075536582473617, "grad_norm": 0.4864062189881036, "learning_rate": 6.24785822798844e-06, "loss": 0.0277, "step": 26206 }, { "epoch": 3.1076722400094865, "grad_norm": 0.34375536332310835, "learning_rate": 6.246270857977193e-06, "loss": 0.017, "step": 26207 }, { "epoch": 3.1077908217716117, "grad_norm": 0.5515455651915314, "learning_rate": 6.244683660852341e-06, "loss": 0.024, "step": 26208 }, { "epoch": 3.1079094035337365, "grad_norm": 0.6770005272404406, "learning_rate": 6.24309663662849e-06, "loss": 0.0316, "step": 26209 }, { "epoch": 3.1080279852958617, "grad_norm": 0.8336155106196668, "learning_rate": 6.241509785320298e-06, "loss": 0.0392, "step": 26210 }, { "epoch": 3.1081465670579864, "grad_norm": 0.8887607302091973, "learning_rate": 6.23992310694238e-06, "loss": 0.0313, "step": 26211 }, { "epoch": 3.1082651488201116, "grad_norm": 0.30569133403223414, "learning_rate": 6.238336601509365e-06, "loss": 0.0123, "step": 26212 }, { "epoch": 3.1083837305822364, "grad_norm": 0.6165676113326908, "learning_rate": 6.2367502690358795e-06, "loss": 0.0269, "step": 26213 }, { "epoch": 3.1085023123443616, "grad_norm": 0.6718343721116631, "learning_rate": 6.235164109536554e-06, "loss": 0.0311, "step": 26214 }, { "epoch": 3.1086208941064863, "grad_norm": 0.35416484315472335, "learning_rate": 6.233578123025996e-06, "loss": 0.0208, "step": 26215 }, { "epoch": 3.1087394758686115, "grad_norm": 0.49332196230191394, "learning_rate": 6.231992309518836e-06, "loss": 0.0272, "step": 26216 }, { "epoch": 3.1088580576307363, "grad_norm": 0.5688896801795142, "learning_rate": 6.230406669029693e-06, "loss": 0.0313, "step": 26217 }, { "epoch": 3.1089766393928615, "grad_norm": 0.616522881474446, "learning_rate": 6.228821201573182e-06, "loss": 0.0201, "step": 26218 }, { "epoch": 3.1090952211549863, "grad_norm": 0.63616730972599, "learning_rate": 6.227235907163928e-06, "loss": 0.0365, "step": 26219 }, { "epoch": 3.1092138029171115, "grad_norm": 0.4397305539556511, "learning_rate": 6.225650785816525e-06, "loss": 0.0252, "step": 26220 }, { "epoch": 3.1093323846792362, "grad_norm": 0.46680021410540623, "learning_rate": 6.2240658375456145e-06, "loss": 0.0197, "step": 26221 }, { "epoch": 3.1094509664413614, "grad_norm": 0.568702417637025, "learning_rate": 6.222481062365784e-06, "loss": 0.0341, "step": 26222 }, { "epoch": 3.109569548203486, "grad_norm": 0.502489025253057, "learning_rate": 6.220896460291658e-06, "loss": 0.0126, "step": 26223 }, { "epoch": 3.1096881299656114, "grad_norm": 0.45140662597873343, "learning_rate": 6.219312031337823e-06, "loss": 0.0278, "step": 26224 }, { "epoch": 3.109806711727736, "grad_norm": 0.7083775000050564, "learning_rate": 6.21772777551892e-06, "loss": 0.0288, "step": 26225 }, { "epoch": 3.1099252934898614, "grad_norm": 0.6953384466977269, "learning_rate": 6.216143692849522e-06, "loss": 0.0383, "step": 26226 }, { "epoch": 3.110043875251986, "grad_norm": 0.37717068588820113, "learning_rate": 6.214559783344248e-06, "loss": 0.0302, "step": 26227 }, { "epoch": 3.1101624570141113, "grad_norm": 0.6486995332270011, "learning_rate": 6.212976047017693e-06, "loss": 0.027, "step": 26228 }, { "epoch": 3.110281038776236, "grad_norm": 0.43221895613542033, "learning_rate": 6.211392483884465e-06, "loss": 0.0257, "step": 26229 }, { "epoch": 3.1103996205383613, "grad_norm": 0.4420670967039843, "learning_rate": 6.209809093959162e-06, "loss": 0.0257, "step": 26230 }, { "epoch": 3.110518202300486, "grad_norm": 0.5846597177599898, "learning_rate": 6.208225877256365e-06, "loss": 0.0245, "step": 26231 }, { "epoch": 3.1106367840626112, "grad_norm": 0.4132947153591283, "learning_rate": 6.206642833790697e-06, "loss": 0.0178, "step": 26232 }, { "epoch": 3.110755365824736, "grad_norm": 0.7255629429753506, "learning_rate": 6.205059963576726e-06, "loss": 0.0409, "step": 26233 }, { "epoch": 3.110873947586861, "grad_norm": 0.45664943772320615, "learning_rate": 6.203477266629063e-06, "loss": 0.0309, "step": 26234 }, { "epoch": 3.110992529348986, "grad_norm": 0.4277818756474455, "learning_rate": 6.201894742962272e-06, "loss": 0.0237, "step": 26235 }, { "epoch": 3.111111111111111, "grad_norm": 0.4685305222137804, "learning_rate": 6.200312392590979e-06, "loss": 0.029, "step": 26236 }, { "epoch": 3.111229692873236, "grad_norm": 0.7582771968955652, "learning_rate": 6.198730215529741e-06, "loss": 0.045, "step": 26237 }, { "epoch": 3.111348274635361, "grad_norm": 0.42218489555420885, "learning_rate": 6.197148211793158e-06, "loss": 0.0291, "step": 26238 }, { "epoch": 3.111466856397486, "grad_norm": 0.7115387031444904, "learning_rate": 6.1955663813958095e-06, "loss": 0.0354, "step": 26239 }, { "epoch": 3.111585438159611, "grad_norm": 0.5283888267842933, "learning_rate": 6.193984724352278e-06, "loss": 0.026, "step": 26240 }, { "epoch": 3.111704019921736, "grad_norm": 0.7192516683336835, "learning_rate": 6.1924032406771555e-06, "loss": 0.0283, "step": 26241 }, { "epoch": 3.111822601683861, "grad_norm": 0.7288191672369203, "learning_rate": 6.190821930384993e-06, "loss": 0.0341, "step": 26242 }, { "epoch": 3.111941183445986, "grad_norm": 0.46128648150005486, "learning_rate": 6.189240793490406e-06, "loss": 0.0197, "step": 26243 }, { "epoch": 3.112059765208111, "grad_norm": 0.3410948768404213, "learning_rate": 6.187659830007944e-06, "loss": 0.0182, "step": 26244 }, { "epoch": 3.1121783469702358, "grad_norm": 0.5240195228257548, "learning_rate": 6.1860790399521945e-06, "loss": 0.0246, "step": 26245 }, { "epoch": 3.112296928732361, "grad_norm": 0.4001617304394909, "learning_rate": 6.184498423337712e-06, "loss": 0.0195, "step": 26246 }, { "epoch": 3.112415510494486, "grad_norm": 0.727974674543846, "learning_rate": 6.182917980179095e-06, "loss": 0.0542, "step": 26247 }, { "epoch": 3.112534092256611, "grad_norm": 0.3889790685633258, "learning_rate": 6.181337710490892e-06, "loss": 0.0222, "step": 26248 }, { "epoch": 3.112652674018736, "grad_norm": 0.5382050914405286, "learning_rate": 6.179757614287676e-06, "loss": 0.0286, "step": 26249 }, { "epoch": 3.112771255780861, "grad_norm": 0.5093180945173544, "learning_rate": 6.178177691584019e-06, "loss": 0.0299, "step": 26250 }, { "epoch": 3.112889837542986, "grad_norm": 0.7609135769509627, "learning_rate": 6.1765979423944825e-06, "loss": 0.0493, "step": 26251 }, { "epoch": 3.113008419305111, "grad_norm": 0.5791319427426144, "learning_rate": 6.175018366733637e-06, "loss": 0.0262, "step": 26252 }, { "epoch": 3.113127001067236, "grad_norm": 0.631420043736167, "learning_rate": 6.173438964616021e-06, "loss": 0.0386, "step": 26253 }, { "epoch": 3.113245582829361, "grad_norm": 1.081654847200261, "learning_rate": 6.171859736056229e-06, "loss": 0.052, "step": 26254 }, { "epoch": 3.113364164591486, "grad_norm": 0.3129093976209349, "learning_rate": 6.170280681068793e-06, "loss": 0.0159, "step": 26255 }, { "epoch": 3.1134827463536108, "grad_norm": 0.40100901051509386, "learning_rate": 6.168701799668278e-06, "loss": 0.0219, "step": 26256 }, { "epoch": 3.113601328115736, "grad_norm": 0.6646319086608931, "learning_rate": 6.167123091869239e-06, "loss": 0.0366, "step": 26257 }, { "epoch": 3.1137199098778607, "grad_norm": 0.5138584603742744, "learning_rate": 6.165544557686234e-06, "loss": 0.0265, "step": 26258 }, { "epoch": 3.113838491639986, "grad_norm": 0.4781233901173742, "learning_rate": 6.163966197133817e-06, "loss": 0.0229, "step": 26259 }, { "epoch": 3.1139570734021107, "grad_norm": 0.823658527505835, "learning_rate": 6.1623880102265245e-06, "loss": 0.0495, "step": 26260 }, { "epoch": 3.114075655164236, "grad_norm": 0.37239294454884825, "learning_rate": 6.160809996978917e-06, "loss": 0.0212, "step": 26261 }, { "epoch": 3.1141942369263607, "grad_norm": 0.33186510048859447, "learning_rate": 6.15923215740554e-06, "loss": 0.0179, "step": 26262 }, { "epoch": 3.114312818688486, "grad_norm": 0.7190116307334551, "learning_rate": 6.157654491520942e-06, "loss": 0.0362, "step": 26263 }, { "epoch": 3.1144314004506106, "grad_norm": 0.48536041226647314, "learning_rate": 6.156076999339658e-06, "loss": 0.0295, "step": 26264 }, { "epoch": 3.114549982212736, "grad_norm": 0.6586722441334608, "learning_rate": 6.1544996808762384e-06, "loss": 0.0269, "step": 26265 }, { "epoch": 3.1146685639748606, "grad_norm": 0.3992533423413259, "learning_rate": 6.15292253614522e-06, "loss": 0.0203, "step": 26266 }, { "epoch": 3.114787145736986, "grad_norm": 0.4031307533358063, "learning_rate": 6.151345565161143e-06, "loss": 0.0164, "step": 26267 }, { "epoch": 3.1149057274991105, "grad_norm": 0.4459740798021636, "learning_rate": 6.149768767938546e-06, "loss": 0.0256, "step": 26268 }, { "epoch": 3.1150243092612357, "grad_norm": 0.45403022301777946, "learning_rate": 6.148192144491965e-06, "loss": 0.0298, "step": 26269 }, { "epoch": 3.1151428910233605, "grad_norm": 0.4360993414821474, "learning_rate": 6.1466156948359446e-06, "loss": 0.0283, "step": 26270 }, { "epoch": 3.1152614727854857, "grad_norm": 0.5202946078847035, "learning_rate": 6.1450394189849995e-06, "loss": 0.0324, "step": 26271 }, { "epoch": 3.1153800545476105, "grad_norm": 0.5347746934446267, "learning_rate": 6.143463316953671e-06, "loss": 0.0293, "step": 26272 }, { "epoch": 3.1154986363097357, "grad_norm": 0.545493000013581, "learning_rate": 6.141887388756484e-06, "loss": 0.0315, "step": 26273 }, { "epoch": 3.1156172180718604, "grad_norm": 0.4168115978082355, "learning_rate": 6.14031163440798e-06, "loss": 0.0211, "step": 26274 }, { "epoch": 3.1157357998339856, "grad_norm": 0.7299160151343137, "learning_rate": 6.138736053922667e-06, "loss": 0.0344, "step": 26275 }, { "epoch": 3.1158543815961104, "grad_norm": 0.46450530358740605, "learning_rate": 6.13716064731508e-06, "loss": 0.0297, "step": 26276 }, { "epoch": 3.1159729633582356, "grad_norm": 0.5160852309218061, "learning_rate": 6.1355854145997374e-06, "loss": 0.0216, "step": 26277 }, { "epoch": 3.1160915451203604, "grad_norm": 0.4153077112997744, "learning_rate": 6.134010355791167e-06, "loss": 0.0232, "step": 26278 }, { "epoch": 3.1162101268824856, "grad_norm": 0.4378318319357323, "learning_rate": 6.132435470903891e-06, "loss": 0.0245, "step": 26279 }, { "epoch": 3.1163287086446103, "grad_norm": 0.48532728153617644, "learning_rate": 6.130860759952409e-06, "loss": 0.0274, "step": 26280 }, { "epoch": 3.1164472904067355, "grad_norm": 0.5672209230944391, "learning_rate": 6.129286222951269e-06, "loss": 0.0286, "step": 26281 }, { "epoch": 3.1165658721688603, "grad_norm": 0.5534314541471204, "learning_rate": 6.127711859914959e-06, "loss": 0.023, "step": 26282 }, { "epoch": 3.1166844539309855, "grad_norm": 0.5959560136849181, "learning_rate": 6.126137670858004e-06, "loss": 0.025, "step": 26283 }, { "epoch": 3.1168030356931102, "grad_norm": 0.4647694886206378, "learning_rate": 6.124563655794916e-06, "loss": 0.0237, "step": 26284 }, { "epoch": 3.1169216174552354, "grad_norm": 0.4483179311976623, "learning_rate": 6.1229898147402096e-06, "loss": 0.0201, "step": 26285 }, { "epoch": 3.11704019921736, "grad_norm": 0.38058502336118705, "learning_rate": 6.121416147708384e-06, "loss": 0.0194, "step": 26286 }, { "epoch": 3.1171587809794854, "grad_norm": 0.42267102794530037, "learning_rate": 6.119842654713951e-06, "loss": 0.02, "step": 26287 }, { "epoch": 3.11727736274161, "grad_norm": 0.46499943024481627, "learning_rate": 6.118269335771415e-06, "loss": 0.0285, "step": 26288 }, { "epoch": 3.1173959445037354, "grad_norm": 0.5938377226165401, "learning_rate": 6.116696190895279e-06, "loss": 0.0352, "step": 26289 }, { "epoch": 3.11751452626586, "grad_norm": 0.25061663200722223, "learning_rate": 6.115123220100058e-06, "loss": 0.0138, "step": 26290 }, { "epoch": 3.1176331080279853, "grad_norm": 0.5165507533506261, "learning_rate": 6.11355042340023e-06, "loss": 0.0362, "step": 26291 }, { "epoch": 3.11775168979011, "grad_norm": 0.7371341139299494, "learning_rate": 6.1119778008103175e-06, "loss": 0.0532, "step": 26292 }, { "epoch": 3.1178702715522353, "grad_norm": 0.8152258301619669, "learning_rate": 6.110405352344803e-06, "loss": 0.0438, "step": 26293 }, { "epoch": 3.11798885331436, "grad_norm": 0.7877495621241245, "learning_rate": 6.108833078018195e-06, "loss": 0.0487, "step": 26294 }, { "epoch": 3.1181074350764852, "grad_norm": 0.48826246453853916, "learning_rate": 6.107260977844964e-06, "loss": 0.0248, "step": 26295 }, { "epoch": 3.1182260168386104, "grad_norm": 0.595456657684731, "learning_rate": 6.105689051839633e-06, "loss": 0.0291, "step": 26296 }, { "epoch": 3.118344598600735, "grad_norm": 0.3909451340086784, "learning_rate": 6.104117300016673e-06, "loss": 0.0302, "step": 26297 }, { "epoch": 3.11846318036286, "grad_norm": 0.5127090371223674, "learning_rate": 6.102545722390579e-06, "loss": 0.0273, "step": 26298 }, { "epoch": 3.118581762124985, "grad_norm": 0.43141772523821204, "learning_rate": 6.100974318975841e-06, "loss": 0.0225, "step": 26299 }, { "epoch": 3.1187003438871104, "grad_norm": 0.6828681656112371, "learning_rate": 6.099403089786945e-06, "loss": 0.0356, "step": 26300 }, { "epoch": 3.118818925649235, "grad_norm": 0.5918741701517921, "learning_rate": 6.09783203483838e-06, "loss": 0.0422, "step": 26301 }, { "epoch": 3.1189375074113603, "grad_norm": 0.5052372997534127, "learning_rate": 6.096261154144611e-06, "loss": 0.0282, "step": 26302 }, { "epoch": 3.119056089173485, "grad_norm": 0.4074068395070854, "learning_rate": 6.0946904477201474e-06, "loss": 0.0205, "step": 26303 }, { "epoch": 3.1191746709356103, "grad_norm": 0.45291976869728684, "learning_rate": 6.093119915579448e-06, "loss": 0.0237, "step": 26304 }, { "epoch": 3.119293252697735, "grad_norm": 0.4058642600599523, "learning_rate": 6.0915495577369976e-06, "loss": 0.0216, "step": 26305 }, { "epoch": 3.1194118344598603, "grad_norm": 0.48196768105082105, "learning_rate": 6.0899793742072716e-06, "loss": 0.03, "step": 26306 }, { "epoch": 3.119530416221985, "grad_norm": 0.47160646293205244, "learning_rate": 6.088409365004746e-06, "loss": 0.0262, "step": 26307 }, { "epoch": 3.11964899798411, "grad_norm": 0.5809473993530636, "learning_rate": 6.086839530143907e-06, "loss": 0.0271, "step": 26308 }, { "epoch": 3.119767579746235, "grad_norm": 0.6103514826344902, "learning_rate": 6.085269869639204e-06, "loss": 0.0327, "step": 26309 }, { "epoch": 3.11988616150836, "grad_norm": 0.4650399805277606, "learning_rate": 6.083700383505117e-06, "loss": 0.0198, "step": 26310 }, { "epoch": 3.120004743270485, "grad_norm": 0.515006356325317, "learning_rate": 6.082131071756117e-06, "loss": 0.0284, "step": 26311 }, { "epoch": 3.12012332503261, "grad_norm": 0.5000140479252765, "learning_rate": 6.080561934406678e-06, "loss": 0.0272, "step": 26312 }, { "epoch": 3.120241906794735, "grad_norm": 0.3930970358327909, "learning_rate": 6.078992971471242e-06, "loss": 0.0227, "step": 26313 }, { "epoch": 3.12036048855686, "grad_norm": 0.5412777648050726, "learning_rate": 6.077424182964306e-06, "loss": 0.0313, "step": 26314 }, { "epoch": 3.120479070318985, "grad_norm": 0.5190185716822356, "learning_rate": 6.075855568900304e-06, "loss": 0.0338, "step": 26315 }, { "epoch": 3.12059765208111, "grad_norm": 0.32161030933237994, "learning_rate": 6.074287129293707e-06, "loss": 0.013, "step": 26316 }, { "epoch": 3.120716233843235, "grad_norm": 0.5576879644492603, "learning_rate": 6.072718864158977e-06, "loss": 0.0287, "step": 26317 }, { "epoch": 3.12083481560536, "grad_norm": 0.5006503810964035, "learning_rate": 6.071150773510567e-06, "loss": 0.0288, "step": 26318 }, { "epoch": 3.120953397367485, "grad_norm": 0.507611208082678, "learning_rate": 6.06958285736294e-06, "loss": 0.0295, "step": 26319 }, { "epoch": 3.12107197912961, "grad_norm": 0.46810844406434715, "learning_rate": 6.0680151157305406e-06, "loss": 0.0188, "step": 26320 }, { "epoch": 3.1211905608917347, "grad_norm": 0.4144453655671915, "learning_rate": 6.066447548627827e-06, "loss": 0.0242, "step": 26321 }, { "epoch": 3.12130914265386, "grad_norm": 0.613335183349102, "learning_rate": 6.064880156069247e-06, "loss": 0.0309, "step": 26322 }, { "epoch": 3.1214277244159847, "grad_norm": 0.609281565917231, "learning_rate": 6.0633129380692595e-06, "loss": 0.0369, "step": 26323 }, { "epoch": 3.12154630617811, "grad_norm": 0.4342602315160974, "learning_rate": 6.061745894642298e-06, "loss": 0.0243, "step": 26324 }, { "epoch": 3.1216648879402347, "grad_norm": 0.3951429952582306, "learning_rate": 6.0601790258028175e-06, "loss": 0.0309, "step": 26325 }, { "epoch": 3.12178346970236, "grad_norm": 0.4382296969511378, "learning_rate": 6.058612331565258e-06, "loss": 0.0211, "step": 26326 }, { "epoch": 3.1219020514644846, "grad_norm": 0.4928037609163024, "learning_rate": 6.057045811944067e-06, "loss": 0.0347, "step": 26327 }, { "epoch": 3.12202063322661, "grad_norm": 0.7214476577076309, "learning_rate": 6.055479466953684e-06, "loss": 0.0434, "step": 26328 }, { "epoch": 3.1221392149887346, "grad_norm": 0.6170254593204545, "learning_rate": 6.053913296608551e-06, "loss": 0.0342, "step": 26329 }, { "epoch": 3.12225779675086, "grad_norm": 0.5866828875534099, "learning_rate": 6.052347300923109e-06, "loss": 0.0318, "step": 26330 }, { "epoch": 3.1223763785129846, "grad_norm": 0.48473290033556543, "learning_rate": 6.050781479911783e-06, "loss": 0.0255, "step": 26331 }, { "epoch": 3.1224949602751098, "grad_norm": 0.5607967479719365, "learning_rate": 6.049215833589015e-06, "loss": 0.0265, "step": 26332 }, { "epoch": 3.1226135420372345, "grad_norm": 0.48070067262679655, "learning_rate": 6.047650361969237e-06, "loss": 0.0237, "step": 26333 }, { "epoch": 3.1227321237993597, "grad_norm": 0.4869936767607306, "learning_rate": 6.046085065066889e-06, "loss": 0.03, "step": 26334 }, { "epoch": 3.1228507055614845, "grad_norm": 0.40442386869805635, "learning_rate": 6.044519942896387e-06, "loss": 0.0169, "step": 26335 }, { "epoch": 3.1229692873236097, "grad_norm": 0.9445767047642402, "learning_rate": 6.0429549954721666e-06, "loss": 0.062, "step": 26336 }, { "epoch": 3.1230878690857344, "grad_norm": 0.7502960983523899, "learning_rate": 6.0413902228086545e-06, "loss": 0.0351, "step": 26337 }, { "epoch": 3.1232064508478596, "grad_norm": 0.6056312213643601, "learning_rate": 6.039825624920276e-06, "loss": 0.0344, "step": 26338 }, { "epoch": 3.1233250326099844, "grad_norm": 0.6716946265459287, "learning_rate": 6.038261201821455e-06, "loss": 0.0263, "step": 26339 }, { "epoch": 3.1234436143721096, "grad_norm": 0.5957391892203963, "learning_rate": 6.0366969535266135e-06, "loss": 0.0353, "step": 26340 }, { "epoch": 3.1235621961342344, "grad_norm": 0.44570344106230136, "learning_rate": 6.035132880050179e-06, "loss": 0.0227, "step": 26341 }, { "epoch": 3.1236807778963596, "grad_norm": 0.4075217306155211, "learning_rate": 6.033568981406554e-06, "loss": 0.0239, "step": 26342 }, { "epoch": 3.1237993596584843, "grad_norm": 0.7316214611352528, "learning_rate": 6.0320052576101686e-06, "loss": 0.0393, "step": 26343 }, { "epoch": 3.1239179414206095, "grad_norm": 0.4998675873110259, "learning_rate": 6.03044170867543e-06, "loss": 0.0235, "step": 26344 }, { "epoch": 3.1240365231827343, "grad_norm": 0.8009544908385927, "learning_rate": 6.028878334616767e-06, "loss": 0.0486, "step": 26345 }, { "epoch": 3.1241551049448595, "grad_norm": 0.48724126620947783, "learning_rate": 6.0273151354485745e-06, "loss": 0.0378, "step": 26346 }, { "epoch": 3.1242736867069842, "grad_norm": 0.5946988404712186, "learning_rate": 6.02575211118527e-06, "loss": 0.0394, "step": 26347 }, { "epoch": 3.1243922684691094, "grad_norm": 0.7605585468068908, "learning_rate": 6.024189261841262e-06, "loss": 0.0391, "step": 26348 }, { "epoch": 3.1245108502312346, "grad_norm": 0.4573221715754893, "learning_rate": 6.02262658743096e-06, "loss": 0.0286, "step": 26349 }, { "epoch": 3.1246294319933594, "grad_norm": 0.46191099144268455, "learning_rate": 6.021064087968778e-06, "loss": 0.0206, "step": 26350 }, { "epoch": 3.1247480137554846, "grad_norm": 0.4737641159367521, "learning_rate": 6.019501763469096e-06, "loss": 0.0261, "step": 26351 }, { "epoch": 3.1248665955176094, "grad_norm": 0.6951175687898896, "learning_rate": 6.017939613946347e-06, "loss": 0.041, "step": 26352 }, { "epoch": 3.1249851772797346, "grad_norm": 0.47867307850830426, "learning_rate": 6.016377639414911e-06, "loss": 0.0379, "step": 26353 }, { "epoch": 3.1251037590418593, "grad_norm": 0.5491373313112263, "learning_rate": 6.014815839889193e-06, "loss": 0.0261, "step": 26354 }, { "epoch": 3.1252223408039845, "grad_norm": 0.4554406616685739, "learning_rate": 6.01325421538359e-06, "loss": 0.0249, "step": 26355 }, { "epoch": 3.1253409225661093, "grad_norm": 0.4227603963653256, "learning_rate": 6.011692765912502e-06, "loss": 0.029, "step": 26356 }, { "epoch": 3.1254595043282345, "grad_norm": 0.6049835976340525, "learning_rate": 6.0101314914903315e-06, "loss": 0.0276, "step": 26357 }, { "epoch": 3.1255780860903593, "grad_norm": 0.8690625408649898, "learning_rate": 6.008570392131443e-06, "loss": 0.0407, "step": 26358 }, { "epoch": 3.1256966678524845, "grad_norm": 0.4016350991813743, "learning_rate": 6.0070094678502635e-06, "loss": 0.0175, "step": 26359 }, { "epoch": 3.125815249614609, "grad_norm": 0.6427182839279686, "learning_rate": 6.005448718661158e-06, "loss": 0.0409, "step": 26360 }, { "epoch": 3.1259338313767344, "grad_norm": 0.40102628240276084, "learning_rate": 6.003888144578529e-06, "loss": 0.0213, "step": 26361 }, { "epoch": 3.126052413138859, "grad_norm": 0.5061796594992519, "learning_rate": 6.002327745616743e-06, "loss": 0.0253, "step": 26362 }, { "epoch": 3.1261709949009844, "grad_norm": 0.8603491806947539, "learning_rate": 6.000767521790215e-06, "loss": 0.0383, "step": 26363 }, { "epoch": 3.126289576663109, "grad_norm": 0.49096725115672774, "learning_rate": 5.999207473113302e-06, "loss": 0.0313, "step": 26364 }, { "epoch": 3.1264081584252343, "grad_norm": 0.8236129254157163, "learning_rate": 5.997647599600398e-06, "loss": 0.0479, "step": 26365 }, { "epoch": 3.126526740187359, "grad_norm": 0.5101912292947374, "learning_rate": 5.996087901265879e-06, "loss": 0.0226, "step": 26366 }, { "epoch": 3.1266453219494843, "grad_norm": 0.4943259352041767, "learning_rate": 5.994528378124126e-06, "loss": 0.0305, "step": 26367 }, { "epoch": 3.126763903711609, "grad_norm": 0.371631003130342, "learning_rate": 5.992969030189524e-06, "loss": 0.0265, "step": 26368 }, { "epoch": 3.1268824854737343, "grad_norm": 0.36844481772271553, "learning_rate": 5.9914098574764234e-06, "loss": 0.015, "step": 26369 }, { "epoch": 3.127001067235859, "grad_norm": 0.7779838178229455, "learning_rate": 5.989850859999227e-06, "loss": 0.0487, "step": 26370 }, { "epoch": 3.1271196489979842, "grad_norm": 0.43866744934416524, "learning_rate": 5.988292037772289e-06, "loss": 0.0181, "step": 26371 }, { "epoch": 3.127238230760109, "grad_norm": 0.4595146021177555, "learning_rate": 5.986733390809993e-06, "loss": 0.021, "step": 26372 }, { "epoch": 3.127356812522234, "grad_norm": 0.6045776803566405, "learning_rate": 5.985174919126682e-06, "loss": 0.031, "step": 26373 }, { "epoch": 3.127475394284359, "grad_norm": 0.7736068139302532, "learning_rate": 5.983616622736757e-06, "loss": 0.0347, "step": 26374 }, { "epoch": 3.127593976046484, "grad_norm": 0.7934318659164906, "learning_rate": 5.982058501654561e-06, "loss": 0.0475, "step": 26375 }, { "epoch": 3.127712557808609, "grad_norm": 0.5069609624416779, "learning_rate": 5.9805005558944636e-06, "loss": 0.0245, "step": 26376 }, { "epoch": 3.127831139570734, "grad_norm": 0.7257143168598176, "learning_rate": 5.978942785470826e-06, "loss": 0.0468, "step": 26377 }, { "epoch": 3.127949721332859, "grad_norm": 0.41502856633980395, "learning_rate": 5.977385190398014e-06, "loss": 0.0148, "step": 26378 }, { "epoch": 3.128068303094984, "grad_norm": 0.47333048523842425, "learning_rate": 5.975827770690387e-06, "loss": 0.0214, "step": 26379 }, { "epoch": 3.128186884857109, "grad_norm": 0.4828907615134411, "learning_rate": 5.9742705263622935e-06, "loss": 0.0304, "step": 26380 }, { "epoch": 3.128305466619234, "grad_norm": 0.3927308010571228, "learning_rate": 5.972713457428098e-06, "loss": 0.0198, "step": 26381 }, { "epoch": 3.128424048381359, "grad_norm": 0.4779933849400146, "learning_rate": 5.9711565639021475e-06, "loss": 0.0208, "step": 26382 }, { "epoch": 3.128542630143484, "grad_norm": 0.6865227068016373, "learning_rate": 5.969599845798807e-06, "loss": 0.0405, "step": 26383 }, { "epoch": 3.1286612119056088, "grad_norm": 0.5915808760547434, "learning_rate": 5.9680433031324035e-06, "loss": 0.0298, "step": 26384 }, { "epoch": 3.128779793667734, "grad_norm": 0.48361589072970923, "learning_rate": 5.966486935917321e-06, "loss": 0.0281, "step": 26385 }, { "epoch": 3.1288983754298587, "grad_norm": 0.3941513020193435, "learning_rate": 5.964930744167877e-06, "loss": 0.0193, "step": 26386 }, { "epoch": 3.129016957191984, "grad_norm": 0.3585370821688364, "learning_rate": 5.96337472789843e-06, "loss": 0.0151, "step": 26387 }, { "epoch": 3.1291355389541087, "grad_norm": 0.47346362897325456, "learning_rate": 5.9618188871233244e-06, "loss": 0.0322, "step": 26388 }, { "epoch": 3.129254120716234, "grad_norm": 0.6131846849259461, "learning_rate": 5.9602632218569015e-06, "loss": 0.043, "step": 26389 }, { "epoch": 3.1293727024783586, "grad_norm": 0.7949724378229978, "learning_rate": 5.958707732113513e-06, "loss": 0.0327, "step": 26390 }, { "epoch": 3.129491284240484, "grad_norm": 0.3767653565407148, "learning_rate": 5.95715241790748e-06, "loss": 0.0168, "step": 26391 }, { "epoch": 3.1296098660026086, "grad_norm": 0.2752933865805243, "learning_rate": 5.95559727925315e-06, "loss": 0.0161, "step": 26392 }, { "epoch": 3.129728447764734, "grad_norm": 0.4417689417423417, "learning_rate": 5.954042316164862e-06, "loss": 0.0213, "step": 26393 }, { "epoch": 3.129847029526859, "grad_norm": 0.5887560914003729, "learning_rate": 5.952487528656953e-06, "loss": 0.0211, "step": 26394 }, { "epoch": 3.1299656112889838, "grad_norm": 0.493439560677845, "learning_rate": 5.950932916743743e-06, "loss": 0.0361, "step": 26395 }, { "epoch": 3.1300841930511085, "grad_norm": 0.7784391929764075, "learning_rate": 5.9493784804395745e-06, "loss": 0.0364, "step": 26396 }, { "epoch": 3.1302027748132337, "grad_norm": 0.5293853444452674, "learning_rate": 5.947824219758774e-06, "loss": 0.0297, "step": 26397 }, { "epoch": 3.130321356575359, "grad_norm": 0.8962762795334234, "learning_rate": 5.9462701347156704e-06, "loss": 0.0429, "step": 26398 }, { "epoch": 3.1304399383374837, "grad_norm": 0.47727229477196587, "learning_rate": 5.944716225324592e-06, "loss": 0.0216, "step": 26399 }, { "epoch": 3.130558520099609, "grad_norm": 0.32726285505453734, "learning_rate": 5.943162491599863e-06, "loss": 0.016, "step": 26400 }, { "epoch": 3.1306771018617336, "grad_norm": 0.31048007736542066, "learning_rate": 5.941608933555812e-06, "loss": 0.0261, "step": 26401 }, { "epoch": 3.130795683623859, "grad_norm": 0.5514542281103719, "learning_rate": 5.940055551206749e-06, "loss": 0.0303, "step": 26402 }, { "epoch": 3.1309142653859836, "grad_norm": 0.7477712363324263, "learning_rate": 5.938502344567004e-06, "loss": 0.0403, "step": 26403 }, { "epoch": 3.131032847148109, "grad_norm": 0.7823471342572753, "learning_rate": 5.936949313650888e-06, "loss": 0.0357, "step": 26404 }, { "epoch": 3.1311514289102336, "grad_norm": 0.579467259852118, "learning_rate": 5.935396458472734e-06, "loss": 0.029, "step": 26405 }, { "epoch": 3.1312700106723588, "grad_norm": 0.6721024470669525, "learning_rate": 5.933843779046835e-06, "loss": 0.0279, "step": 26406 }, { "epoch": 3.1313885924344835, "grad_norm": 0.5884805015347351, "learning_rate": 5.932291275387519e-06, "loss": 0.0313, "step": 26407 }, { "epoch": 3.1315071741966087, "grad_norm": 0.44325785899973896, "learning_rate": 5.930738947509093e-06, "loss": 0.0218, "step": 26408 }, { "epoch": 3.1316257559587335, "grad_norm": 0.6866505128404966, "learning_rate": 5.9291867954258725e-06, "loss": 0.0385, "step": 26409 }, { "epoch": 3.1317443377208587, "grad_norm": 0.45584616241746495, "learning_rate": 5.9276348191521695e-06, "loss": 0.0203, "step": 26410 }, { "epoch": 3.1318629194829835, "grad_norm": 0.37365316392737813, "learning_rate": 5.926083018702269e-06, "loss": 0.0155, "step": 26411 }, { "epoch": 3.1319815012451087, "grad_norm": 0.5125858513314979, "learning_rate": 5.92453139409051e-06, "loss": 0.0212, "step": 26412 }, { "epoch": 3.1321000830072334, "grad_norm": 0.5250983188841336, "learning_rate": 5.92297994533117e-06, "loss": 0.0282, "step": 26413 }, { "epoch": 3.1322186647693586, "grad_norm": 0.5038561471555543, "learning_rate": 5.9214286724385644e-06, "loss": 0.0233, "step": 26414 }, { "epoch": 3.1323372465314834, "grad_norm": 0.6067244182628121, "learning_rate": 5.919877575426988e-06, "loss": 0.031, "step": 26415 }, { "epoch": 3.1324558282936086, "grad_norm": 0.6994347652606668, "learning_rate": 5.918326654310743e-06, "loss": 0.0286, "step": 26416 }, { "epoch": 3.1325744100557333, "grad_norm": 0.44977341647341645, "learning_rate": 5.9167759091041355e-06, "loss": 0.0212, "step": 26417 }, { "epoch": 3.1326929918178585, "grad_norm": 0.8705929139586636, "learning_rate": 5.915225339821437e-06, "loss": 0.0334, "step": 26418 }, { "epoch": 3.1328115735799833, "grad_norm": 0.6184187365000753, "learning_rate": 5.913674946476971e-06, "loss": 0.0336, "step": 26419 }, { "epoch": 3.1329301553421085, "grad_norm": 0.5119581090748316, "learning_rate": 5.9121247290850115e-06, "loss": 0.0309, "step": 26420 }, { "epoch": 3.1330487371042333, "grad_norm": 0.4235545784500126, "learning_rate": 5.9105746876598615e-06, "loss": 0.02, "step": 26421 }, { "epoch": 3.1331673188663585, "grad_norm": 0.3658310155542368, "learning_rate": 5.909024822215789e-06, "loss": 0.0155, "step": 26422 }, { "epoch": 3.1332859006284832, "grad_norm": 0.47707851159812303, "learning_rate": 5.907475132767113e-06, "loss": 0.0191, "step": 26423 }, { "epoch": 3.1334044823906084, "grad_norm": 0.8203498612815477, "learning_rate": 5.905925619328095e-06, "loss": 0.0481, "step": 26424 }, { "epoch": 3.133523064152733, "grad_norm": 0.7090082445993743, "learning_rate": 5.9043762819130266e-06, "loss": 0.0461, "step": 26425 }, { "epoch": 3.1336416459148584, "grad_norm": 0.5728713166606242, "learning_rate": 5.9028271205361945e-06, "loss": 0.0237, "step": 26426 }, { "epoch": 3.133760227676983, "grad_norm": 0.623498335929264, "learning_rate": 5.9012781352118765e-06, "loss": 0.0403, "step": 26427 }, { "epoch": 3.1338788094391083, "grad_norm": 0.569968443263714, "learning_rate": 5.899729325954362e-06, "loss": 0.0359, "step": 26428 }, { "epoch": 3.133997391201233, "grad_norm": 0.5205932755447436, "learning_rate": 5.898180692777908e-06, "loss": 0.0283, "step": 26429 }, { "epoch": 3.1341159729633583, "grad_norm": 0.46263736641812175, "learning_rate": 5.896632235696814e-06, "loss": 0.0223, "step": 26430 }, { "epoch": 3.134234554725483, "grad_norm": 0.5598539815368102, "learning_rate": 5.895083954725339e-06, "loss": 0.0237, "step": 26431 }, { "epoch": 3.1343531364876083, "grad_norm": 0.5680516967838447, "learning_rate": 5.893535849877771e-06, "loss": 0.0232, "step": 26432 }, { "epoch": 3.134471718249733, "grad_norm": 0.5254584326820175, "learning_rate": 5.891987921168357e-06, "loss": 0.0215, "step": 26433 }, { "epoch": 3.1345903000118582, "grad_norm": 0.6897241536801275, "learning_rate": 5.890440168611397e-06, "loss": 0.0289, "step": 26434 }, { "epoch": 3.134708881773983, "grad_norm": 0.4470248122040568, "learning_rate": 5.888892592221137e-06, "loss": 0.0198, "step": 26435 }, { "epoch": 3.134827463536108, "grad_norm": 0.32402684436571594, "learning_rate": 5.887345192011853e-06, "loss": 0.018, "step": 26436 }, { "epoch": 3.134946045298233, "grad_norm": 0.6100102390395041, "learning_rate": 5.885797967997808e-06, "loss": 0.0408, "step": 26437 }, { "epoch": 3.135064627060358, "grad_norm": 0.5965415193464118, "learning_rate": 5.884250920193265e-06, "loss": 0.0282, "step": 26438 }, { "epoch": 3.135183208822483, "grad_norm": 0.4431857966213763, "learning_rate": 5.882704048612497e-06, "loss": 0.0168, "step": 26439 }, { "epoch": 3.135301790584608, "grad_norm": 0.799073517910963, "learning_rate": 5.88115735326974e-06, "loss": 0.0472, "step": 26440 }, { "epoch": 3.135420372346733, "grad_norm": 0.4816932517660804, "learning_rate": 5.87961083417928e-06, "loss": 0.0241, "step": 26441 }, { "epoch": 3.135538954108858, "grad_norm": 0.2928295585116497, "learning_rate": 5.878064491355354e-06, "loss": 0.0122, "step": 26442 }, { "epoch": 3.135657535870983, "grad_norm": 0.3198009853728703, "learning_rate": 5.876518324812233e-06, "loss": 0.014, "step": 26443 }, { "epoch": 3.135776117633108, "grad_norm": 0.559712962574276, "learning_rate": 5.874972334564146e-06, "loss": 0.025, "step": 26444 }, { "epoch": 3.135894699395233, "grad_norm": 0.6365757290384054, "learning_rate": 5.873426520625377e-06, "loss": 0.0305, "step": 26445 }, { "epoch": 3.136013281157358, "grad_norm": 0.5776621995043156, "learning_rate": 5.871880883010156e-06, "loss": 0.0326, "step": 26446 }, { "epoch": 3.136131862919483, "grad_norm": 0.25001734108606954, "learning_rate": 5.8703354217327335e-06, "loss": 0.0164, "step": 26447 }, { "epoch": 3.136250444681608, "grad_norm": 0.8194171706129216, "learning_rate": 5.8687901368073616e-06, "loss": 0.0364, "step": 26448 }, { "epoch": 3.1363690264437327, "grad_norm": 0.30881677104782224, "learning_rate": 5.867245028248283e-06, "loss": 0.0127, "step": 26449 }, { "epoch": 3.136487608205858, "grad_norm": 0.522278803746144, "learning_rate": 5.865700096069751e-06, "loss": 0.0286, "step": 26450 }, { "epoch": 3.136606189967983, "grad_norm": 0.4973938982393696, "learning_rate": 5.864155340285993e-06, "loss": 0.0252, "step": 26451 }, { "epoch": 3.136724771730108, "grad_norm": 0.5151207150795732, "learning_rate": 5.862610760911258e-06, "loss": 0.0249, "step": 26452 }, { "epoch": 3.136843353492233, "grad_norm": 0.5230333977043645, "learning_rate": 5.861066357959783e-06, "loss": 0.0225, "step": 26453 }, { "epoch": 3.136961935254358, "grad_norm": 0.5236756429436434, "learning_rate": 5.859522131445813e-06, "loss": 0.0248, "step": 26454 }, { "epoch": 3.137080517016483, "grad_norm": 0.43740192076927503, "learning_rate": 5.857978081383564e-06, "loss": 0.0172, "step": 26455 }, { "epoch": 3.137199098778608, "grad_norm": 0.802520009947024, "learning_rate": 5.856434207787296e-06, "loss": 0.0401, "step": 26456 }, { "epoch": 3.137317680540733, "grad_norm": 0.5800233754308818, "learning_rate": 5.854890510671224e-06, "loss": 0.0212, "step": 26457 }, { "epoch": 3.1374362623028578, "grad_norm": 0.35403084677039753, "learning_rate": 5.853346990049583e-06, "loss": 0.0134, "step": 26458 }, { "epoch": 3.137554844064983, "grad_norm": 0.5606625572563435, "learning_rate": 5.851803645936604e-06, "loss": 0.0295, "step": 26459 }, { "epoch": 3.1376734258271077, "grad_norm": 0.5941022795162523, "learning_rate": 5.850260478346514e-06, "loss": 0.0332, "step": 26460 }, { "epoch": 3.137792007589233, "grad_norm": 0.5123755014698302, "learning_rate": 5.848717487293548e-06, "loss": 0.032, "step": 26461 }, { "epoch": 3.1379105893513577, "grad_norm": 0.9467003144816032, "learning_rate": 5.847174672791913e-06, "loss": 0.0507, "step": 26462 }, { "epoch": 3.138029171113483, "grad_norm": 0.3678269012625654, "learning_rate": 5.845632034855844e-06, "loss": 0.0184, "step": 26463 }, { "epoch": 3.1381477528756077, "grad_norm": 0.38417938121235634, "learning_rate": 5.844089573499556e-06, "loss": 0.0169, "step": 26464 }, { "epoch": 3.138266334637733, "grad_norm": 0.46682660694710315, "learning_rate": 5.842547288737271e-06, "loss": 0.0244, "step": 26465 }, { "epoch": 3.1383849163998576, "grad_norm": 0.49705350705706053, "learning_rate": 5.841005180583217e-06, "loss": 0.0271, "step": 26466 }, { "epoch": 3.138503498161983, "grad_norm": 0.49426167298939766, "learning_rate": 5.839463249051586e-06, "loss": 0.0188, "step": 26467 }, { "epoch": 3.1386220799241076, "grad_norm": 0.9147184875531823, "learning_rate": 5.8379214941566205e-06, "loss": 0.0395, "step": 26468 }, { "epoch": 3.138740661686233, "grad_norm": 0.5753097905519559, "learning_rate": 5.836379915912515e-06, "loss": 0.0268, "step": 26469 }, { "epoch": 3.1388592434483575, "grad_norm": 0.36535928606265883, "learning_rate": 5.834838514333488e-06, "loss": 0.021, "step": 26470 }, { "epoch": 3.1389778252104827, "grad_norm": 0.6847106876046791, "learning_rate": 5.833297289433745e-06, "loss": 0.0288, "step": 26471 }, { "epoch": 3.1390964069726075, "grad_norm": 0.7769879549766886, "learning_rate": 5.831756241227507e-06, "loss": 0.0439, "step": 26472 }, { "epoch": 3.1392149887347327, "grad_norm": 0.3987841890029213, "learning_rate": 5.8302153697289615e-06, "loss": 0.016, "step": 26473 }, { "epoch": 3.1393335704968575, "grad_norm": 0.3551076401751938, "learning_rate": 5.8286746749523226e-06, "loss": 0.0264, "step": 26474 }, { "epoch": 3.1394521522589827, "grad_norm": 0.7492744707264353, "learning_rate": 5.827134156911796e-06, "loss": 0.0388, "step": 26475 }, { "epoch": 3.1395707340211074, "grad_norm": 0.7684243553827571, "learning_rate": 5.82559381562158e-06, "loss": 0.0371, "step": 26476 }, { "epoch": 3.1396893157832326, "grad_norm": 0.34510580256249623, "learning_rate": 5.824053651095881e-06, "loss": 0.0178, "step": 26477 }, { "epoch": 3.1398078975453574, "grad_norm": 0.5870244371204735, "learning_rate": 5.82251366334888e-06, "loss": 0.0356, "step": 26478 }, { "epoch": 3.1399264793074826, "grad_norm": 0.7246354449158723, "learning_rate": 5.820973852394801e-06, "loss": 0.0337, "step": 26479 }, { "epoch": 3.1400450610696073, "grad_norm": 0.5408743572799807, "learning_rate": 5.8194342182478155e-06, "loss": 0.0259, "step": 26480 }, { "epoch": 3.1401636428317325, "grad_norm": 1.083729733510719, "learning_rate": 5.817894760922135e-06, "loss": 0.0508, "step": 26481 }, { "epoch": 3.1402822245938573, "grad_norm": 0.5086976505850244, "learning_rate": 5.816355480431926e-06, "loss": 0.0279, "step": 26482 }, { "epoch": 3.1404008063559825, "grad_norm": 0.6513428722790767, "learning_rate": 5.81481637679141e-06, "loss": 0.0257, "step": 26483 }, { "epoch": 3.1405193881181073, "grad_norm": 0.524775943595944, "learning_rate": 5.813277450014754e-06, "loss": 0.0268, "step": 26484 }, { "epoch": 3.1406379698802325, "grad_norm": 0.5549800699734857, "learning_rate": 5.811738700116151e-06, "loss": 0.0334, "step": 26485 }, { "epoch": 3.1407565516423572, "grad_norm": 0.5536522248793825, "learning_rate": 5.810200127109786e-06, "loss": 0.0363, "step": 26486 }, { "epoch": 3.1408751334044824, "grad_norm": 0.8982560559618565, "learning_rate": 5.808661731009843e-06, "loss": 0.0623, "step": 26487 }, { "epoch": 3.140993715166607, "grad_norm": 1.006771120041674, "learning_rate": 5.807123511830514e-06, "loss": 0.0607, "step": 26488 }, { "epoch": 3.1411122969287324, "grad_norm": 0.6439602654526553, "learning_rate": 5.805585469585956e-06, "loss": 0.0279, "step": 26489 }, { "epoch": 3.141230878690857, "grad_norm": 0.5232060820862042, "learning_rate": 5.8040476042903774e-06, "loss": 0.028, "step": 26490 }, { "epoch": 3.1413494604529824, "grad_norm": 0.49938830392681344, "learning_rate": 5.802509915957932e-06, "loss": 0.0291, "step": 26491 }, { "epoch": 3.141468042215107, "grad_norm": 0.5155873274323398, "learning_rate": 5.800972404602808e-06, "loss": 0.0204, "step": 26492 }, { "epoch": 3.1415866239772323, "grad_norm": 0.5754756773878571, "learning_rate": 5.799435070239165e-06, "loss": 0.0275, "step": 26493 }, { "epoch": 3.141705205739357, "grad_norm": 0.4611119720889473, "learning_rate": 5.797897912881198e-06, "loss": 0.0218, "step": 26494 }, { "epoch": 3.1418237875014823, "grad_norm": 0.4184032661806354, "learning_rate": 5.796360932543058e-06, "loss": 0.0184, "step": 26495 }, { "epoch": 3.1419423692636075, "grad_norm": 0.5644947445114421, "learning_rate": 5.794824129238921e-06, "loss": 0.0223, "step": 26496 }, { "epoch": 3.1420609510257322, "grad_norm": 0.4977074214246076, "learning_rate": 5.793287502982955e-06, "loss": 0.0305, "step": 26497 }, { "epoch": 3.142179532787857, "grad_norm": 0.3833348030377763, "learning_rate": 5.791751053789324e-06, "loss": 0.0219, "step": 26498 }, { "epoch": 3.142298114549982, "grad_norm": 0.492182054527954, "learning_rate": 5.790214781672201e-06, "loss": 0.0196, "step": 26499 }, { "epoch": 3.1424166963121074, "grad_norm": 0.6002204341957319, "learning_rate": 5.788678686645729e-06, "loss": 0.0207, "step": 26500 }, { "epoch": 3.142535278074232, "grad_norm": 0.6327196377616867, "learning_rate": 5.787142768724094e-06, "loss": 0.0345, "step": 26501 }, { "epoch": 3.1426538598363574, "grad_norm": 0.5646726849599687, "learning_rate": 5.785607027921436e-06, "loss": 0.0267, "step": 26502 }, { "epoch": 3.142772441598482, "grad_norm": 0.5522083773191634, "learning_rate": 5.7840714642519254e-06, "loss": 0.0258, "step": 26503 }, { "epoch": 3.1428910233606073, "grad_norm": 0.44312626503577884, "learning_rate": 5.782536077729697e-06, "loss": 0.0211, "step": 26504 }, { "epoch": 3.143009605122732, "grad_norm": 0.5922276562849648, "learning_rate": 5.781000868368932e-06, "loss": 0.0324, "step": 26505 }, { "epoch": 3.1431281868848573, "grad_norm": 0.5774722123487237, "learning_rate": 5.779465836183768e-06, "loss": 0.0331, "step": 26506 }, { "epoch": 3.143246768646982, "grad_norm": 0.5334660386945912, "learning_rate": 5.777930981188356e-06, "loss": 0.0198, "step": 26507 }, { "epoch": 3.1433653504091073, "grad_norm": 0.4237401786488765, "learning_rate": 5.776396303396853e-06, "loss": 0.0219, "step": 26508 }, { "epoch": 3.143483932171232, "grad_norm": 0.3782750539849547, "learning_rate": 5.774861802823398e-06, "loss": 0.016, "step": 26509 }, { "epoch": 3.143602513933357, "grad_norm": 0.5964203984234823, "learning_rate": 5.773327479482152e-06, "loss": 0.0213, "step": 26510 }, { "epoch": 3.143721095695482, "grad_norm": 0.4701924915399486, "learning_rate": 5.771793333387232e-06, "loss": 0.0228, "step": 26511 }, { "epoch": 3.143839677457607, "grad_norm": 0.6454118133131835, "learning_rate": 5.770259364552816e-06, "loss": 0.0273, "step": 26512 }, { "epoch": 3.143958259219732, "grad_norm": 0.552411031097624, "learning_rate": 5.7687255729930194e-06, "loss": 0.0273, "step": 26513 }, { "epoch": 3.144076840981857, "grad_norm": 0.524483328863811, "learning_rate": 5.7671919587219875e-06, "loss": 0.0181, "step": 26514 }, { "epoch": 3.144195422743982, "grad_norm": 0.4530422848865083, "learning_rate": 5.765658521753864e-06, "loss": 0.028, "step": 26515 }, { "epoch": 3.144314004506107, "grad_norm": 0.5909972867375151, "learning_rate": 5.764125262102782e-06, "loss": 0.024, "step": 26516 }, { "epoch": 3.144432586268232, "grad_norm": 0.6490524107886798, "learning_rate": 5.762592179782883e-06, "loss": 0.0244, "step": 26517 }, { "epoch": 3.144551168030357, "grad_norm": 0.6284204537498008, "learning_rate": 5.761059274808286e-06, "loss": 0.0336, "step": 26518 }, { "epoch": 3.144669749792482, "grad_norm": 0.60442366777456, "learning_rate": 5.759526547193131e-06, "loss": 0.027, "step": 26519 }, { "epoch": 3.144788331554607, "grad_norm": 0.4308110041276725, "learning_rate": 5.757993996951547e-06, "loss": 0.0206, "step": 26520 }, { "epoch": 3.144906913316732, "grad_norm": 0.32672195390668596, "learning_rate": 5.756461624097667e-06, "loss": 0.0165, "step": 26521 }, { "epoch": 3.145025495078857, "grad_norm": 0.6177497124058408, "learning_rate": 5.754929428645609e-06, "loss": 0.0263, "step": 26522 }, { "epoch": 3.1451440768409817, "grad_norm": 0.3755258770553064, "learning_rate": 5.753397410609498e-06, "loss": 0.0244, "step": 26523 }, { "epoch": 3.145262658603107, "grad_norm": 0.5002124135303487, "learning_rate": 5.7518655700034646e-06, "loss": 0.0322, "step": 26524 }, { "epoch": 3.1453812403652317, "grad_norm": 0.7858284166251983, "learning_rate": 5.750333906841626e-06, "loss": 0.0299, "step": 26525 }, { "epoch": 3.145499822127357, "grad_norm": 0.7236651107708738, "learning_rate": 5.748802421138103e-06, "loss": 0.0434, "step": 26526 }, { "epoch": 3.1456184038894817, "grad_norm": 0.954658505183258, "learning_rate": 5.747271112907016e-06, "loss": 0.0385, "step": 26527 }, { "epoch": 3.145736985651607, "grad_norm": 0.6379611975864471, "learning_rate": 5.745739982162485e-06, "loss": 0.0386, "step": 26528 }, { "epoch": 3.1458555674137316, "grad_norm": 0.6190511002061756, "learning_rate": 5.7442090289186124e-06, "loss": 0.026, "step": 26529 }, { "epoch": 3.145974149175857, "grad_norm": 0.6827590386808182, "learning_rate": 5.742678253189521e-06, "loss": 0.0371, "step": 26530 }, { "epoch": 3.1460927309379816, "grad_norm": 0.45200367410830866, "learning_rate": 5.74114765498932e-06, "loss": 0.0234, "step": 26531 }, { "epoch": 3.146211312700107, "grad_norm": 0.6796563803710274, "learning_rate": 5.7396172343321315e-06, "loss": 0.0277, "step": 26532 }, { "epoch": 3.1463298944622315, "grad_norm": 0.46064238859130574, "learning_rate": 5.738086991232045e-06, "loss": 0.0172, "step": 26533 }, { "epoch": 3.1464484762243567, "grad_norm": 0.5218751141072743, "learning_rate": 5.7365569257031735e-06, "loss": 0.0235, "step": 26534 }, { "epoch": 3.1465670579864815, "grad_norm": 0.4013512672705241, "learning_rate": 5.7350270377596275e-06, "loss": 0.0183, "step": 26535 }, { "epoch": 3.1466856397486067, "grad_norm": 0.6946502655787471, "learning_rate": 5.733497327415505e-06, "loss": 0.0279, "step": 26536 }, { "epoch": 3.1468042215107315, "grad_norm": 0.5394938520723539, "learning_rate": 5.7319677946849195e-06, "loss": 0.0304, "step": 26537 }, { "epoch": 3.1469228032728567, "grad_norm": 0.7056571504280278, "learning_rate": 5.730438439581948e-06, "loss": 0.0322, "step": 26538 }, { "epoch": 3.1470413850349814, "grad_norm": 0.48864677955113806, "learning_rate": 5.728909262120721e-06, "loss": 0.0153, "step": 26539 }, { "epoch": 3.1471599667971066, "grad_norm": 0.6448594654091826, "learning_rate": 5.72738026231531e-06, "loss": 0.0288, "step": 26540 }, { "epoch": 3.1472785485592314, "grad_norm": 0.40891234158686096, "learning_rate": 5.725851440179819e-06, "loss": 0.0191, "step": 26541 }, { "epoch": 3.1473971303213566, "grad_norm": 0.3742260613568129, "learning_rate": 5.724322795728343e-06, "loss": 0.0183, "step": 26542 }, { "epoch": 3.1475157120834814, "grad_norm": 0.4555088845215266, "learning_rate": 5.722794328974978e-06, "loss": 0.0232, "step": 26543 }, { "epoch": 3.1476342938456066, "grad_norm": 1.0298972948721947, "learning_rate": 5.721266039933806e-06, "loss": 0.0526, "step": 26544 }, { "epoch": 3.1477528756077318, "grad_norm": 0.34464262379438, "learning_rate": 5.719737928618918e-06, "loss": 0.0152, "step": 26545 }, { "epoch": 3.1478714573698565, "grad_norm": 0.31128886138342626, "learning_rate": 5.7182099950444045e-06, "loss": 0.0209, "step": 26546 }, { "epoch": 3.1479900391319813, "grad_norm": 0.6376810950066052, "learning_rate": 5.71668223922435e-06, "loss": 0.0441, "step": 26547 }, { "epoch": 3.1481086208941065, "grad_norm": 0.6372475864418388, "learning_rate": 5.715154661172844e-06, "loss": 0.0299, "step": 26548 }, { "epoch": 3.1482272026562317, "grad_norm": 0.7366146216610789, "learning_rate": 5.713627260903954e-06, "loss": 0.0431, "step": 26549 }, { "epoch": 3.1483457844183564, "grad_norm": 0.3576252486601014, "learning_rate": 5.712100038431778e-06, "loss": 0.0142, "step": 26550 }, { "epoch": 3.1484643661804816, "grad_norm": 0.6693882808168642, "learning_rate": 5.710572993770383e-06, "loss": 0.0372, "step": 26551 }, { "epoch": 3.1485829479426064, "grad_norm": 0.6454864410202077, "learning_rate": 5.7090461269338595e-06, "loss": 0.04, "step": 26552 }, { "epoch": 3.1487015297047316, "grad_norm": 0.40647217274426994, "learning_rate": 5.707519437936257e-06, "loss": 0.0188, "step": 26553 }, { "epoch": 3.1488201114668564, "grad_norm": 0.4958807839197245, "learning_rate": 5.705992926791684e-06, "loss": 0.0171, "step": 26554 }, { "epoch": 3.1489386932289816, "grad_norm": 0.4987326682964474, "learning_rate": 5.7044665935141876e-06, "loss": 0.0251, "step": 26555 }, { "epoch": 3.1490572749911063, "grad_norm": 0.5462990981653086, "learning_rate": 5.702940438117849e-06, "loss": 0.0203, "step": 26556 }, { "epoch": 3.1491758567532315, "grad_norm": 0.48780061276303704, "learning_rate": 5.701414460616735e-06, "loss": 0.0233, "step": 26557 }, { "epoch": 3.1492944385153563, "grad_norm": 0.4951421467599058, "learning_rate": 5.699888661024916e-06, "loss": 0.0255, "step": 26558 }, { "epoch": 3.1494130202774815, "grad_norm": 0.8189411315742582, "learning_rate": 5.698363039356461e-06, "loss": 0.0428, "step": 26559 }, { "epoch": 3.1495316020396062, "grad_norm": 0.6380509961310558, "learning_rate": 5.696837595625415e-06, "loss": 0.0289, "step": 26560 }, { "epoch": 3.1496501838017315, "grad_norm": 0.6512545278836288, "learning_rate": 5.69531232984587e-06, "loss": 0.0285, "step": 26561 }, { "epoch": 3.149768765563856, "grad_norm": 0.3440223846764162, "learning_rate": 5.693787242031868e-06, "loss": 0.0163, "step": 26562 }, { "epoch": 3.1498873473259814, "grad_norm": 0.40498095197337325, "learning_rate": 5.69226233219747e-06, "loss": 0.0219, "step": 26563 }, { "epoch": 3.150005929088106, "grad_norm": 0.4011267816332027, "learning_rate": 5.690737600356735e-06, "loss": 0.0176, "step": 26564 }, { "epoch": 3.1501245108502314, "grad_norm": 0.46662903999268157, "learning_rate": 5.689213046523725e-06, "loss": 0.0215, "step": 26565 }, { "epoch": 3.150243092612356, "grad_norm": 0.9363607701605602, "learning_rate": 5.687688670712493e-06, "loss": 0.0386, "step": 26566 }, { "epoch": 3.1503616743744813, "grad_norm": 0.6887110046460401, "learning_rate": 5.686164472937086e-06, "loss": 0.0234, "step": 26567 }, { "epoch": 3.150480256136606, "grad_norm": 0.4885368418439986, "learning_rate": 5.6846404532115565e-06, "loss": 0.0319, "step": 26568 }, { "epoch": 3.1505988378987313, "grad_norm": 0.6528168409109995, "learning_rate": 5.683116611549955e-06, "loss": 0.0326, "step": 26569 }, { "epoch": 3.150717419660856, "grad_norm": 0.8395616197642036, "learning_rate": 5.681592947966338e-06, "loss": 0.0411, "step": 26570 }, { "epoch": 3.1508360014229813, "grad_norm": 0.5861161698914285, "learning_rate": 5.680069462474733e-06, "loss": 0.025, "step": 26571 }, { "epoch": 3.150954583185106, "grad_norm": 0.3887593293051977, "learning_rate": 5.678546155089207e-06, "loss": 0.0187, "step": 26572 }, { "epoch": 3.151073164947231, "grad_norm": 0.23024903190683405, "learning_rate": 5.677023025823785e-06, "loss": 0.014, "step": 26573 }, { "epoch": 3.151191746709356, "grad_norm": 0.47631072378532624, "learning_rate": 5.6755000746925165e-06, "loss": 0.027, "step": 26574 }, { "epoch": 3.151310328471481, "grad_norm": 0.6769521497072645, "learning_rate": 5.6739773017094375e-06, "loss": 0.031, "step": 26575 }, { "epoch": 3.151428910233606, "grad_norm": 0.5895592546007068, "learning_rate": 5.672454706888591e-06, "loss": 0.0291, "step": 26576 }, { "epoch": 3.151547491995731, "grad_norm": 0.5016624943202945, "learning_rate": 5.6709322902440186e-06, "loss": 0.0241, "step": 26577 }, { "epoch": 3.151666073757856, "grad_norm": 0.4795681599393951, "learning_rate": 5.669410051789739e-06, "loss": 0.022, "step": 26578 }, { "epoch": 3.151784655519981, "grad_norm": 0.37123960081839974, "learning_rate": 5.667887991539794e-06, "loss": 0.024, "step": 26579 }, { "epoch": 3.151903237282106, "grad_norm": 0.5478626072401132, "learning_rate": 5.6663661095082175e-06, "loss": 0.0245, "step": 26580 }, { "epoch": 3.152021819044231, "grad_norm": 0.6797552609797196, "learning_rate": 5.664844405709041e-06, "loss": 0.0315, "step": 26581 }, { "epoch": 3.152140400806356, "grad_norm": 0.5169363666498958, "learning_rate": 5.663322880156277e-06, "loss": 0.0258, "step": 26582 }, { "epoch": 3.152258982568481, "grad_norm": 0.5983167696969309, "learning_rate": 5.661801532863978e-06, "loss": 0.0246, "step": 26583 }, { "epoch": 3.152377564330606, "grad_norm": 0.45995688807837276, "learning_rate": 5.660280363846146e-06, "loss": 0.0271, "step": 26584 }, { "epoch": 3.152496146092731, "grad_norm": 0.5916461285047279, "learning_rate": 5.6587593731168125e-06, "loss": 0.0319, "step": 26585 }, { "epoch": 3.1526147278548557, "grad_norm": 0.44402319585487193, "learning_rate": 5.657238560690001e-06, "loss": 0.0193, "step": 26586 }, { "epoch": 3.152733309616981, "grad_norm": 0.4831016992576706, "learning_rate": 5.655717926579731e-06, "loss": 0.0239, "step": 26587 }, { "epoch": 3.1528518913791057, "grad_norm": 0.5122591679597002, "learning_rate": 5.654197470800026e-06, "loss": 0.0247, "step": 26588 }, { "epoch": 3.152970473141231, "grad_norm": 0.4522386655295397, "learning_rate": 5.6526771933648915e-06, "loss": 0.0165, "step": 26589 }, { "epoch": 3.1530890549033557, "grad_norm": 0.863066393736565, "learning_rate": 5.651157094288345e-06, "loss": 0.0446, "step": 26590 }, { "epoch": 3.153207636665481, "grad_norm": 0.886404180688407, "learning_rate": 5.649637173584407e-06, "loss": 0.0608, "step": 26591 }, { "epoch": 3.1533262184276056, "grad_norm": 0.639482269552278, "learning_rate": 5.6481174312670885e-06, "loss": 0.0303, "step": 26592 }, { "epoch": 3.153444800189731, "grad_norm": 0.7825256477438343, "learning_rate": 5.646597867350392e-06, "loss": 0.0366, "step": 26593 }, { "epoch": 3.153563381951856, "grad_norm": 0.6222978926406758, "learning_rate": 5.645078481848329e-06, "loss": 0.0322, "step": 26594 }, { "epoch": 3.153681963713981, "grad_norm": 0.5722274146757342, "learning_rate": 5.64355927477491e-06, "loss": 0.0328, "step": 26595 }, { "epoch": 3.1538005454761056, "grad_norm": 0.5014712175888643, "learning_rate": 5.642040246144137e-06, "loss": 0.0236, "step": 26596 }, { "epoch": 3.1539191272382308, "grad_norm": 0.6811818094397059, "learning_rate": 5.640521395970014e-06, "loss": 0.0497, "step": 26597 }, { "epoch": 3.154037709000356, "grad_norm": 0.3537557709195875, "learning_rate": 5.6390027242665425e-06, "loss": 0.0171, "step": 26598 }, { "epoch": 3.1541562907624807, "grad_norm": 0.5690250867398374, "learning_rate": 5.637484231047732e-06, "loss": 0.0334, "step": 26599 }, { "epoch": 3.154274872524606, "grad_norm": 0.30618952500065916, "learning_rate": 5.635965916327565e-06, "loss": 0.0218, "step": 26600 }, { "epoch": 3.1543934542867307, "grad_norm": 0.3962223201012897, "learning_rate": 5.634447780120047e-06, "loss": 0.0253, "step": 26601 }, { "epoch": 3.154512036048856, "grad_norm": 0.7390115206593836, "learning_rate": 5.6329298224391754e-06, "loss": 0.045, "step": 26602 }, { "epoch": 3.1546306178109806, "grad_norm": 0.8415808425312654, "learning_rate": 5.631412043298945e-06, "loss": 0.0429, "step": 26603 }, { "epoch": 3.154749199573106, "grad_norm": 0.45609098772567436, "learning_rate": 5.629894442713341e-06, "loss": 0.0244, "step": 26604 }, { "epoch": 3.1548677813352306, "grad_norm": 0.6407850438187326, "learning_rate": 5.6283770206963545e-06, "loss": 0.0318, "step": 26605 }, { "epoch": 3.154986363097356, "grad_norm": 0.5197665351046238, "learning_rate": 5.626859777261975e-06, "loss": 0.0216, "step": 26606 }, { "epoch": 3.1551049448594806, "grad_norm": 0.44382483187713045, "learning_rate": 5.625342712424195e-06, "loss": 0.0222, "step": 26607 }, { "epoch": 3.1552235266216058, "grad_norm": 0.44687303323880107, "learning_rate": 5.623825826197002e-06, "loss": 0.0229, "step": 26608 }, { "epoch": 3.1553421083837305, "grad_norm": 0.6016046586231091, "learning_rate": 5.62230911859436e-06, "loss": 0.0322, "step": 26609 }, { "epoch": 3.1554606901458557, "grad_norm": 0.4525455067817098, "learning_rate": 5.620792589630278e-06, "loss": 0.0277, "step": 26610 }, { "epoch": 3.1555792719079805, "grad_norm": 0.664016422136047, "learning_rate": 5.619276239318719e-06, "loss": 0.0337, "step": 26611 }, { "epoch": 3.1556978536701057, "grad_norm": 0.4769741349010641, "learning_rate": 5.617760067673666e-06, "loss": 0.0212, "step": 26612 }, { "epoch": 3.1558164354322304, "grad_norm": 0.24428607418664183, "learning_rate": 5.616244074709098e-06, "loss": 0.0112, "step": 26613 }, { "epoch": 3.1559350171943557, "grad_norm": 0.43167705189003325, "learning_rate": 5.6147282604389895e-06, "loss": 0.0241, "step": 26614 }, { "epoch": 3.1560535989564804, "grad_norm": 0.6038021328274408, "learning_rate": 5.61321262487732e-06, "loss": 0.0322, "step": 26615 }, { "epoch": 3.1561721807186056, "grad_norm": 0.40571367420814874, "learning_rate": 5.6116971680380435e-06, "loss": 0.02, "step": 26616 }, { "epoch": 3.1562907624807304, "grad_norm": 0.42774192457661264, "learning_rate": 5.610181889935159e-06, "loss": 0.0308, "step": 26617 }, { "epoch": 3.1564093442428556, "grad_norm": 0.7603706727904274, "learning_rate": 5.6086667905826115e-06, "loss": 0.0446, "step": 26618 }, { "epoch": 3.1565279260049803, "grad_norm": 0.39898155567516974, "learning_rate": 5.6071518699943846e-06, "loss": 0.0179, "step": 26619 }, { "epoch": 3.1566465077671055, "grad_norm": 0.6306825913867191, "learning_rate": 5.605637128184424e-06, "loss": 0.0371, "step": 26620 }, { "epoch": 3.1567650895292303, "grad_norm": 0.6076442742104416, "learning_rate": 5.604122565166717e-06, "loss": 0.027, "step": 26621 }, { "epoch": 3.1568836712913555, "grad_norm": 0.4557759464683438, "learning_rate": 5.60260818095521e-06, "loss": 0.0227, "step": 26622 }, { "epoch": 3.1570022530534803, "grad_norm": 0.3119016819665012, "learning_rate": 5.601093975563868e-06, "loss": 0.0119, "step": 26623 }, { "epoch": 3.1571208348156055, "grad_norm": 0.5644064562534722, "learning_rate": 5.599579949006651e-06, "loss": 0.0198, "step": 26624 }, { "epoch": 3.15723941657773, "grad_norm": 0.7195544666409116, "learning_rate": 5.5980661012975165e-06, "loss": 0.0409, "step": 26625 }, { "epoch": 3.1573579983398554, "grad_norm": 0.5560744065102661, "learning_rate": 5.596552432450428e-06, "loss": 0.0266, "step": 26626 }, { "epoch": 3.15747658010198, "grad_norm": 0.3694347643354028, "learning_rate": 5.59503894247932e-06, "loss": 0.0166, "step": 26627 }, { "epoch": 3.1575951618641054, "grad_norm": 0.3895285804571392, "learning_rate": 5.593525631398167e-06, "loss": 0.019, "step": 26628 }, { "epoch": 3.15771374362623, "grad_norm": 0.49707156261795765, "learning_rate": 5.592012499220906e-06, "loss": 0.0152, "step": 26629 }, { "epoch": 3.1578323253883553, "grad_norm": 0.5476513881473059, "learning_rate": 5.590499545961495e-06, "loss": 0.0292, "step": 26630 }, { "epoch": 3.15795090715048, "grad_norm": 0.4434607461536729, "learning_rate": 5.588986771633864e-06, "loss": 0.0247, "step": 26631 }, { "epoch": 3.1580694889126053, "grad_norm": 0.39486660494170345, "learning_rate": 5.587474176251984e-06, "loss": 0.0198, "step": 26632 }, { "epoch": 3.15818807067473, "grad_norm": 0.7158026753544494, "learning_rate": 5.58596175982978e-06, "loss": 0.0357, "step": 26633 }, { "epoch": 3.1583066524368553, "grad_norm": 0.35456394169791267, "learning_rate": 5.584449522381205e-06, "loss": 0.0143, "step": 26634 }, { "epoch": 3.15842523419898, "grad_norm": 0.7763390793083799, "learning_rate": 5.582937463920193e-06, "loss": 0.0425, "step": 26635 }, { "epoch": 3.1585438159611052, "grad_norm": 0.6020293867749708, "learning_rate": 5.581425584460687e-06, "loss": 0.0316, "step": 26636 }, { "epoch": 3.15866239772323, "grad_norm": 0.5066444049013469, "learning_rate": 5.579913884016633e-06, "loss": 0.0234, "step": 26637 }, { "epoch": 3.158780979485355, "grad_norm": 0.5129731182383555, "learning_rate": 5.5784023626019504e-06, "loss": 0.0284, "step": 26638 }, { "epoch": 3.15889956124748, "grad_norm": 0.41223949646486846, "learning_rate": 5.576891020230582e-06, "loss": 0.0185, "step": 26639 }, { "epoch": 3.159018143009605, "grad_norm": 0.31086884832271366, "learning_rate": 5.57537985691646e-06, "loss": 0.014, "step": 26640 }, { "epoch": 3.15913672477173, "grad_norm": 0.495380124354331, "learning_rate": 5.573868872673524e-06, "loss": 0.0224, "step": 26641 }, { "epoch": 3.159255306533855, "grad_norm": 0.6532597895721629, "learning_rate": 5.5723580675156786e-06, "loss": 0.0287, "step": 26642 }, { "epoch": 3.15937388829598, "grad_norm": 0.7877203254114351, "learning_rate": 5.570847441456883e-06, "loss": 0.0379, "step": 26643 }, { "epoch": 3.159492470058105, "grad_norm": 0.37896503831854017, "learning_rate": 5.569336994511043e-06, "loss": 0.0157, "step": 26644 }, { "epoch": 3.15961105182023, "grad_norm": 0.5480394174531984, "learning_rate": 5.567826726692088e-06, "loss": 0.0264, "step": 26645 }, { "epoch": 3.159729633582355, "grad_norm": 0.610554383340205, "learning_rate": 5.566316638013941e-06, "loss": 0.0268, "step": 26646 }, { "epoch": 3.1598482153444802, "grad_norm": 0.5985654750563624, "learning_rate": 5.5648067284905205e-06, "loss": 0.0324, "step": 26647 }, { "epoch": 3.159966797106605, "grad_norm": 0.9368413839201747, "learning_rate": 5.563296998135758e-06, "loss": 0.0484, "step": 26648 }, { "epoch": 3.1600853788687298, "grad_norm": 0.5897524142164748, "learning_rate": 5.561787446963557e-06, "loss": 0.0288, "step": 26649 }, { "epoch": 3.160203960630855, "grad_norm": 0.4997943625897073, "learning_rate": 5.560278074987838e-06, "loss": 0.0261, "step": 26650 }, { "epoch": 3.16032254239298, "grad_norm": 0.33964160769384344, "learning_rate": 5.558768882222518e-06, "loss": 0.0191, "step": 26651 }, { "epoch": 3.160441124155105, "grad_norm": 0.5867920944600887, "learning_rate": 5.557259868681513e-06, "loss": 0.0328, "step": 26652 }, { "epoch": 3.16055970591723, "grad_norm": 1.2876167773105969, "learning_rate": 5.555751034378723e-06, "loss": 0.0261, "step": 26653 }, { "epoch": 3.160678287679355, "grad_norm": 0.5914519963827372, "learning_rate": 5.554242379328065e-06, "loss": 0.0292, "step": 26654 }, { "epoch": 3.16079686944148, "grad_norm": 0.47826560871631996, "learning_rate": 5.552733903543447e-06, "loss": 0.022, "step": 26655 }, { "epoch": 3.160915451203605, "grad_norm": 0.5591702200099782, "learning_rate": 5.5512256070387735e-06, "loss": 0.0218, "step": 26656 }, { "epoch": 3.16103403296573, "grad_norm": 0.5618164289533202, "learning_rate": 5.549717489827952e-06, "loss": 0.0315, "step": 26657 }, { "epoch": 3.161152614727855, "grad_norm": 1.372807177882171, "learning_rate": 5.548209551924882e-06, "loss": 0.0677, "step": 26658 }, { "epoch": 3.16127119648998, "grad_norm": 0.43490764991400105, "learning_rate": 5.5467017933434725e-06, "loss": 0.0218, "step": 26659 }, { "epoch": 3.1613897782521048, "grad_norm": 0.6329157173893086, "learning_rate": 5.545194214097612e-06, "loss": 0.0295, "step": 26660 }, { "epoch": 3.16150836001423, "grad_norm": 0.4793956041476161, "learning_rate": 5.543686814201204e-06, "loss": 0.0176, "step": 26661 }, { "epoch": 3.1616269417763547, "grad_norm": 0.8133051954632533, "learning_rate": 5.542179593668142e-06, "loss": 0.0395, "step": 26662 }, { "epoch": 3.16174552353848, "grad_norm": 0.3922801346389591, "learning_rate": 5.540672552512335e-06, "loss": 0.0177, "step": 26663 }, { "epoch": 3.1618641053006047, "grad_norm": 0.8189906547589164, "learning_rate": 5.539165690747655e-06, "loss": 0.0347, "step": 26664 }, { "epoch": 3.16198268706273, "grad_norm": 0.8532087012400474, "learning_rate": 5.537659008388002e-06, "loss": 0.0495, "step": 26665 }, { "epoch": 3.1621012688248546, "grad_norm": 0.6355937775994145, "learning_rate": 5.536152505447268e-06, "loss": 0.0335, "step": 26666 }, { "epoch": 3.16221985058698, "grad_norm": 0.4679963914209739, "learning_rate": 5.53464618193934e-06, "loss": 0.026, "step": 26667 }, { "epoch": 3.1623384323491046, "grad_norm": 0.3046907697715899, "learning_rate": 5.533140037878104e-06, "loss": 0.0158, "step": 26668 }, { "epoch": 3.16245701411123, "grad_norm": 0.6167404360071604, "learning_rate": 5.531634073277442e-06, "loss": 0.0303, "step": 26669 }, { "epoch": 3.1625755958733546, "grad_norm": 0.581131604725843, "learning_rate": 5.530128288151251e-06, "loss": 0.0245, "step": 26670 }, { "epoch": 3.1626941776354798, "grad_norm": 0.49421846995523716, "learning_rate": 5.528622682513393e-06, "loss": 0.0242, "step": 26671 }, { "epoch": 3.1628127593976045, "grad_norm": 0.6346374649248514, "learning_rate": 5.527117256377756e-06, "loss": 0.0409, "step": 26672 }, { "epoch": 3.1629313411597297, "grad_norm": 0.5369635809030873, "learning_rate": 5.5256120097582215e-06, "loss": 0.0176, "step": 26673 }, { "epoch": 3.1630499229218545, "grad_norm": 0.5053882107652605, "learning_rate": 5.524106942668661e-06, "loss": 0.0298, "step": 26674 }, { "epoch": 3.1631685046839797, "grad_norm": 0.5193856002686691, "learning_rate": 5.522602055122958e-06, "loss": 0.0235, "step": 26675 }, { "epoch": 3.1632870864461045, "grad_norm": 0.2787619779247167, "learning_rate": 5.5210973471349656e-06, "loss": 0.0133, "step": 26676 }, { "epoch": 3.1634056682082297, "grad_norm": 0.5416771010394478, "learning_rate": 5.519592818718583e-06, "loss": 0.0227, "step": 26677 }, { "epoch": 3.1635242499703544, "grad_norm": 0.45513253190118896, "learning_rate": 5.518088469887661e-06, "loss": 0.0204, "step": 26678 }, { "epoch": 3.1636428317324796, "grad_norm": 0.7058362929988333, "learning_rate": 5.516584300656077e-06, "loss": 0.0444, "step": 26679 }, { "epoch": 3.1637614134946044, "grad_norm": 0.6633681150523794, "learning_rate": 5.515080311037682e-06, "loss": 0.0342, "step": 26680 }, { "epoch": 3.1638799952567296, "grad_norm": 0.4698969827194819, "learning_rate": 5.513576501046366e-06, "loss": 0.0203, "step": 26681 }, { "epoch": 3.1639985770188543, "grad_norm": 0.37103222759795124, "learning_rate": 5.5120728706959724e-06, "loss": 0.0225, "step": 26682 }, { "epoch": 3.1641171587809795, "grad_norm": 0.6556792537597127, "learning_rate": 5.510569420000367e-06, "loss": 0.0353, "step": 26683 }, { "epoch": 3.1642357405431043, "grad_norm": 0.5246302529082818, "learning_rate": 5.509066148973413e-06, "loss": 0.0364, "step": 26684 }, { "epoch": 3.1643543223052295, "grad_norm": 0.5120428286771012, "learning_rate": 5.50756305762897e-06, "loss": 0.0243, "step": 26685 }, { "epoch": 3.1644729040673543, "grad_norm": 0.6400210556389612, "learning_rate": 5.506060145980896e-06, "loss": 0.0422, "step": 26686 }, { "epoch": 3.1645914858294795, "grad_norm": 0.7028948818814551, "learning_rate": 5.504557414043032e-06, "loss": 0.0379, "step": 26687 }, { "epoch": 3.1647100675916042, "grad_norm": 0.5320965549280915, "learning_rate": 5.5030548618292515e-06, "loss": 0.0317, "step": 26688 }, { "epoch": 3.1648286493537294, "grad_norm": 0.38345873417187376, "learning_rate": 5.501552489353392e-06, "loss": 0.0258, "step": 26689 }, { "epoch": 3.164947231115854, "grad_norm": 0.5988139009626915, "learning_rate": 5.500050296629317e-06, "loss": 0.0308, "step": 26690 }, { "epoch": 3.1650658128779794, "grad_norm": 0.527700164358762, "learning_rate": 5.498548283670848e-06, "loss": 0.0281, "step": 26691 }, { "epoch": 3.165184394640104, "grad_norm": 0.45917698734887175, "learning_rate": 5.497046450491866e-06, "loss": 0.0186, "step": 26692 }, { "epoch": 3.1653029764022294, "grad_norm": 0.3961475707044245, "learning_rate": 5.495544797106192e-06, "loss": 0.019, "step": 26693 }, { "epoch": 3.165421558164354, "grad_norm": 0.43082807047418076, "learning_rate": 5.494043323527675e-06, "loss": 0.0281, "step": 26694 }, { "epoch": 3.1655401399264793, "grad_norm": 0.4875196624021285, "learning_rate": 5.4925420297701616e-06, "loss": 0.0244, "step": 26695 }, { "epoch": 3.1656587216886045, "grad_norm": 0.5695649295574221, "learning_rate": 5.491040915847487e-06, "loss": 0.0301, "step": 26696 }, { "epoch": 3.1657773034507293, "grad_norm": 0.29331902382863034, "learning_rate": 5.4895399817735e-06, "loss": 0.011, "step": 26697 }, { "epoch": 3.165895885212854, "grad_norm": 0.5610909233469809, "learning_rate": 5.4880392275620164e-06, "loss": 0.0275, "step": 26698 }, { "epoch": 3.1660144669749792, "grad_norm": 0.3878037471302639, "learning_rate": 5.486538653226894e-06, "loss": 0.0261, "step": 26699 }, { "epoch": 3.1661330487371044, "grad_norm": 0.6906787482529037, "learning_rate": 5.4850382587819525e-06, "loss": 0.0329, "step": 26700 }, { "epoch": 3.166251630499229, "grad_norm": 0.4454800388023638, "learning_rate": 5.483538044241035e-06, "loss": 0.0235, "step": 26701 }, { "epoch": 3.1663702122613544, "grad_norm": 0.7748776986314141, "learning_rate": 5.482038009617949e-06, "loss": 0.0442, "step": 26702 }, { "epoch": 3.166488794023479, "grad_norm": 0.5263954710983711, "learning_rate": 5.480538154926551e-06, "loss": 0.0298, "step": 26703 }, { "epoch": 3.1666073757856044, "grad_norm": 0.4256685301779993, "learning_rate": 5.479038480180649e-06, "loss": 0.0197, "step": 26704 }, { "epoch": 3.166725957547729, "grad_norm": 0.6694135167410876, "learning_rate": 5.4775389853940735e-06, "loss": 0.0441, "step": 26705 }, { "epoch": 3.1668445393098543, "grad_norm": 0.7857330373306312, "learning_rate": 5.476039670580646e-06, "loss": 0.0512, "step": 26706 }, { "epoch": 3.166963121071979, "grad_norm": 0.36020711887452855, "learning_rate": 5.474540535754194e-06, "loss": 0.0163, "step": 26707 }, { "epoch": 3.1670817028341043, "grad_norm": 0.46759409713390915, "learning_rate": 5.4730415809285425e-06, "loss": 0.0274, "step": 26708 }, { "epoch": 3.167200284596229, "grad_norm": 0.7367709031179004, "learning_rate": 5.471542806117494e-06, "loss": 0.0395, "step": 26709 }, { "epoch": 3.1673188663583542, "grad_norm": 0.3291176863433781, "learning_rate": 5.470044211334871e-06, "loss": 0.0169, "step": 26710 }, { "epoch": 3.167437448120479, "grad_norm": 1.0399312302954402, "learning_rate": 5.468545796594493e-06, "loss": 0.047, "step": 26711 }, { "epoch": 3.167556029882604, "grad_norm": 0.49275819577584107, "learning_rate": 5.46704756191018e-06, "loss": 0.0225, "step": 26712 }, { "epoch": 3.167674611644729, "grad_norm": 0.5455624506497216, "learning_rate": 5.46554950729572e-06, "loss": 0.0202, "step": 26713 }, { "epoch": 3.167793193406854, "grad_norm": 0.5702892253792198, "learning_rate": 5.464051632764955e-06, "loss": 0.04, "step": 26714 }, { "epoch": 3.167911775168979, "grad_norm": 0.5225814026508266, "learning_rate": 5.462553938331666e-06, "loss": 0.0333, "step": 26715 }, { "epoch": 3.168030356931104, "grad_norm": 0.5089805022195287, "learning_rate": 5.461056424009675e-06, "loss": 0.0262, "step": 26716 }, { "epoch": 3.168148938693229, "grad_norm": 0.854781909004644, "learning_rate": 5.459559089812783e-06, "loss": 0.0453, "step": 26717 }, { "epoch": 3.168267520455354, "grad_norm": 0.5736952935338923, "learning_rate": 5.458061935754794e-06, "loss": 0.0312, "step": 26718 }, { "epoch": 3.168386102217479, "grad_norm": 0.49248841529545195, "learning_rate": 5.456564961849517e-06, "loss": 0.029, "step": 26719 }, { "epoch": 3.168504683979604, "grad_norm": 0.43418511108965685, "learning_rate": 5.455068168110735e-06, "loss": 0.0148, "step": 26720 }, { "epoch": 3.168623265741729, "grad_norm": 0.48311070922969684, "learning_rate": 5.453571554552262e-06, "loss": 0.022, "step": 26721 }, { "epoch": 3.168741847503854, "grad_norm": 0.5955857962872563, "learning_rate": 5.4520751211878886e-06, "loss": 0.0294, "step": 26722 }, { "epoch": 3.1688604292659788, "grad_norm": 0.401166068841645, "learning_rate": 5.450578868031409e-06, "loss": 0.0174, "step": 26723 }, { "epoch": 3.168979011028104, "grad_norm": 0.564569226103879, "learning_rate": 5.449082795096627e-06, "loss": 0.0249, "step": 26724 }, { "epoch": 3.1690975927902287, "grad_norm": 0.6738634091322023, "learning_rate": 5.447586902397314e-06, "loss": 0.0375, "step": 26725 }, { "epoch": 3.169216174552354, "grad_norm": 0.38784789238391076, "learning_rate": 5.4460911899472864e-06, "loss": 0.0205, "step": 26726 }, { "epoch": 3.1693347563144787, "grad_norm": 0.7138085552075453, "learning_rate": 5.444595657760312e-06, "loss": 0.0344, "step": 26727 }, { "epoch": 3.169453338076604, "grad_norm": 0.4621935581536481, "learning_rate": 5.443100305850185e-06, "loss": 0.0242, "step": 26728 }, { "epoch": 3.1695719198387287, "grad_norm": 0.7939860276844112, "learning_rate": 5.441605134230693e-06, "loss": 0.0494, "step": 26729 }, { "epoch": 3.169690501600854, "grad_norm": 0.8543588480729267, "learning_rate": 5.440110142915622e-06, "loss": 0.0363, "step": 26730 }, { "epoch": 3.1698090833629786, "grad_norm": 0.7898565018637371, "learning_rate": 5.438615331918745e-06, "loss": 0.0391, "step": 26731 }, { "epoch": 3.169927665125104, "grad_norm": 0.34098792414729207, "learning_rate": 5.437120701253845e-06, "loss": 0.0198, "step": 26732 }, { "epoch": 3.1700462468872286, "grad_norm": 0.5485708613383049, "learning_rate": 5.435626250934703e-06, "loss": 0.0275, "step": 26733 }, { "epoch": 3.170164828649354, "grad_norm": 0.481115096659025, "learning_rate": 5.4341319809750964e-06, "loss": 0.0332, "step": 26734 }, { "epoch": 3.1702834104114785, "grad_norm": 0.6878561817478288, "learning_rate": 5.432637891388806e-06, "loss": 0.0383, "step": 26735 }, { "epoch": 3.1704019921736037, "grad_norm": 0.4079535202290307, "learning_rate": 5.431143982189585e-06, "loss": 0.0172, "step": 26736 }, { "epoch": 3.1705205739357285, "grad_norm": 0.4214816620644817, "learning_rate": 5.429650253391233e-06, "loss": 0.026, "step": 26737 }, { "epoch": 3.1706391556978537, "grad_norm": 0.6137695776477892, "learning_rate": 5.4281567050075e-06, "loss": 0.0368, "step": 26738 }, { "epoch": 3.1707577374599785, "grad_norm": 0.78725275220998, "learning_rate": 5.426663337052162e-06, "loss": 0.0334, "step": 26739 }, { "epoch": 3.1708763192221037, "grad_norm": 0.4399612354286195, "learning_rate": 5.425170149538986e-06, "loss": 0.0231, "step": 26740 }, { "epoch": 3.1709949009842284, "grad_norm": 0.5910032812961574, "learning_rate": 5.4236771424817455e-06, "loss": 0.0271, "step": 26741 }, { "epoch": 3.1711134827463536, "grad_norm": 0.6578898526940677, "learning_rate": 5.422184315894185e-06, "loss": 0.032, "step": 26742 }, { "epoch": 3.1712320645084784, "grad_norm": 0.6214806636989694, "learning_rate": 5.420691669790079e-06, "loss": 0.0319, "step": 26743 }, { "epoch": 3.1713506462706036, "grad_norm": 0.3947991240829268, "learning_rate": 5.419199204183187e-06, "loss": 0.018, "step": 26744 }, { "epoch": 3.171469228032729, "grad_norm": 0.34455790541452275, "learning_rate": 5.417706919087265e-06, "loss": 0.0182, "step": 26745 }, { "epoch": 3.1715878097948536, "grad_norm": 0.5638888866163443, "learning_rate": 5.41621481451608e-06, "loss": 0.035, "step": 26746 }, { "epoch": 3.1717063915569783, "grad_norm": 0.5817569221000787, "learning_rate": 5.414722890483364e-06, "loss": 0.0243, "step": 26747 }, { "epoch": 3.1718249733191035, "grad_norm": 0.43688663114223547, "learning_rate": 5.413231147002903e-06, "loss": 0.0217, "step": 26748 }, { "epoch": 3.1719435550812287, "grad_norm": 0.5309896156379215, "learning_rate": 5.411739584088421e-06, "loss": 0.029, "step": 26749 }, { "epoch": 3.1720621368433535, "grad_norm": 0.48447605829924373, "learning_rate": 5.41024820175369e-06, "loss": 0.027, "step": 26750 }, { "epoch": 3.1721807186054787, "grad_norm": 0.30425748007982434, "learning_rate": 5.408757000012432e-06, "loss": 0.0136, "step": 26751 }, { "epoch": 3.1722993003676034, "grad_norm": 0.8706417153069349, "learning_rate": 5.407265978878423e-06, "loss": 0.0633, "step": 26752 }, { "epoch": 3.1724178821297286, "grad_norm": 0.711793280719569, "learning_rate": 5.405775138365391e-06, "loss": 0.032, "step": 26753 }, { "epoch": 3.1725364638918534, "grad_norm": 0.6767505318807576, "learning_rate": 5.404284478487082e-06, "loss": 0.0281, "step": 26754 }, { "epoch": 3.1726550456539786, "grad_norm": 0.6276445223452962, "learning_rate": 5.4027939992572415e-06, "loss": 0.0386, "step": 26755 }, { "epoch": 3.1727736274161034, "grad_norm": 0.5104551509351485, "learning_rate": 5.401303700689608e-06, "loss": 0.0315, "step": 26756 }, { "epoch": 3.1728922091782286, "grad_norm": 0.783167499475459, "learning_rate": 5.399813582797928e-06, "loss": 0.0369, "step": 26757 }, { "epoch": 3.1730107909403533, "grad_norm": 0.5055425303238021, "learning_rate": 5.398323645595918e-06, "loss": 0.0258, "step": 26758 }, { "epoch": 3.1731293727024785, "grad_norm": 0.528385705826086, "learning_rate": 5.396833889097341e-06, "loss": 0.0285, "step": 26759 }, { "epoch": 3.1732479544646033, "grad_norm": 0.6380171405945844, "learning_rate": 5.3953443133159085e-06, "loss": 0.0276, "step": 26760 }, { "epoch": 3.1733665362267285, "grad_norm": 0.8326696812288806, "learning_rate": 5.393854918265367e-06, "loss": 0.0373, "step": 26761 }, { "epoch": 3.1734851179888532, "grad_norm": 0.5059928927794698, "learning_rate": 5.392365703959429e-06, "loss": 0.0267, "step": 26762 }, { "epoch": 3.1736036997509784, "grad_norm": 0.412125306577279, "learning_rate": 5.390876670411848e-06, "loss": 0.0173, "step": 26763 }, { "epoch": 3.173722281513103, "grad_norm": 0.6338115349567435, "learning_rate": 5.389387817636329e-06, "loss": 0.0263, "step": 26764 }, { "epoch": 3.1738408632752284, "grad_norm": 0.5826166912721881, "learning_rate": 5.3878991456466075e-06, "loss": 0.0258, "step": 26765 }, { "epoch": 3.173959445037353, "grad_norm": 0.6729858767563561, "learning_rate": 5.386410654456406e-06, "loss": 0.0344, "step": 26766 }, { "epoch": 3.1740780267994784, "grad_norm": 0.5270856821897392, "learning_rate": 5.384922344079446e-06, "loss": 0.0296, "step": 26767 }, { "epoch": 3.174196608561603, "grad_norm": 0.46378438198269584, "learning_rate": 5.383434214529456e-06, "loss": 0.0261, "step": 26768 }, { "epoch": 3.1743151903237283, "grad_norm": 0.870864313638614, "learning_rate": 5.381946265820134e-06, "loss": 0.0478, "step": 26769 }, { "epoch": 3.174433772085853, "grad_norm": 0.651864068768727, "learning_rate": 5.380458497965221e-06, "loss": 0.04, "step": 26770 }, { "epoch": 3.1745523538479783, "grad_norm": 0.692815191892806, "learning_rate": 5.378970910978417e-06, "loss": 0.0361, "step": 26771 }, { "epoch": 3.174670935610103, "grad_norm": 0.47676657747842716, "learning_rate": 5.37748350487344e-06, "loss": 0.0192, "step": 26772 }, { "epoch": 3.1747895173722283, "grad_norm": 0.48527014032001664, "learning_rate": 5.375996279664003e-06, "loss": 0.0295, "step": 26773 }, { "epoch": 3.174908099134353, "grad_norm": 0.2384127238127367, "learning_rate": 5.374509235363815e-06, "loss": 0.0116, "step": 26774 }, { "epoch": 3.175026680896478, "grad_norm": 0.5821865983844606, "learning_rate": 5.373022371986591e-06, "loss": 0.0271, "step": 26775 }, { "epoch": 3.175145262658603, "grad_norm": 0.4950805157951881, "learning_rate": 5.371535689546028e-06, "loss": 0.0263, "step": 26776 }, { "epoch": 3.175263844420728, "grad_norm": 0.4251689163878971, "learning_rate": 5.370049188055834e-06, "loss": 0.0159, "step": 26777 }, { "epoch": 3.175382426182853, "grad_norm": 0.7167405557750278, "learning_rate": 5.3685628675297164e-06, "loss": 0.0277, "step": 26778 }, { "epoch": 3.175501007944978, "grad_norm": 0.599474579689702, "learning_rate": 5.367076727981382e-06, "loss": 0.0287, "step": 26779 }, { "epoch": 3.175619589707103, "grad_norm": 0.6315400415551827, "learning_rate": 5.365590769424517e-06, "loss": 0.0346, "step": 26780 }, { "epoch": 3.175738171469228, "grad_norm": 0.5872893752072457, "learning_rate": 5.3641049918728285e-06, "loss": 0.032, "step": 26781 }, { "epoch": 3.175856753231353, "grad_norm": 0.5339187719664221, "learning_rate": 5.362619395340013e-06, "loss": 0.0287, "step": 26782 }, { "epoch": 3.175975334993478, "grad_norm": 0.3552311541599536, "learning_rate": 5.361133979839764e-06, "loss": 0.0133, "step": 26783 }, { "epoch": 3.176093916755603, "grad_norm": 0.5186729839076002, "learning_rate": 5.359648745385778e-06, "loss": 0.0231, "step": 26784 }, { "epoch": 3.176212498517728, "grad_norm": 0.5225114148599227, "learning_rate": 5.358163691991746e-06, "loss": 0.0274, "step": 26785 }, { "epoch": 3.176331080279853, "grad_norm": 0.5108205334355377, "learning_rate": 5.356678819671368e-06, "loss": 0.0283, "step": 26786 }, { "epoch": 3.176449662041978, "grad_norm": 0.7275248216514436, "learning_rate": 5.355194128438312e-06, "loss": 0.0306, "step": 26787 }, { "epoch": 3.1765682438041027, "grad_norm": 0.43844123997498297, "learning_rate": 5.353709618306277e-06, "loss": 0.0237, "step": 26788 }, { "epoch": 3.176686825566228, "grad_norm": 0.28223519563035665, "learning_rate": 5.352225289288948e-06, "loss": 0.0141, "step": 26789 }, { "epoch": 3.1768054073283527, "grad_norm": 0.4396327739376631, "learning_rate": 5.350741141400018e-06, "loss": 0.0184, "step": 26790 }, { "epoch": 3.176923989090478, "grad_norm": 0.5099486453534019, "learning_rate": 5.349257174653147e-06, "loss": 0.0288, "step": 26791 }, { "epoch": 3.1770425708526027, "grad_norm": 0.5993588671905306, "learning_rate": 5.3477733890620305e-06, "loss": 0.0323, "step": 26792 }, { "epoch": 3.177161152614728, "grad_norm": 0.7938445013696611, "learning_rate": 5.3462897846403435e-06, "loss": 0.029, "step": 26793 }, { "epoch": 3.177279734376853, "grad_norm": 0.5865005496649374, "learning_rate": 5.3448063614017636e-06, "loss": 0.0254, "step": 26794 }, { "epoch": 3.177398316138978, "grad_norm": 0.3818087169213731, "learning_rate": 5.3433231193599744e-06, "loss": 0.0266, "step": 26795 }, { "epoch": 3.1775168979011026, "grad_norm": 0.5594325596336476, "learning_rate": 5.341840058528627e-06, "loss": 0.0302, "step": 26796 }, { "epoch": 3.177635479663228, "grad_norm": 0.5802394438463239, "learning_rate": 5.340357178921421e-06, "loss": 0.0299, "step": 26797 }, { "epoch": 3.177754061425353, "grad_norm": 0.5279920513652755, "learning_rate": 5.338874480552006e-06, "loss": 0.0305, "step": 26798 }, { "epoch": 3.1778726431874778, "grad_norm": 0.47305673537198467, "learning_rate": 5.33739196343406e-06, "loss": 0.0296, "step": 26799 }, { "epoch": 3.177991224949603, "grad_norm": 0.4474453899512623, "learning_rate": 5.335909627581248e-06, "loss": 0.0194, "step": 26800 }, { "epoch": 3.1781098067117277, "grad_norm": 0.7296312190459509, "learning_rate": 5.334427473007242e-06, "loss": 0.0279, "step": 26801 }, { "epoch": 3.178228388473853, "grad_norm": 1.0048302875385127, "learning_rate": 5.3329454997256914e-06, "loss": 0.0633, "step": 26802 }, { "epoch": 3.1783469702359777, "grad_norm": 0.4337947930426528, "learning_rate": 5.331463707750267e-06, "loss": 0.0217, "step": 26803 }, { "epoch": 3.178465551998103, "grad_norm": 0.5181409822665912, "learning_rate": 5.329982097094627e-06, "loss": 0.0256, "step": 26804 }, { "epoch": 3.1785841337602276, "grad_norm": 0.9807587412101548, "learning_rate": 5.328500667772432e-06, "loss": 0.02, "step": 26805 }, { "epoch": 3.178702715522353, "grad_norm": 0.27950838349928675, "learning_rate": 5.327019419797344e-06, "loss": 0.0125, "step": 26806 }, { "epoch": 3.1788212972844776, "grad_norm": 0.37850986303017686, "learning_rate": 5.325538353182999e-06, "loss": 0.0212, "step": 26807 }, { "epoch": 3.178939879046603, "grad_norm": 0.7206390767464694, "learning_rate": 5.32405746794308e-06, "loss": 0.0477, "step": 26808 }, { "epoch": 3.1790584608087276, "grad_norm": 0.6863932849820379, "learning_rate": 5.322576764091214e-06, "loss": 0.0345, "step": 26809 }, { "epoch": 3.1791770425708528, "grad_norm": 0.6605511570906201, "learning_rate": 5.321096241641066e-06, "loss": 0.0369, "step": 26810 }, { "epoch": 3.1792956243329775, "grad_norm": 0.7328284820131797, "learning_rate": 5.319615900606268e-06, "loss": 0.0388, "step": 26811 }, { "epoch": 3.1794142060951027, "grad_norm": 0.7076397884328195, "learning_rate": 5.318135741000488e-06, "loss": 0.0363, "step": 26812 }, { "epoch": 3.1795327878572275, "grad_norm": 0.689626470306249, "learning_rate": 5.316655762837355e-06, "loss": 0.0443, "step": 26813 }, { "epoch": 3.1796513696193527, "grad_norm": 0.39094048085489175, "learning_rate": 5.31517596613052e-06, "loss": 0.0238, "step": 26814 }, { "epoch": 3.1797699513814774, "grad_norm": 0.3773723682130621, "learning_rate": 5.313696350893624e-06, "loss": 0.0261, "step": 26815 }, { "epoch": 3.1798885331436026, "grad_norm": 0.7033633002365893, "learning_rate": 5.312216917140306e-06, "loss": 0.0481, "step": 26816 }, { "epoch": 3.1800071149057274, "grad_norm": 0.5500596841137808, "learning_rate": 5.31073766488421e-06, "loss": 0.0276, "step": 26817 }, { "epoch": 3.1801256966678526, "grad_norm": 0.7472721728101827, "learning_rate": 5.309258594138955e-06, "loss": 0.0417, "step": 26818 }, { "epoch": 3.1802442784299774, "grad_norm": 0.24367490078301382, "learning_rate": 5.307779704918206e-06, "loss": 0.0114, "step": 26819 }, { "epoch": 3.1803628601921026, "grad_norm": 0.5434923152316425, "learning_rate": 5.30630099723557e-06, "loss": 0.0284, "step": 26820 }, { "epoch": 3.1804814419542273, "grad_norm": 0.4574806081281509, "learning_rate": 5.304822471104689e-06, "loss": 0.0244, "step": 26821 }, { "epoch": 3.1806000237163525, "grad_norm": 0.7202678387298678, "learning_rate": 5.303344126539195e-06, "loss": 0.0313, "step": 26822 }, { "epoch": 3.1807186054784773, "grad_norm": 0.5166203110422887, "learning_rate": 5.301865963552713e-06, "loss": 0.0362, "step": 26823 }, { "epoch": 3.1808371872406025, "grad_norm": 0.638123706666866, "learning_rate": 5.3003879821588804e-06, "loss": 0.0358, "step": 26824 }, { "epoch": 3.1809557690027273, "grad_norm": 0.7691235530570324, "learning_rate": 5.298910182371297e-06, "loss": 0.0289, "step": 26825 }, { "epoch": 3.1810743507648525, "grad_norm": 0.4029409742305586, "learning_rate": 5.297432564203617e-06, "loss": 0.0192, "step": 26826 }, { "epoch": 3.181192932526977, "grad_norm": 0.6299706603324136, "learning_rate": 5.295955127669442e-06, "loss": 0.0321, "step": 26827 }, { "epoch": 3.1813115142891024, "grad_norm": 0.6449479160731105, "learning_rate": 5.294477872782405e-06, "loss": 0.0341, "step": 26828 }, { "epoch": 3.181430096051227, "grad_norm": 0.3931527033012658, "learning_rate": 5.2930007995561026e-06, "loss": 0.0124, "step": 26829 }, { "epoch": 3.1815486778133524, "grad_norm": 0.5888279715254057, "learning_rate": 5.291523908004181e-06, "loss": 0.0252, "step": 26830 }, { "epoch": 3.181667259575477, "grad_norm": 0.5147081623279509, "learning_rate": 5.290047198140233e-06, "loss": 0.0336, "step": 26831 }, { "epoch": 3.1817858413376023, "grad_norm": 0.4088419844042816, "learning_rate": 5.28857066997788e-06, "loss": 0.0222, "step": 26832 }, { "epoch": 3.181904423099727, "grad_norm": 0.5598879424699281, "learning_rate": 5.287094323530736e-06, "loss": 0.0263, "step": 26833 }, { "epoch": 3.1820230048618523, "grad_norm": 0.34939301068446005, "learning_rate": 5.285618158812405e-06, "loss": 0.0162, "step": 26834 }, { "epoch": 3.182141586623977, "grad_norm": 0.6924403396130859, "learning_rate": 5.2841421758365096e-06, "loss": 0.0371, "step": 26835 }, { "epoch": 3.1822601683861023, "grad_norm": 0.6677149912649722, "learning_rate": 5.282666374616638e-06, "loss": 0.0365, "step": 26836 }, { "epoch": 3.182378750148227, "grad_norm": 0.3554379760659778, "learning_rate": 5.281190755166401e-06, "loss": 0.0145, "step": 26837 }, { "epoch": 3.1824973319103522, "grad_norm": 0.4363064642971755, "learning_rate": 5.279715317499409e-06, "loss": 0.0228, "step": 26838 }, { "epoch": 3.182615913672477, "grad_norm": 0.5314700989520961, "learning_rate": 5.278240061629263e-06, "loss": 0.0259, "step": 26839 }, { "epoch": 3.182734495434602, "grad_norm": 0.6523889440008109, "learning_rate": 5.276764987569546e-06, "loss": 0.0296, "step": 26840 }, { "epoch": 3.182853077196727, "grad_norm": 0.5062895369129786, "learning_rate": 5.275290095333882e-06, "loss": 0.0259, "step": 26841 }, { "epoch": 3.182971658958852, "grad_norm": 0.4792294278546509, "learning_rate": 5.273815384935851e-06, "loss": 0.0205, "step": 26842 }, { "epoch": 3.1830902407209773, "grad_norm": 0.5541322949990094, "learning_rate": 5.272340856389052e-06, "loss": 0.0297, "step": 26843 }, { "epoch": 3.183208822483102, "grad_norm": 0.45610817757689254, "learning_rate": 5.270866509707076e-06, "loss": 0.0302, "step": 26844 }, { "epoch": 3.183327404245227, "grad_norm": 0.5102327367347915, "learning_rate": 5.269392344903521e-06, "loss": 0.0206, "step": 26845 }, { "epoch": 3.183445986007352, "grad_norm": 0.6441262173664005, "learning_rate": 5.267918361991977e-06, "loss": 0.035, "step": 26846 }, { "epoch": 3.1835645677694773, "grad_norm": 0.663606786625218, "learning_rate": 5.2664445609860225e-06, "loss": 0.033, "step": 26847 }, { "epoch": 3.183683149531602, "grad_norm": 0.4426758501637859, "learning_rate": 5.2649709418992525e-06, "loss": 0.0208, "step": 26848 }, { "epoch": 3.183801731293727, "grad_norm": 0.37986531636850274, "learning_rate": 5.263497504745246e-06, "loss": 0.0208, "step": 26849 }, { "epoch": 3.183920313055852, "grad_norm": 0.3593113126318941, "learning_rate": 5.262024249537601e-06, "loss": 0.0177, "step": 26850 }, { "epoch": 3.184038894817977, "grad_norm": 0.5883108326470412, "learning_rate": 5.260551176289879e-06, "loss": 0.0305, "step": 26851 }, { "epoch": 3.184157476580102, "grad_norm": 0.5020708291046785, "learning_rate": 5.259078285015668e-06, "loss": 0.0223, "step": 26852 }, { "epoch": 3.184276058342227, "grad_norm": 0.6172789996855184, "learning_rate": 5.25760557572855e-06, "loss": 0.0306, "step": 26853 }, { "epoch": 3.184394640104352, "grad_norm": 0.49382882496351843, "learning_rate": 5.256133048442097e-06, "loss": 0.0193, "step": 26854 }, { "epoch": 3.184513221866477, "grad_norm": 0.3687136612839422, "learning_rate": 5.254660703169886e-06, "loss": 0.0182, "step": 26855 }, { "epoch": 3.184631803628602, "grad_norm": 0.3495508239636011, "learning_rate": 5.253188539925491e-06, "loss": 0.0128, "step": 26856 }, { "epoch": 3.184750385390727, "grad_norm": 0.46449517700966914, "learning_rate": 5.251716558722486e-06, "loss": 0.0218, "step": 26857 }, { "epoch": 3.184868967152852, "grad_norm": 0.33572881035942825, "learning_rate": 5.250244759574435e-06, "loss": 0.0142, "step": 26858 }, { "epoch": 3.184987548914977, "grad_norm": 0.5132430921719489, "learning_rate": 5.248773142494906e-06, "loss": 0.0265, "step": 26859 }, { "epoch": 3.185106130677102, "grad_norm": 0.5954197946549163, "learning_rate": 5.247301707497468e-06, "loss": 0.0308, "step": 26860 }, { "epoch": 3.185224712439227, "grad_norm": 0.4879108104150854, "learning_rate": 5.245830454595693e-06, "loss": 0.0268, "step": 26861 }, { "epoch": 3.1853432942013518, "grad_norm": 0.49730229513760893, "learning_rate": 5.244359383803133e-06, "loss": 0.0185, "step": 26862 }, { "epoch": 3.185461875963477, "grad_norm": 0.41433594843682797, "learning_rate": 5.24288849513335e-06, "loss": 0.017, "step": 26863 }, { "epoch": 3.1855804577256017, "grad_norm": 0.46623146862940057, "learning_rate": 5.241417788599909e-06, "loss": 0.0221, "step": 26864 }, { "epoch": 3.185699039487727, "grad_norm": 0.5698166875020524, "learning_rate": 5.239947264216366e-06, "loss": 0.0247, "step": 26865 }, { "epoch": 3.1858176212498517, "grad_norm": 0.22618105242619485, "learning_rate": 5.238476921996282e-06, "loss": 0.0082, "step": 26866 }, { "epoch": 3.185936203011977, "grad_norm": 0.712252788864357, "learning_rate": 5.237006761953198e-06, "loss": 0.0228, "step": 26867 }, { "epoch": 3.1860547847741016, "grad_norm": 0.4714340938192217, "learning_rate": 5.235536784100689e-06, "loss": 0.0271, "step": 26868 }, { "epoch": 3.186173366536227, "grad_norm": 0.6783402657536916, "learning_rate": 5.234066988452288e-06, "loss": 0.0396, "step": 26869 }, { "epoch": 3.1862919482983516, "grad_norm": 0.8315144517770839, "learning_rate": 5.232597375021548e-06, "loss": 0.0317, "step": 26870 }, { "epoch": 3.186410530060477, "grad_norm": 0.7220648204853435, "learning_rate": 5.231127943822023e-06, "loss": 0.0372, "step": 26871 }, { "epoch": 3.1865291118226016, "grad_norm": 0.44113899681960206, "learning_rate": 5.229658694867254e-06, "loss": 0.0195, "step": 26872 }, { "epoch": 3.1866476935847268, "grad_norm": 0.351324277541486, "learning_rate": 5.228189628170796e-06, "loss": 0.0211, "step": 26873 }, { "epoch": 3.1867662753468515, "grad_norm": 0.34591846390019915, "learning_rate": 5.226720743746169e-06, "loss": 0.0197, "step": 26874 }, { "epoch": 3.1868848571089767, "grad_norm": 0.47834410517266174, "learning_rate": 5.225252041606943e-06, "loss": 0.0195, "step": 26875 }, { "epoch": 3.1870034388711015, "grad_norm": 0.5567583130931552, "learning_rate": 5.223783521766637e-06, "loss": 0.0381, "step": 26876 }, { "epoch": 3.1871220206332267, "grad_norm": 0.590401183149789, "learning_rate": 5.222315184238805e-06, "loss": 0.0325, "step": 26877 }, { "epoch": 3.1872406023953515, "grad_norm": 0.4935294640730042, "learning_rate": 5.220847029036957e-06, "loss": 0.0273, "step": 26878 }, { "epoch": 3.1873591841574767, "grad_norm": 0.7959633571533623, "learning_rate": 5.219379056174659e-06, "loss": 0.0267, "step": 26879 }, { "epoch": 3.1874777659196014, "grad_norm": 0.3919850445173988, "learning_rate": 5.217911265665423e-06, "loss": 0.0236, "step": 26880 }, { "epoch": 3.1875963476817266, "grad_norm": 0.44386047966804726, "learning_rate": 5.216443657522788e-06, "loss": 0.0224, "step": 26881 }, { "epoch": 3.1877149294438514, "grad_norm": 0.48728790945666384, "learning_rate": 5.214976231760282e-06, "loss": 0.0267, "step": 26882 }, { "epoch": 3.1878335112059766, "grad_norm": 0.6534744615939063, "learning_rate": 5.21350898839143e-06, "loss": 0.0397, "step": 26883 }, { "epoch": 3.1879520929681013, "grad_norm": 0.631548320934242, "learning_rate": 5.212041927429773e-06, "loss": 0.0407, "step": 26884 }, { "epoch": 3.1880706747302265, "grad_norm": 0.6741993654369586, "learning_rate": 5.210575048888808e-06, "loss": 0.0284, "step": 26885 }, { "epoch": 3.1881892564923513, "grad_norm": 0.42135164548227744, "learning_rate": 5.2091083527820855e-06, "loss": 0.027, "step": 26886 }, { "epoch": 3.1883078382544765, "grad_norm": 0.4167114539793162, "learning_rate": 5.207641839123109e-06, "loss": 0.021, "step": 26887 }, { "epoch": 3.1884264200166013, "grad_norm": 0.5585956950253819, "learning_rate": 5.206175507925412e-06, "loss": 0.0332, "step": 26888 }, { "epoch": 3.1885450017787265, "grad_norm": 0.3798935372401742, "learning_rate": 5.204709359202489e-06, "loss": 0.0213, "step": 26889 }, { "epoch": 3.188663583540851, "grad_norm": 0.3852500064502099, "learning_rate": 5.203243392967885e-06, "loss": 0.0176, "step": 26890 }, { "epoch": 3.1887821653029764, "grad_norm": 0.40524215163086674, "learning_rate": 5.201777609235095e-06, "loss": 0.0213, "step": 26891 }, { "epoch": 3.188900747065101, "grad_norm": 0.3874925616092312, "learning_rate": 5.200312008017636e-06, "loss": 0.0185, "step": 26892 }, { "epoch": 3.1890193288272264, "grad_norm": 0.488996036762367, "learning_rate": 5.19884658932902e-06, "loss": 0.023, "step": 26893 }, { "epoch": 3.189137910589351, "grad_norm": 0.4602820510744636, "learning_rate": 5.197381353182757e-06, "loss": 0.0194, "step": 26894 }, { "epoch": 3.1892564923514763, "grad_norm": 0.6089486670454884, "learning_rate": 5.19591629959236e-06, "loss": 0.0366, "step": 26895 }, { "epoch": 3.1893750741136015, "grad_norm": 0.5130614591717626, "learning_rate": 5.194451428571326e-06, "loss": 0.0225, "step": 26896 }, { "epoch": 3.1894936558757263, "grad_norm": 0.8288422824931505, "learning_rate": 5.19298674013316e-06, "loss": 0.0446, "step": 26897 }, { "epoch": 3.189612237637851, "grad_norm": 0.43701231129706497, "learning_rate": 5.1915222342913685e-06, "loss": 0.0246, "step": 26898 }, { "epoch": 3.1897308193999763, "grad_norm": 0.4929255422692809, "learning_rate": 5.190057911059457e-06, "loss": 0.0223, "step": 26899 }, { "epoch": 3.1898494011621015, "grad_norm": 0.565366128086729, "learning_rate": 5.188593770450906e-06, "loss": 0.0286, "step": 26900 }, { "epoch": 3.1899679829242262, "grad_norm": 0.633630582106647, "learning_rate": 5.1871298124792385e-06, "loss": 0.0301, "step": 26901 }, { "epoch": 3.1900865646863514, "grad_norm": 0.9457000334095375, "learning_rate": 5.185666037157932e-06, "loss": 0.0644, "step": 26902 }, { "epoch": 3.190205146448476, "grad_norm": 0.7453050979299765, "learning_rate": 5.184202444500486e-06, "loss": 0.0342, "step": 26903 }, { "epoch": 3.1903237282106014, "grad_norm": 0.4326760769074383, "learning_rate": 5.182739034520395e-06, "loss": 0.0168, "step": 26904 }, { "epoch": 3.190442309972726, "grad_norm": 0.4672052559254063, "learning_rate": 5.1812758072311475e-06, "loss": 0.0214, "step": 26905 }, { "epoch": 3.1905608917348514, "grad_norm": 0.6210463267289279, "learning_rate": 5.1798127626462415e-06, "loss": 0.0298, "step": 26906 }, { "epoch": 3.190679473496976, "grad_norm": 0.4613150462225525, "learning_rate": 5.17834990077915e-06, "loss": 0.0215, "step": 26907 }, { "epoch": 3.1907980552591013, "grad_norm": 0.7078582233017807, "learning_rate": 5.176887221643364e-06, "loss": 0.0332, "step": 26908 }, { "epoch": 3.190916637021226, "grad_norm": 0.6968138130032312, "learning_rate": 5.17542472525237e-06, "loss": 0.0339, "step": 26909 }, { "epoch": 3.1910352187833513, "grad_norm": 0.7380974991422068, "learning_rate": 5.173962411619656e-06, "loss": 0.0603, "step": 26910 }, { "epoch": 3.191153800545476, "grad_norm": 0.5399279622744187, "learning_rate": 5.172500280758685e-06, "loss": 0.0271, "step": 26911 }, { "epoch": 3.1912723823076012, "grad_norm": 0.5230416023677075, "learning_rate": 5.171038332682959e-06, "loss": 0.0198, "step": 26912 }, { "epoch": 3.191390964069726, "grad_norm": 0.4267788106189349, "learning_rate": 5.169576567405937e-06, "loss": 0.0192, "step": 26913 }, { "epoch": 3.191509545831851, "grad_norm": 0.5444334376043003, "learning_rate": 5.1681149849411e-06, "loss": 0.03, "step": 26914 }, { "epoch": 3.191628127593976, "grad_norm": 0.6605983668055896, "learning_rate": 5.166653585301925e-06, "loss": 0.046, "step": 26915 }, { "epoch": 3.191746709356101, "grad_norm": 0.5804601109006878, "learning_rate": 5.1651923685018835e-06, "loss": 0.0238, "step": 26916 }, { "epoch": 3.191865291118226, "grad_norm": 0.5506490675674369, "learning_rate": 5.1637313345544506e-06, "loss": 0.0349, "step": 26917 }, { "epoch": 3.191983872880351, "grad_norm": 0.2737267421503641, "learning_rate": 5.1622704834730834e-06, "loss": 0.0135, "step": 26918 }, { "epoch": 3.192102454642476, "grad_norm": 0.7075414080767747, "learning_rate": 5.160809815271256e-06, "loss": 0.0313, "step": 26919 }, { "epoch": 3.192221036404601, "grad_norm": 0.435815790185961, "learning_rate": 5.159349329962435e-06, "loss": 0.022, "step": 26920 }, { "epoch": 3.192339618166726, "grad_norm": 0.6664140898913483, "learning_rate": 5.1578890275600865e-06, "loss": 0.0345, "step": 26921 }, { "epoch": 3.192458199928851, "grad_norm": 0.5166507247554195, "learning_rate": 5.156428908077665e-06, "loss": 0.0284, "step": 26922 }, { "epoch": 3.192576781690976, "grad_norm": 0.4568992427265952, "learning_rate": 5.154968971528634e-06, "loss": 0.025, "step": 26923 }, { "epoch": 3.192695363453101, "grad_norm": 0.4249494605353448, "learning_rate": 5.153509217926453e-06, "loss": 0.0183, "step": 26924 }, { "epoch": 3.1928139452152258, "grad_norm": 0.3654665189903101, "learning_rate": 5.152049647284582e-06, "loss": 0.0212, "step": 26925 }, { "epoch": 3.192932526977351, "grad_norm": 0.5554830484481221, "learning_rate": 5.150590259616472e-06, "loss": 0.0386, "step": 26926 }, { "epoch": 3.1930511087394757, "grad_norm": 0.5777776823105643, "learning_rate": 5.14913105493558e-06, "loss": 0.0272, "step": 26927 }, { "epoch": 3.193169690501601, "grad_norm": 0.3645540197811924, "learning_rate": 5.147672033255363e-06, "loss": 0.019, "step": 26928 }, { "epoch": 3.1932882722637257, "grad_norm": 0.5371166395928136, "learning_rate": 5.146213194589258e-06, "loss": 0.0223, "step": 26929 }, { "epoch": 3.193406854025851, "grad_norm": 0.49485726358195337, "learning_rate": 5.144754538950722e-06, "loss": 0.0211, "step": 26930 }, { "epoch": 3.1935254357879757, "grad_norm": 0.6303027171134735, "learning_rate": 5.143296066353201e-06, "loss": 0.037, "step": 26931 }, { "epoch": 3.193644017550101, "grad_norm": 0.6376935049765313, "learning_rate": 5.141837776810141e-06, "loss": 0.0303, "step": 26932 }, { "epoch": 3.1937625993122256, "grad_norm": 0.8571658843404708, "learning_rate": 5.140379670334988e-06, "loss": 0.0412, "step": 26933 }, { "epoch": 3.193881181074351, "grad_norm": 0.5557093152771418, "learning_rate": 5.138921746941172e-06, "loss": 0.0317, "step": 26934 }, { "epoch": 3.1939997628364756, "grad_norm": 0.5338415654053801, "learning_rate": 5.137464006642154e-06, "loss": 0.0289, "step": 26935 }, { "epoch": 3.194118344598601, "grad_norm": 0.49504335197646526, "learning_rate": 5.1360064494513555e-06, "loss": 0.0233, "step": 26936 }, { "epoch": 3.1942369263607255, "grad_norm": 0.593200567055769, "learning_rate": 5.134549075382222e-06, "loss": 0.0226, "step": 26937 }, { "epoch": 3.1943555081228507, "grad_norm": 0.757324279441108, "learning_rate": 5.133091884448174e-06, "loss": 0.037, "step": 26938 }, { "epoch": 3.1944740898849755, "grad_norm": 0.47938179582051904, "learning_rate": 5.1316348766626695e-06, "loss": 0.0305, "step": 26939 }, { "epoch": 3.1945926716471007, "grad_norm": 0.7188923463162126, "learning_rate": 5.130178052039123e-06, "loss": 0.0312, "step": 26940 }, { "epoch": 3.1947112534092255, "grad_norm": 0.531042918874453, "learning_rate": 5.128721410590967e-06, "loss": 0.0226, "step": 26941 }, { "epoch": 3.1948298351713507, "grad_norm": 0.9329678800153057, "learning_rate": 5.127264952331631e-06, "loss": 0.0271, "step": 26942 }, { "epoch": 3.1949484169334754, "grad_norm": 0.7363641898866483, "learning_rate": 5.125808677274543e-06, "loss": 0.0274, "step": 26943 }, { "epoch": 3.1950669986956006, "grad_norm": 0.4205734845729374, "learning_rate": 5.124352585433134e-06, "loss": 0.0278, "step": 26944 }, { "epoch": 3.195185580457726, "grad_norm": 0.819016515921391, "learning_rate": 5.122896676820807e-06, "loss": 0.0358, "step": 26945 }, { "epoch": 3.1953041622198506, "grad_norm": 0.5645696568523688, "learning_rate": 5.121440951451015e-06, "loss": 0.0339, "step": 26946 }, { "epoch": 3.1954227439819753, "grad_norm": 0.587494219987295, "learning_rate": 5.11998540933715e-06, "loss": 0.033, "step": 26947 }, { "epoch": 3.1955413257441005, "grad_norm": 0.8491334166511576, "learning_rate": 5.1185300504926485e-06, "loss": 0.0486, "step": 26948 }, { "epoch": 3.1956599075062257, "grad_norm": 0.5721603121715274, "learning_rate": 5.117074874930908e-06, "loss": 0.0335, "step": 26949 }, { "epoch": 3.1957784892683505, "grad_norm": 0.9624704081770229, "learning_rate": 5.115619882665365e-06, "loss": 0.0658, "step": 26950 }, { "epoch": 3.1958970710304757, "grad_norm": 0.5620849120622815, "learning_rate": 5.1141650737094205e-06, "loss": 0.0216, "step": 26951 }, { "epoch": 3.1960156527926005, "grad_norm": 0.6688371887508554, "learning_rate": 5.112710448076485e-06, "loss": 0.028, "step": 26952 }, { "epoch": 3.1961342345547257, "grad_norm": 0.47348511612817334, "learning_rate": 5.111256005779974e-06, "loss": 0.0285, "step": 26953 }, { "epoch": 3.1962528163168504, "grad_norm": 0.6652151528020434, "learning_rate": 5.109801746833292e-06, "loss": 0.0428, "step": 26954 }, { "epoch": 3.1963713980789756, "grad_norm": 0.5249441914903855, "learning_rate": 5.108347671249855e-06, "loss": 0.0187, "step": 26955 }, { "epoch": 3.1964899798411004, "grad_norm": 0.5799846680153278, "learning_rate": 5.106893779043045e-06, "loss": 0.0285, "step": 26956 }, { "epoch": 3.1966085616032256, "grad_norm": 0.6993994776526516, "learning_rate": 5.105440070226294e-06, "loss": 0.0371, "step": 26957 }, { "epoch": 3.1967271433653504, "grad_norm": 0.5021421296473735, "learning_rate": 5.103986544812983e-06, "loss": 0.0264, "step": 26958 }, { "epoch": 3.1968457251274756, "grad_norm": 0.4171662770103438, "learning_rate": 5.102533202816523e-06, "loss": 0.0201, "step": 26959 }, { "epoch": 3.1969643068896003, "grad_norm": 0.4653848871200171, "learning_rate": 5.101080044250295e-06, "loss": 0.0285, "step": 26960 }, { "epoch": 3.1970828886517255, "grad_norm": 0.366187067557477, "learning_rate": 5.099627069127719e-06, "loss": 0.0228, "step": 26961 }, { "epoch": 3.1972014704138503, "grad_norm": 0.5357662143837045, "learning_rate": 5.098174277462173e-06, "loss": 0.0258, "step": 26962 }, { "epoch": 3.1973200521759755, "grad_norm": 0.445187534544748, "learning_rate": 5.096721669267055e-06, "loss": 0.0239, "step": 26963 }, { "epoch": 3.1974386339381002, "grad_norm": 0.6798602734259284, "learning_rate": 5.095269244555756e-06, "loss": 0.0301, "step": 26964 }, { "epoch": 3.1975572157002254, "grad_norm": 0.3319861509842143, "learning_rate": 5.093817003341664e-06, "loss": 0.0179, "step": 26965 }, { "epoch": 3.19767579746235, "grad_norm": 0.7620064736457987, "learning_rate": 5.092364945638178e-06, "loss": 0.0396, "step": 26966 }, { "epoch": 3.1977943792244754, "grad_norm": 0.6782925350114105, "learning_rate": 5.090913071458667e-06, "loss": 0.0401, "step": 26967 }, { "epoch": 3.1979129609866, "grad_norm": 0.3936527582674498, "learning_rate": 5.089461380816524e-06, "loss": 0.0217, "step": 26968 }, { "epoch": 3.1980315427487254, "grad_norm": 0.4108785335325245, "learning_rate": 5.088009873725128e-06, "loss": 0.0302, "step": 26969 }, { "epoch": 3.19815012451085, "grad_norm": 0.5556151159635605, "learning_rate": 5.086558550197873e-06, "loss": 0.0259, "step": 26970 }, { "epoch": 3.1982687062729753, "grad_norm": 0.39165405415067356, "learning_rate": 5.0851074102481165e-06, "loss": 0.0163, "step": 26971 }, { "epoch": 3.1983872880351, "grad_norm": 0.6823119537551505, "learning_rate": 5.083656453889257e-06, "loss": 0.0284, "step": 26972 }, { "epoch": 3.1985058697972253, "grad_norm": 0.4942851588830452, "learning_rate": 5.0822056811346604e-06, "loss": 0.0371, "step": 26973 }, { "epoch": 3.19862445155935, "grad_norm": 0.5242442853084224, "learning_rate": 5.080755091997699e-06, "loss": 0.0246, "step": 26974 }, { "epoch": 3.1987430333214752, "grad_norm": 0.9727699985113757, "learning_rate": 5.079304686491751e-06, "loss": 0.0498, "step": 26975 }, { "epoch": 3.1988616150836, "grad_norm": 0.6081117222061302, "learning_rate": 5.077854464630186e-06, "loss": 0.0261, "step": 26976 }, { "epoch": 3.198980196845725, "grad_norm": 0.5412970283348593, "learning_rate": 5.076404426426376e-06, "loss": 0.0232, "step": 26977 }, { "epoch": 3.19909877860785, "grad_norm": 0.501996230416171, "learning_rate": 5.074954571893681e-06, "loss": 0.024, "step": 26978 }, { "epoch": 3.199217360369975, "grad_norm": 0.3473407292522982, "learning_rate": 5.073504901045473e-06, "loss": 0.0159, "step": 26979 }, { "epoch": 3.1993359421321, "grad_norm": 0.32571162881402876, "learning_rate": 5.072055413895111e-06, "loss": 0.019, "step": 26980 }, { "epoch": 3.199454523894225, "grad_norm": 0.5457594387112908, "learning_rate": 5.0706061104559635e-06, "loss": 0.0204, "step": 26981 }, { "epoch": 3.19957310565635, "grad_norm": 0.5557206569432737, "learning_rate": 5.069156990741394e-06, "loss": 0.0287, "step": 26982 }, { "epoch": 3.199691687418475, "grad_norm": 0.46074883778845904, "learning_rate": 5.0677080547647416e-06, "loss": 0.0187, "step": 26983 }, { "epoch": 3.1998102691806, "grad_norm": 0.6070037521275514, "learning_rate": 5.066259302539395e-06, "loss": 0.0288, "step": 26984 }, { "epoch": 3.199928850942725, "grad_norm": 0.958927570581507, "learning_rate": 5.064810734078681e-06, "loss": 0.047, "step": 26985 }, { "epoch": 3.20004743270485, "grad_norm": 0.44476879634917704, "learning_rate": 5.0633623493959695e-06, "loss": 0.0137, "step": 26986 }, { "epoch": 3.200166014466975, "grad_norm": 0.9451667499560445, "learning_rate": 5.0619141485046096e-06, "loss": 0.0498, "step": 26987 }, { "epoch": 3.2002845962290998, "grad_norm": 0.43749996325044727, "learning_rate": 5.06046613141796e-06, "loss": 0.0199, "step": 26988 }, { "epoch": 3.200403177991225, "grad_norm": 0.48543583744740887, "learning_rate": 5.059018298149351e-06, "loss": 0.0232, "step": 26989 }, { "epoch": 3.2005217597533497, "grad_norm": 0.4681283058829727, "learning_rate": 5.057570648712143e-06, "loss": 0.0206, "step": 26990 }, { "epoch": 3.200640341515475, "grad_norm": 0.39888778038165357, "learning_rate": 5.056123183119676e-06, "loss": 0.0173, "step": 26991 }, { "epoch": 3.2007589232775997, "grad_norm": 0.41691228848472983, "learning_rate": 5.054675901385297e-06, "loss": 0.0212, "step": 26992 }, { "epoch": 3.200877505039725, "grad_norm": 0.5189003098316355, "learning_rate": 5.053228803522356e-06, "loss": 0.0304, "step": 26993 }, { "epoch": 3.20099608680185, "grad_norm": 0.5883890898892518, "learning_rate": 5.051781889544169e-06, "loss": 0.0274, "step": 26994 }, { "epoch": 3.201114668563975, "grad_norm": 0.46807797485358543, "learning_rate": 5.050335159464106e-06, "loss": 0.0293, "step": 26995 }, { "epoch": 3.2012332503260996, "grad_norm": 0.5441720170603063, "learning_rate": 5.0488886132954825e-06, "loss": 0.0331, "step": 26996 }, { "epoch": 3.201351832088225, "grad_norm": 0.42801238799822366, "learning_rate": 5.047442251051637e-06, "loss": 0.0216, "step": 26997 }, { "epoch": 3.20147041385035, "grad_norm": 0.46029273728171827, "learning_rate": 5.045996072745912e-06, "loss": 0.0313, "step": 26998 }, { "epoch": 3.201588995612475, "grad_norm": 0.4618799528244907, "learning_rate": 5.044550078391635e-06, "loss": 0.0263, "step": 26999 }, { "epoch": 3.2017075773746, "grad_norm": 0.4225154990092172, "learning_rate": 5.043104268002133e-06, "loss": 0.0266, "step": 27000 }, { "epoch": 3.2018261591367247, "grad_norm": 0.49078660720147144, "learning_rate": 5.041658641590735e-06, "loss": 0.0224, "step": 27001 }, { "epoch": 3.20194474089885, "grad_norm": 0.46418812320321884, "learning_rate": 5.040213199170771e-06, "loss": 0.0248, "step": 27002 }, { "epoch": 3.2020633226609747, "grad_norm": 0.5874413973202138, "learning_rate": 5.0387679407555645e-06, "loss": 0.0323, "step": 27003 }, { "epoch": 3.2021819044231, "grad_norm": 0.44344644418150325, "learning_rate": 5.037322866358446e-06, "loss": 0.0256, "step": 27004 }, { "epoch": 3.2023004861852247, "grad_norm": 0.4924068526311927, "learning_rate": 5.035877975992717e-06, "loss": 0.0303, "step": 27005 }, { "epoch": 3.20241906794735, "grad_norm": 0.5700032887492784, "learning_rate": 5.034433269671723e-06, "loss": 0.0271, "step": 27006 }, { "epoch": 3.2025376497094746, "grad_norm": 0.5998223131560486, "learning_rate": 5.032988747408768e-06, "loss": 0.0342, "step": 27007 }, { "epoch": 3.2026562314716, "grad_norm": 0.5514272677471899, "learning_rate": 5.031544409217176e-06, "loss": 0.0247, "step": 27008 }, { "epoch": 3.2027748132337246, "grad_norm": 0.607516964120448, "learning_rate": 5.030100255110246e-06, "loss": 0.0263, "step": 27009 }, { "epoch": 3.20289339499585, "grad_norm": 0.4976230338614118, "learning_rate": 5.028656285101313e-06, "loss": 0.0236, "step": 27010 }, { "epoch": 3.2030119767579746, "grad_norm": 0.5984066332696342, "learning_rate": 5.027212499203676e-06, "loss": 0.0364, "step": 27011 }, { "epoch": 3.2031305585200998, "grad_norm": 0.8242423879729986, "learning_rate": 5.025768897430644e-06, "loss": 0.0406, "step": 27012 }, { "epoch": 3.2032491402822245, "grad_norm": 0.43333348996469395, "learning_rate": 5.02432547979553e-06, "loss": 0.0153, "step": 27013 }, { "epoch": 3.2033677220443497, "grad_norm": 0.6039307882506398, "learning_rate": 5.022882246311641e-06, "loss": 0.0424, "step": 27014 }, { "epoch": 3.2034863038064745, "grad_norm": 0.9679023078825638, "learning_rate": 5.021439196992283e-06, "loss": 0.047, "step": 27015 }, { "epoch": 3.2036048855685997, "grad_norm": 0.6667951644951909, "learning_rate": 5.019996331850746e-06, "loss": 0.0335, "step": 27016 }, { "epoch": 3.2037234673307244, "grad_norm": 0.7390087243550055, "learning_rate": 5.0185536509003536e-06, "loss": 0.0357, "step": 27017 }, { "epoch": 3.2038420490928496, "grad_norm": 0.7018625511454313, "learning_rate": 5.017111154154389e-06, "loss": 0.0394, "step": 27018 }, { "epoch": 3.2039606308549744, "grad_norm": 0.49231765453931814, "learning_rate": 5.0156688416261615e-06, "loss": 0.0319, "step": 27019 }, { "epoch": 3.2040792126170996, "grad_norm": 0.5099516512560945, "learning_rate": 5.014226713328945e-06, "loss": 0.0255, "step": 27020 }, { "epoch": 3.2041977943792244, "grad_norm": 0.8101994007336396, "learning_rate": 5.012784769276066e-06, "loss": 0.0526, "step": 27021 }, { "epoch": 3.2043163761413496, "grad_norm": 0.37430980102540684, "learning_rate": 5.011343009480793e-06, "loss": 0.0167, "step": 27022 }, { "epoch": 3.2044349579034743, "grad_norm": 0.47601551850807333, "learning_rate": 5.009901433956427e-06, "loss": 0.0291, "step": 27023 }, { "epoch": 3.2045535396655995, "grad_norm": 1.2641074905364464, "learning_rate": 5.008460042716254e-06, "loss": 0.0733, "step": 27024 }, { "epoch": 3.2046721214277243, "grad_norm": 0.4581452423669284, "learning_rate": 5.007018835773567e-06, "loss": 0.0289, "step": 27025 }, { "epoch": 3.2047907031898495, "grad_norm": 0.6501507776870904, "learning_rate": 5.005577813141654e-06, "loss": 0.0291, "step": 27026 }, { "epoch": 3.2049092849519742, "grad_norm": 0.6948443016552884, "learning_rate": 5.004136974833782e-06, "loss": 0.0342, "step": 27027 }, { "epoch": 3.2050278667140994, "grad_norm": 0.8696228859462399, "learning_rate": 5.002696320863259e-06, "loss": 0.0478, "step": 27028 }, { "epoch": 3.205146448476224, "grad_norm": 0.6465157202831386, "learning_rate": 5.00125585124335e-06, "loss": 0.0331, "step": 27029 }, { "epoch": 3.2052650302383494, "grad_norm": 0.7477856646787003, "learning_rate": 4.999815565987334e-06, "loss": 0.0463, "step": 27030 }, { "epoch": 3.205383612000474, "grad_norm": 0.6122153423727835, "learning_rate": 4.998375465108496e-06, "loss": 0.0345, "step": 27031 }, { "epoch": 3.2055021937625994, "grad_norm": 0.574014058150516, "learning_rate": 4.996935548620108e-06, "loss": 0.0199, "step": 27032 }, { "epoch": 3.205620775524724, "grad_norm": 0.7814319621461882, "learning_rate": 4.99549581653545e-06, "loss": 0.0379, "step": 27033 }, { "epoch": 3.2057393572868493, "grad_norm": 0.45107671511856645, "learning_rate": 4.994056268867783e-06, "loss": 0.0183, "step": 27034 }, { "epoch": 3.205857939048974, "grad_norm": 0.739923069996488, "learning_rate": 4.992616905630385e-06, "loss": 0.0331, "step": 27035 }, { "epoch": 3.2059765208110993, "grad_norm": 0.5714840892985907, "learning_rate": 4.991177726836524e-06, "loss": 0.0217, "step": 27036 }, { "epoch": 3.206095102573224, "grad_norm": 0.5405641356418535, "learning_rate": 4.989738732499474e-06, "loss": 0.0371, "step": 27037 }, { "epoch": 3.2062136843353493, "grad_norm": 0.5890050645721773, "learning_rate": 4.9882999226324885e-06, "loss": 0.0295, "step": 27038 }, { "epoch": 3.206332266097474, "grad_norm": 0.7399834596570002, "learning_rate": 4.986861297248837e-06, "loss": 0.0377, "step": 27039 }, { "epoch": 3.206450847859599, "grad_norm": 0.7275216183452793, "learning_rate": 4.985422856361785e-06, "loss": 0.0422, "step": 27040 }, { "epoch": 3.206569429621724, "grad_norm": 0.32120667345484194, "learning_rate": 4.98398459998459e-06, "loss": 0.0196, "step": 27041 }, { "epoch": 3.206688011383849, "grad_norm": 0.4771235024221319, "learning_rate": 4.98254652813051e-06, "loss": 0.0194, "step": 27042 }, { "epoch": 3.2068065931459744, "grad_norm": 0.651345734592974, "learning_rate": 4.9811086408128065e-06, "loss": 0.0266, "step": 27043 }, { "epoch": 3.206925174908099, "grad_norm": 0.803997498964712, "learning_rate": 4.979670938044736e-06, "loss": 0.0505, "step": 27044 }, { "epoch": 3.207043756670224, "grad_norm": 0.4982464681226307, "learning_rate": 4.9782334198395454e-06, "loss": 0.017, "step": 27045 }, { "epoch": 3.207162338432349, "grad_norm": 0.6368611994785668, "learning_rate": 4.976796086210489e-06, "loss": 0.036, "step": 27046 }, { "epoch": 3.2072809201944743, "grad_norm": 0.7811527215216876, "learning_rate": 4.975358937170818e-06, "loss": 0.0349, "step": 27047 }, { "epoch": 3.207399501956599, "grad_norm": 0.4002245382166025, "learning_rate": 4.9739219727337885e-06, "loss": 0.0217, "step": 27048 }, { "epoch": 3.2075180837187243, "grad_norm": 0.4443350364265798, "learning_rate": 4.972485192912637e-06, "loss": 0.0263, "step": 27049 }, { "epoch": 3.207636665480849, "grad_norm": 0.60458807625833, "learning_rate": 4.971048597720609e-06, "loss": 0.0222, "step": 27050 }, { "epoch": 3.2077552472429742, "grad_norm": 0.6688705494664458, "learning_rate": 4.969612187170955e-06, "loss": 0.0499, "step": 27051 }, { "epoch": 3.207873829005099, "grad_norm": 0.3736367763607001, "learning_rate": 4.968175961276911e-06, "loss": 0.0199, "step": 27052 }, { "epoch": 3.207992410767224, "grad_norm": 0.3708767055072503, "learning_rate": 4.966739920051728e-06, "loss": 0.0153, "step": 27053 }, { "epoch": 3.208110992529349, "grad_norm": 1.005678773778029, "learning_rate": 4.965304063508622e-06, "loss": 0.0356, "step": 27054 }, { "epoch": 3.208229574291474, "grad_norm": 0.377417483074164, "learning_rate": 4.963868391660859e-06, "loss": 0.0189, "step": 27055 }, { "epoch": 3.208348156053599, "grad_norm": 0.3347756174342757, "learning_rate": 4.962432904521652e-06, "loss": 0.0151, "step": 27056 }, { "epoch": 3.208466737815724, "grad_norm": 0.4269748662688113, "learning_rate": 4.960997602104242e-06, "loss": 0.0181, "step": 27057 }, { "epoch": 3.208585319577849, "grad_norm": 0.7544825183747258, "learning_rate": 4.959562484421862e-06, "loss": 0.043, "step": 27058 }, { "epoch": 3.208703901339974, "grad_norm": 0.7040985803380599, "learning_rate": 4.958127551487746e-06, "loss": 0.0326, "step": 27059 }, { "epoch": 3.208822483102099, "grad_norm": 0.3385875643611163, "learning_rate": 4.95669280331511e-06, "loss": 0.0193, "step": 27060 }, { "epoch": 3.208941064864224, "grad_norm": 0.3186687415153728, "learning_rate": 4.955258239917187e-06, "loss": 0.0131, "step": 27061 }, { "epoch": 3.209059646626349, "grad_norm": 0.5627789663838764, "learning_rate": 4.953823861307205e-06, "loss": 0.0329, "step": 27062 }, { "epoch": 3.209178228388474, "grad_norm": 0.6553090016138743, "learning_rate": 4.952389667498383e-06, "loss": 0.0301, "step": 27063 }, { "epoch": 3.2092968101505988, "grad_norm": 0.8261983408702537, "learning_rate": 4.950955658503953e-06, "loss": 0.0481, "step": 27064 }, { "epoch": 3.209415391912724, "grad_norm": 0.6844416030563497, "learning_rate": 4.9495218343371115e-06, "loss": 0.039, "step": 27065 }, { "epoch": 3.2095339736748487, "grad_norm": 0.4254018947038711, "learning_rate": 4.948088195011103e-06, "loss": 0.0188, "step": 27066 }, { "epoch": 3.209652555436974, "grad_norm": 0.42440294512836524, "learning_rate": 4.946654740539128e-06, "loss": 0.0233, "step": 27067 }, { "epoch": 3.2097711371990987, "grad_norm": 0.5836880152690598, "learning_rate": 4.9452214709344025e-06, "loss": 0.0221, "step": 27068 }, { "epoch": 3.209889718961224, "grad_norm": 0.5532553111630646, "learning_rate": 4.943788386210144e-06, "loss": 0.0233, "step": 27069 }, { "epoch": 3.2100083007233486, "grad_norm": 0.7099197626351222, "learning_rate": 4.942355486379568e-06, "loss": 0.0292, "step": 27070 }, { "epoch": 3.210126882485474, "grad_norm": 0.6750075678336807, "learning_rate": 4.940922771455872e-06, "loss": 0.0326, "step": 27071 }, { "epoch": 3.2102454642475986, "grad_norm": 0.24672741959556543, "learning_rate": 4.9394902414522685e-06, "loss": 0.0095, "step": 27072 }, { "epoch": 3.210364046009724, "grad_norm": 1.0587347708000714, "learning_rate": 4.938057896381968e-06, "loss": 0.0393, "step": 27073 }, { "epoch": 3.2104826277718486, "grad_norm": 0.3589549981023637, "learning_rate": 4.936625736258171e-06, "loss": 0.0179, "step": 27074 }, { "epoch": 3.2106012095339738, "grad_norm": 0.9341626036407942, "learning_rate": 4.935193761094087e-06, "loss": 0.0519, "step": 27075 }, { "epoch": 3.2107197912960985, "grad_norm": 0.4606809112924022, "learning_rate": 4.933761970902897e-06, "loss": 0.0229, "step": 27076 }, { "epoch": 3.2108383730582237, "grad_norm": 0.6385392694709144, "learning_rate": 4.932330365697832e-06, "loss": 0.0297, "step": 27077 }, { "epoch": 3.2109569548203485, "grad_norm": 0.46613393651723734, "learning_rate": 4.930898945492063e-06, "loss": 0.0256, "step": 27078 }, { "epoch": 3.2110755365824737, "grad_norm": 0.5412300386164081, "learning_rate": 4.929467710298796e-06, "loss": 0.0288, "step": 27079 }, { "epoch": 3.2111941183445984, "grad_norm": 0.6170897691184155, "learning_rate": 4.9280366601312245e-06, "loss": 0.0221, "step": 27080 }, { "epoch": 3.2113127001067236, "grad_norm": 0.3608064221916736, "learning_rate": 4.92660579500254e-06, "loss": 0.0181, "step": 27081 }, { "epoch": 3.2114312818688484, "grad_norm": 0.4280773022820683, "learning_rate": 4.9251751149259426e-06, "loss": 0.0179, "step": 27082 }, { "epoch": 3.2115498636309736, "grad_norm": 0.5708351776761729, "learning_rate": 4.9237446199146e-06, "loss": 0.0314, "step": 27083 }, { "epoch": 3.2116684453930984, "grad_norm": 0.5309402318453341, "learning_rate": 4.922314309981727e-06, "loss": 0.0292, "step": 27084 }, { "epoch": 3.2117870271552236, "grad_norm": 0.6746003432433283, "learning_rate": 4.920884185140487e-06, "loss": 0.0403, "step": 27085 }, { "epoch": 3.2119056089173483, "grad_norm": 0.49827690586654494, "learning_rate": 4.919454245404079e-06, "loss": 0.0235, "step": 27086 }, { "epoch": 3.2120241906794735, "grad_norm": 0.5211201699153272, "learning_rate": 4.918024490785664e-06, "loss": 0.0244, "step": 27087 }, { "epoch": 3.2121427724415983, "grad_norm": 0.7221696912377594, "learning_rate": 4.9165949212984505e-06, "loss": 0.0395, "step": 27088 }, { "epoch": 3.2122613542037235, "grad_norm": 0.5111766467096094, "learning_rate": 4.915165536955599e-06, "loss": 0.0307, "step": 27089 }, { "epoch": 3.2123799359658483, "grad_norm": 0.4668384807154018, "learning_rate": 4.913736337770292e-06, "loss": 0.0231, "step": 27090 }, { "epoch": 3.2124985177279735, "grad_norm": 0.4502471405867554, "learning_rate": 4.912307323755702e-06, "loss": 0.0166, "step": 27091 }, { "epoch": 3.212617099490098, "grad_norm": 0.4790266769230135, "learning_rate": 4.910878494925008e-06, "loss": 0.0302, "step": 27092 }, { "epoch": 3.2127356812522234, "grad_norm": 0.47904577771318585, "learning_rate": 4.909449851291384e-06, "loss": 0.0226, "step": 27093 }, { "epoch": 3.212854263014348, "grad_norm": 0.5950838707099914, "learning_rate": 4.908021392867989e-06, "loss": 0.0222, "step": 27094 }, { "epoch": 3.2129728447764734, "grad_norm": 0.595833036685722, "learning_rate": 4.906593119668001e-06, "loss": 0.0352, "step": 27095 }, { "epoch": 3.2130914265385986, "grad_norm": 0.6218192035972047, "learning_rate": 4.905165031704584e-06, "loss": 0.0461, "step": 27096 }, { "epoch": 3.2132100083007233, "grad_norm": 0.5048711979939895, "learning_rate": 4.903737128990907e-06, "loss": 0.0271, "step": 27097 }, { "epoch": 3.213328590062848, "grad_norm": 0.7317280229295835, "learning_rate": 4.902309411540118e-06, "loss": 0.0451, "step": 27098 }, { "epoch": 3.2134471718249733, "grad_norm": 0.6032619348672169, "learning_rate": 4.900881879365404e-06, "loss": 0.0302, "step": 27099 }, { "epoch": 3.2135657535870985, "grad_norm": 0.3661808598821854, "learning_rate": 4.899454532479908e-06, "loss": 0.0149, "step": 27100 }, { "epoch": 3.2136843353492233, "grad_norm": 0.5011063889781777, "learning_rate": 4.898027370896788e-06, "loss": 0.0265, "step": 27101 }, { "epoch": 3.2138029171113485, "grad_norm": 0.6382682593000024, "learning_rate": 4.8966003946292085e-06, "loss": 0.0369, "step": 27102 }, { "epoch": 3.2139214988734732, "grad_norm": 0.7181157597680852, "learning_rate": 4.89517360369032e-06, "loss": 0.0373, "step": 27103 }, { "epoch": 3.2140400806355984, "grad_norm": 0.706273821929243, "learning_rate": 4.893746998093282e-06, "loss": 0.0372, "step": 27104 }, { "epoch": 3.214158662397723, "grad_norm": 0.3861241538474286, "learning_rate": 4.8923205778512335e-06, "loss": 0.0235, "step": 27105 }, { "epoch": 3.2142772441598484, "grad_norm": 0.5401576288422283, "learning_rate": 4.890894342977334e-06, "loss": 0.03, "step": 27106 }, { "epoch": 3.214395825921973, "grad_norm": 0.4343890969821429, "learning_rate": 4.889468293484728e-06, "loss": 0.0302, "step": 27107 }, { "epoch": 3.2145144076840984, "grad_norm": 0.46471599184794277, "learning_rate": 4.8880424293865715e-06, "loss": 0.0183, "step": 27108 }, { "epoch": 3.214632989446223, "grad_norm": 0.4021538951870774, "learning_rate": 4.886616750695991e-06, "loss": 0.0172, "step": 27109 }, { "epoch": 3.2147515712083483, "grad_norm": 0.5637703410099437, "learning_rate": 4.885191257426142e-06, "loss": 0.034, "step": 27110 }, { "epoch": 3.214870152970473, "grad_norm": 0.4348594010007127, "learning_rate": 4.883765949590163e-06, "loss": 0.0253, "step": 27111 }, { "epoch": 3.2149887347325983, "grad_norm": 0.34667848526763956, "learning_rate": 4.882340827201193e-06, "loss": 0.0191, "step": 27112 }, { "epoch": 3.215107316494723, "grad_norm": 0.5809350441624331, "learning_rate": 4.880915890272372e-06, "loss": 0.0261, "step": 27113 }, { "epoch": 3.2152258982568482, "grad_norm": 0.9528832511135107, "learning_rate": 4.879491138816833e-06, "loss": 0.0442, "step": 27114 }, { "epoch": 3.215344480018973, "grad_norm": 0.3868807965045889, "learning_rate": 4.87806657284772e-06, "loss": 0.0114, "step": 27115 }, { "epoch": 3.215463061781098, "grad_norm": 0.43152103354340143, "learning_rate": 4.87664219237815e-06, "loss": 0.0229, "step": 27116 }, { "epoch": 3.215581643543223, "grad_norm": 0.6712397869178642, "learning_rate": 4.875217997421264e-06, "loss": 0.0439, "step": 27117 }, { "epoch": 3.215700225305348, "grad_norm": 0.362022499431157, "learning_rate": 4.8737939879901884e-06, "loss": 0.0212, "step": 27118 }, { "epoch": 3.215818807067473, "grad_norm": 0.326404774477491, "learning_rate": 4.872370164098058e-06, "loss": 0.019, "step": 27119 }, { "epoch": 3.215937388829598, "grad_norm": 0.46732394697785645, "learning_rate": 4.870946525757988e-06, "loss": 0.0182, "step": 27120 }, { "epoch": 3.216055970591723, "grad_norm": 0.6140216446254606, "learning_rate": 4.869523072983107e-06, "loss": 0.0476, "step": 27121 }, { "epoch": 3.216174552353848, "grad_norm": 0.6480079462522715, "learning_rate": 4.868099805786535e-06, "loss": 0.0341, "step": 27122 }, { "epoch": 3.216293134115973, "grad_norm": 0.43391969400473984, "learning_rate": 4.866676724181396e-06, "loss": 0.021, "step": 27123 }, { "epoch": 3.216411715878098, "grad_norm": 0.5521883071652235, "learning_rate": 4.865253828180816e-06, "loss": 0.0281, "step": 27124 }, { "epoch": 3.216530297640223, "grad_norm": 0.4393152817037006, "learning_rate": 4.863831117797893e-06, "loss": 0.0286, "step": 27125 }, { "epoch": 3.216648879402348, "grad_norm": 0.5039165618084218, "learning_rate": 4.862408593045764e-06, "loss": 0.028, "step": 27126 }, { "epoch": 3.2167674611644728, "grad_norm": 0.9214251305383692, "learning_rate": 4.86098625393753e-06, "loss": 0.0526, "step": 27127 }, { "epoch": 3.216886042926598, "grad_norm": 0.373713198876367, "learning_rate": 4.859564100486305e-06, "loss": 0.0176, "step": 27128 }, { "epoch": 3.2170046246887227, "grad_norm": 0.5630233497742907, "learning_rate": 4.858142132705204e-06, "loss": 0.0243, "step": 27129 }, { "epoch": 3.217123206450848, "grad_norm": 0.4484121259477251, "learning_rate": 4.856720350607335e-06, "loss": 0.0167, "step": 27130 }, { "epoch": 3.2172417882129727, "grad_norm": 0.4132359966213061, "learning_rate": 4.8552987542058005e-06, "loss": 0.0245, "step": 27131 }, { "epoch": 3.217360369975098, "grad_norm": 0.3698949788257355, "learning_rate": 4.853877343513708e-06, "loss": 0.0177, "step": 27132 }, { "epoch": 3.2174789517372226, "grad_norm": 0.5029102724569371, "learning_rate": 4.85245611854416e-06, "loss": 0.0224, "step": 27133 }, { "epoch": 3.217597533499348, "grad_norm": 0.4030284089438012, "learning_rate": 4.851035079310259e-06, "loss": 0.0285, "step": 27134 }, { "epoch": 3.2177161152614726, "grad_norm": 0.47646274945417577, "learning_rate": 4.849614225825116e-06, "loss": 0.0262, "step": 27135 }, { "epoch": 3.217834697023598, "grad_norm": 0.4590026839089797, "learning_rate": 4.848193558101804e-06, "loss": 0.0284, "step": 27136 }, { "epoch": 3.2179532787857226, "grad_norm": 0.6170639358475326, "learning_rate": 4.846773076153449e-06, "loss": 0.0225, "step": 27137 }, { "epoch": 3.2180718605478478, "grad_norm": 0.5422598124536067, "learning_rate": 4.8453527799931285e-06, "loss": 0.0279, "step": 27138 }, { "epoch": 3.2181904423099725, "grad_norm": 0.7069774808154977, "learning_rate": 4.843932669633938e-06, "loss": 0.0222, "step": 27139 }, { "epoch": 3.2183090240720977, "grad_norm": 0.5965760294677922, "learning_rate": 4.842512745088973e-06, "loss": 0.0365, "step": 27140 }, { "epoch": 3.2184276058342225, "grad_norm": 0.2759040607079294, "learning_rate": 4.841093006371319e-06, "loss": 0.0111, "step": 27141 }, { "epoch": 3.2185461875963477, "grad_norm": 0.40280837084055854, "learning_rate": 4.839673453494076e-06, "loss": 0.0189, "step": 27142 }, { "epoch": 3.2186647693584725, "grad_norm": 0.36165063826515925, "learning_rate": 4.838254086470306e-06, "loss": 0.017, "step": 27143 }, { "epoch": 3.2187833511205977, "grad_norm": 0.43430132713101016, "learning_rate": 4.836834905313126e-06, "loss": 0.0196, "step": 27144 }, { "epoch": 3.218901932882723, "grad_norm": 0.5135293199952029, "learning_rate": 4.835415910035593e-06, "loss": 0.0231, "step": 27145 }, { "epoch": 3.2190205146448476, "grad_norm": 0.3014963065494346, "learning_rate": 4.833997100650806e-06, "loss": 0.015, "step": 27146 }, { "epoch": 3.2191390964069724, "grad_norm": 0.3996442367640868, "learning_rate": 4.832578477171823e-06, "loss": 0.0222, "step": 27147 }, { "epoch": 3.2192576781690976, "grad_norm": 0.3951939941393159, "learning_rate": 4.831160039611748e-06, "loss": 0.0211, "step": 27148 }, { "epoch": 3.219376259931223, "grad_norm": 0.3872800449136801, "learning_rate": 4.829741787983638e-06, "loss": 0.0196, "step": 27149 }, { "epoch": 3.2194948416933475, "grad_norm": 0.45373269610860995, "learning_rate": 4.828323722300573e-06, "loss": 0.02, "step": 27150 }, { "epoch": 3.2196134234554727, "grad_norm": 0.5716222343559536, "learning_rate": 4.826905842575629e-06, "loss": 0.0306, "step": 27151 }, { "epoch": 3.2197320052175975, "grad_norm": 0.846795912393875, "learning_rate": 4.825488148821877e-06, "loss": 0.0595, "step": 27152 }, { "epoch": 3.2198505869797227, "grad_norm": 0.4850570304323358, "learning_rate": 4.824070641052389e-06, "loss": 0.0252, "step": 27153 }, { "epoch": 3.2199691687418475, "grad_norm": 8.9272579282371, "learning_rate": 4.822653319280216e-06, "loss": 0.0186, "step": 27154 }, { "epoch": 3.2200877505039727, "grad_norm": 0.47755910618699005, "learning_rate": 4.821236183518449e-06, "loss": 0.0279, "step": 27155 }, { "epoch": 3.2202063322660974, "grad_norm": 0.4100536017443718, "learning_rate": 4.819819233780135e-06, "loss": 0.0161, "step": 27156 }, { "epoch": 3.2203249140282226, "grad_norm": 0.4427425726145863, "learning_rate": 4.818402470078346e-06, "loss": 0.0255, "step": 27157 }, { "epoch": 3.2204434957903474, "grad_norm": 0.4636609009081708, "learning_rate": 4.8169858924261235e-06, "loss": 0.0302, "step": 27158 }, { "epoch": 3.2205620775524726, "grad_norm": 0.26778667652964355, "learning_rate": 4.8155695008365535e-06, "loss": 0.011, "step": 27159 }, { "epoch": 3.2206806593145973, "grad_norm": 0.6961199130508007, "learning_rate": 4.8141532953226764e-06, "loss": 0.0399, "step": 27160 }, { "epoch": 3.2207992410767226, "grad_norm": 0.6196551936558504, "learning_rate": 4.812737275897553e-06, "loss": 0.0317, "step": 27161 }, { "epoch": 3.2209178228388473, "grad_norm": 0.4883468496797669, "learning_rate": 4.811321442574235e-06, "loss": 0.0281, "step": 27162 }, { "epoch": 3.2210364046009725, "grad_norm": 0.43617583432439855, "learning_rate": 4.809905795365777e-06, "loss": 0.0244, "step": 27163 }, { "epoch": 3.2211549863630973, "grad_norm": 0.35113986524343976, "learning_rate": 4.808490334285237e-06, "loss": 0.0179, "step": 27164 }, { "epoch": 3.2212735681252225, "grad_norm": 0.3626224811718582, "learning_rate": 4.807075059345647e-06, "loss": 0.0156, "step": 27165 }, { "epoch": 3.2213921498873472, "grad_norm": 0.7076252079584505, "learning_rate": 4.805659970560061e-06, "loss": 0.0424, "step": 27166 }, { "epoch": 3.2215107316494724, "grad_norm": 0.7130614216583722, "learning_rate": 4.804245067941529e-06, "loss": 0.0333, "step": 27167 }, { "epoch": 3.221629313411597, "grad_norm": 0.41116083335624093, "learning_rate": 4.802830351503098e-06, "loss": 0.0247, "step": 27168 }, { "epoch": 3.2217478951737224, "grad_norm": 0.5468125883755182, "learning_rate": 4.80141582125779e-06, "loss": 0.022, "step": 27169 }, { "epoch": 3.221866476935847, "grad_norm": 0.5473328506678831, "learning_rate": 4.800001477218674e-06, "loss": 0.0264, "step": 27170 }, { "epoch": 3.2219850586979724, "grad_norm": 0.6068111300145196, "learning_rate": 4.798587319398765e-06, "loss": 0.0282, "step": 27171 }, { "epoch": 3.222103640460097, "grad_norm": 0.38567903666739445, "learning_rate": 4.79717334781111e-06, "loss": 0.0164, "step": 27172 }, { "epoch": 3.2222222222222223, "grad_norm": 0.7611366238625927, "learning_rate": 4.795759562468741e-06, "loss": 0.035, "step": 27173 }, { "epoch": 3.222340803984347, "grad_norm": 0.6287737515709998, "learning_rate": 4.794345963384692e-06, "loss": 0.0192, "step": 27174 }, { "epoch": 3.2224593857464723, "grad_norm": 0.6116212450539225, "learning_rate": 4.792932550572002e-06, "loss": 0.0242, "step": 27175 }, { "epoch": 3.222577967508597, "grad_norm": 0.46821756156886396, "learning_rate": 4.791519324043689e-06, "loss": 0.0225, "step": 27176 }, { "epoch": 3.2226965492707222, "grad_norm": 0.5022430533376327, "learning_rate": 4.790106283812787e-06, "loss": 0.0243, "step": 27177 }, { "epoch": 3.222815131032847, "grad_norm": 0.3838372176504816, "learning_rate": 4.7886934298923214e-06, "loss": 0.022, "step": 27178 }, { "epoch": 3.222933712794972, "grad_norm": 0.6696419063302662, "learning_rate": 4.787280762295323e-06, "loss": 0.0332, "step": 27179 }, { "epoch": 3.223052294557097, "grad_norm": 0.3607563745955057, "learning_rate": 4.785868281034805e-06, "loss": 0.0146, "step": 27180 }, { "epoch": 3.223170876319222, "grad_norm": 0.48588530841065447, "learning_rate": 4.784455986123793e-06, "loss": 0.0273, "step": 27181 }, { "epoch": 3.223289458081347, "grad_norm": 0.6021044113496843, "learning_rate": 4.783043877575305e-06, "loss": 0.0285, "step": 27182 }, { "epoch": 3.223408039843472, "grad_norm": 0.492233230842435, "learning_rate": 4.781631955402363e-06, "loss": 0.0217, "step": 27183 }, { "epoch": 3.223526621605597, "grad_norm": 0.4813082936505619, "learning_rate": 4.7802202196179785e-06, "loss": 0.0278, "step": 27184 }, { "epoch": 3.223645203367722, "grad_norm": 0.3972934109123442, "learning_rate": 4.778808670235169e-06, "loss": 0.0173, "step": 27185 }, { "epoch": 3.223763785129847, "grad_norm": 0.5287712987925662, "learning_rate": 4.777397307266953e-06, "loss": 0.0233, "step": 27186 }, { "epoch": 3.223882366891972, "grad_norm": 0.6304803889652346, "learning_rate": 4.7759861307263316e-06, "loss": 0.0363, "step": 27187 }, { "epoch": 3.224000948654097, "grad_norm": 0.4729973360915809, "learning_rate": 4.7745751406263165e-06, "loss": 0.0249, "step": 27188 }, { "epoch": 3.224119530416222, "grad_norm": 0.4966293707801062, "learning_rate": 4.773164336979916e-06, "loss": 0.0293, "step": 27189 }, { "epoch": 3.2242381121783468, "grad_norm": 0.5559308855874671, "learning_rate": 4.771753719800137e-06, "loss": 0.0206, "step": 27190 }, { "epoch": 3.224356693940472, "grad_norm": 0.6513599687497477, "learning_rate": 4.770343289099988e-06, "loss": 0.0274, "step": 27191 }, { "epoch": 3.2244752757025967, "grad_norm": 0.4374826064104485, "learning_rate": 4.7689330448924545e-06, "loss": 0.0257, "step": 27192 }, { "epoch": 3.224593857464722, "grad_norm": 0.34141180696292306, "learning_rate": 4.767522987190562e-06, "loss": 0.016, "step": 27193 }, { "epoch": 3.224712439226847, "grad_norm": 0.41551493681557905, "learning_rate": 4.766113116007293e-06, "loss": 0.0171, "step": 27194 }, { "epoch": 3.224831020988972, "grad_norm": 0.579757331846841, "learning_rate": 4.764703431355652e-06, "loss": 0.0269, "step": 27195 }, { "epoch": 3.2249496027510967, "grad_norm": 0.699520498545478, "learning_rate": 4.763293933248619e-06, "loss": 0.0375, "step": 27196 }, { "epoch": 3.225068184513222, "grad_norm": 0.5656277662874142, "learning_rate": 4.761884621699214e-06, "loss": 0.0331, "step": 27197 }, { "epoch": 3.225186766275347, "grad_norm": 0.7478985043410501, "learning_rate": 4.760475496720407e-06, "loss": 0.0378, "step": 27198 }, { "epoch": 3.225305348037472, "grad_norm": 0.4541358333010349, "learning_rate": 4.759066558325198e-06, "loss": 0.0144, "step": 27199 }, { "epoch": 3.225423929799597, "grad_norm": 0.5908917204278737, "learning_rate": 4.757657806526575e-06, "loss": 0.0285, "step": 27200 }, { "epoch": 3.225542511561722, "grad_norm": 0.4268808018888554, "learning_rate": 4.756249241337524e-06, "loss": 0.0273, "step": 27201 }, { "epoch": 3.225661093323847, "grad_norm": 0.5174343959506252, "learning_rate": 4.754840862771035e-06, "loss": 0.0302, "step": 27202 }, { "epoch": 3.2257796750859717, "grad_norm": 0.4488819410253685, "learning_rate": 4.753432670840075e-06, "loss": 0.0274, "step": 27203 }, { "epoch": 3.225898256848097, "grad_norm": 0.7268380559881276, "learning_rate": 4.75202466555765e-06, "loss": 0.0317, "step": 27204 }, { "epoch": 3.2260168386102217, "grad_norm": 0.5164663497540717, "learning_rate": 4.750616846936723e-06, "loss": 0.0192, "step": 27205 }, { "epoch": 3.226135420372347, "grad_norm": 0.5448167221231031, "learning_rate": 4.749209214990283e-06, "loss": 0.0215, "step": 27206 }, { "epoch": 3.2262540021344717, "grad_norm": 0.5155822948114486, "learning_rate": 4.747801769731289e-06, "loss": 0.0244, "step": 27207 }, { "epoch": 3.226372583896597, "grad_norm": 0.5333707520888621, "learning_rate": 4.746394511172739e-06, "loss": 0.0321, "step": 27208 }, { "epoch": 3.2264911656587216, "grad_norm": 0.6479083009218652, "learning_rate": 4.744987439327589e-06, "loss": 0.0375, "step": 27209 }, { "epoch": 3.226609747420847, "grad_norm": 0.4860643838989823, "learning_rate": 4.743580554208815e-06, "loss": 0.019, "step": 27210 }, { "epoch": 3.2267283291829716, "grad_norm": 0.41854442424160354, "learning_rate": 4.742173855829391e-06, "loss": 0.0168, "step": 27211 }, { "epoch": 3.226846910945097, "grad_norm": 0.4299372891995034, "learning_rate": 4.740767344202282e-06, "loss": 0.0275, "step": 27212 }, { "epoch": 3.2269654927072216, "grad_norm": 0.7581268589265074, "learning_rate": 4.739361019340461e-06, "loss": 0.0378, "step": 27213 }, { "epoch": 3.2270840744693468, "grad_norm": 0.4823390743884749, "learning_rate": 4.73795488125687e-06, "loss": 0.0276, "step": 27214 }, { "epoch": 3.2272026562314715, "grad_norm": 0.5149344518590976, "learning_rate": 4.736548929964505e-06, "loss": 0.0249, "step": 27215 }, { "epoch": 3.2273212379935967, "grad_norm": 0.6881579566905157, "learning_rate": 4.735143165476302e-06, "loss": 0.0247, "step": 27216 }, { "epoch": 3.2274398197557215, "grad_norm": 0.7089730601168054, "learning_rate": 4.733737587805235e-06, "loss": 0.0398, "step": 27217 }, { "epoch": 3.2275584015178467, "grad_norm": 0.5598952929888358, "learning_rate": 4.732332196964243e-06, "loss": 0.0208, "step": 27218 }, { "epoch": 3.2276769832799714, "grad_norm": 0.6215132880781084, "learning_rate": 4.730926992966306e-06, "loss": 0.0331, "step": 27219 }, { "epoch": 3.2277955650420966, "grad_norm": 0.6885247839542904, "learning_rate": 4.729521975824361e-06, "loss": 0.038, "step": 27220 }, { "epoch": 3.2279141468042214, "grad_norm": 0.8884847774825126, "learning_rate": 4.728117145551364e-06, "loss": 0.0454, "step": 27221 }, { "epoch": 3.2280327285663466, "grad_norm": 0.4914453375744442, "learning_rate": 4.726712502160271e-06, "loss": 0.0324, "step": 27222 }, { "epoch": 3.2281513103284714, "grad_norm": 0.3346655981494968, "learning_rate": 4.725308045664026e-06, "loss": 0.0163, "step": 27223 }, { "epoch": 3.2282698920905966, "grad_norm": 0.5529492898912853, "learning_rate": 4.723903776075586e-06, "loss": 0.0276, "step": 27224 }, { "epoch": 3.2283884738527213, "grad_norm": 0.743271418933701, "learning_rate": 4.722499693407883e-06, "loss": 0.0362, "step": 27225 }, { "epoch": 3.2285070556148465, "grad_norm": 0.7114762183776034, "learning_rate": 4.721095797673866e-06, "loss": 0.0309, "step": 27226 }, { "epoch": 3.2286256373769713, "grad_norm": 0.35137482278333276, "learning_rate": 4.719692088886477e-06, "loss": 0.0159, "step": 27227 }, { "epoch": 3.2287442191390965, "grad_norm": 0.4726193434232054, "learning_rate": 4.7182885670586654e-06, "loss": 0.0295, "step": 27228 }, { "epoch": 3.2288628009012212, "grad_norm": 0.4085488101353301, "learning_rate": 4.71688523220335e-06, "loss": 0.0219, "step": 27229 }, { "epoch": 3.2289813826633464, "grad_norm": 0.3365209878045844, "learning_rate": 4.715482084333492e-06, "loss": 0.0156, "step": 27230 }, { "epoch": 3.229099964425471, "grad_norm": 0.3277080834762484, "learning_rate": 4.7140791234620066e-06, "loss": 0.0133, "step": 27231 }, { "epoch": 3.2292185461875964, "grad_norm": 0.5837757441141516, "learning_rate": 4.712676349601838e-06, "loss": 0.0173, "step": 27232 }, { "epoch": 3.229337127949721, "grad_norm": 1.3201098765321355, "learning_rate": 4.711273762765914e-06, "loss": 0.0469, "step": 27233 }, { "epoch": 3.2294557097118464, "grad_norm": 0.43680062058148034, "learning_rate": 4.7098713629671665e-06, "loss": 0.0177, "step": 27234 }, { "epoch": 3.229574291473971, "grad_norm": 0.42391958258498086, "learning_rate": 4.708469150218531e-06, "loss": 0.0256, "step": 27235 }, { "epoch": 3.2296928732360963, "grad_norm": 0.49478193726192476, "learning_rate": 4.707067124532918e-06, "loss": 0.019, "step": 27236 }, { "epoch": 3.229811454998221, "grad_norm": 0.4059518878359518, "learning_rate": 4.705665285923263e-06, "loss": 0.0188, "step": 27237 }, { "epoch": 3.2299300367603463, "grad_norm": 0.30795073622635466, "learning_rate": 4.704263634402489e-06, "loss": 0.0162, "step": 27238 }, { "epoch": 3.230048618522471, "grad_norm": 0.6678603193228485, "learning_rate": 4.702862169983513e-06, "loss": 0.0268, "step": 27239 }, { "epoch": 3.2301672002845963, "grad_norm": 0.3355299364201921, "learning_rate": 4.70146089267926e-06, "loss": 0.0164, "step": 27240 }, { "epoch": 3.230285782046721, "grad_norm": 0.73707987109389, "learning_rate": 4.700059802502646e-06, "loss": 0.0422, "step": 27241 }, { "epoch": 3.230404363808846, "grad_norm": 0.7557841445410667, "learning_rate": 4.698658899466593e-06, "loss": 0.0361, "step": 27242 }, { "epoch": 3.2305229455709714, "grad_norm": 0.6338994056629211, "learning_rate": 4.6972581835840025e-06, "loss": 0.0367, "step": 27243 }, { "epoch": 3.230641527333096, "grad_norm": 0.6031666777382958, "learning_rate": 4.695857654867794e-06, "loss": 0.033, "step": 27244 }, { "epoch": 3.230760109095221, "grad_norm": 1.0973068580788656, "learning_rate": 4.69445731333088e-06, "loss": 0.0859, "step": 27245 }, { "epoch": 3.230878690857346, "grad_norm": 0.8569519777573233, "learning_rate": 4.693057158986177e-06, "loss": 0.041, "step": 27246 }, { "epoch": 3.2309972726194713, "grad_norm": 0.5398864517867409, "learning_rate": 4.691657191846577e-06, "loss": 0.0276, "step": 27247 }, { "epoch": 3.231115854381596, "grad_norm": 0.5714824562462999, "learning_rate": 4.690257411924995e-06, "loss": 0.0307, "step": 27248 }, { "epoch": 3.2312344361437213, "grad_norm": 0.47702098789470504, "learning_rate": 4.688857819234336e-06, "loss": 0.0242, "step": 27249 }, { "epoch": 3.231353017905846, "grad_norm": 0.4413578491803499, "learning_rate": 4.687458413787499e-06, "loss": 0.0244, "step": 27250 }, { "epoch": 3.2314715996679713, "grad_norm": 0.54266868645611, "learning_rate": 4.686059195597392e-06, "loss": 0.0292, "step": 27251 }, { "epoch": 3.231590181430096, "grad_norm": 0.6224659353968609, "learning_rate": 4.684660164676896e-06, "loss": 0.023, "step": 27252 }, { "epoch": 3.2317087631922212, "grad_norm": 0.40294754485153766, "learning_rate": 4.683261321038934e-06, "loss": 0.0182, "step": 27253 }, { "epoch": 3.231827344954346, "grad_norm": 0.8028299641802137, "learning_rate": 4.681862664696382e-06, "loss": 0.049, "step": 27254 }, { "epoch": 3.231945926716471, "grad_norm": 0.5154538989852842, "learning_rate": 4.68046419566214e-06, "loss": 0.0209, "step": 27255 }, { "epoch": 3.232064508478596, "grad_norm": 0.5608194042542419, "learning_rate": 4.679065913949101e-06, "loss": 0.0239, "step": 27256 }, { "epoch": 3.232183090240721, "grad_norm": 0.5925360250346736, "learning_rate": 4.677667819570164e-06, "loss": 0.0291, "step": 27257 }, { "epoch": 3.232301672002846, "grad_norm": 0.3434359106587691, "learning_rate": 4.6762699125382e-06, "loss": 0.0192, "step": 27258 }, { "epoch": 3.232420253764971, "grad_norm": 0.6951223406458682, "learning_rate": 4.674872192866106e-06, "loss": 0.0342, "step": 27259 }, { "epoch": 3.232538835527096, "grad_norm": 0.4116545053717428, "learning_rate": 4.673474660566765e-06, "loss": 0.0173, "step": 27260 }, { "epoch": 3.232657417289221, "grad_norm": 0.38910162068966403, "learning_rate": 4.672077315653062e-06, "loss": 0.0177, "step": 27261 }, { "epoch": 3.232775999051346, "grad_norm": 0.7106965114560032, "learning_rate": 4.670680158137885e-06, "loss": 0.0501, "step": 27262 }, { "epoch": 3.232894580813471, "grad_norm": 0.5129262747663298, "learning_rate": 4.669283188034093e-06, "loss": 0.0249, "step": 27263 }, { "epoch": 3.233013162575596, "grad_norm": 0.6184323269722121, "learning_rate": 4.667886405354594e-06, "loss": 0.0418, "step": 27264 }, { "epoch": 3.233131744337721, "grad_norm": 0.6341111404147647, "learning_rate": 4.666489810112243e-06, "loss": 0.0313, "step": 27265 }, { "epoch": 3.2332503260998458, "grad_norm": 0.6056428846621508, "learning_rate": 4.665093402319925e-06, "loss": 0.0328, "step": 27266 }, { "epoch": 3.233368907861971, "grad_norm": 0.8269813691422123, "learning_rate": 4.663697181990501e-06, "loss": 0.0344, "step": 27267 }, { "epoch": 3.2334874896240957, "grad_norm": 0.42553873655514707, "learning_rate": 4.66230114913686e-06, "loss": 0.0282, "step": 27268 }, { "epoch": 3.233606071386221, "grad_norm": 0.9230575845232698, "learning_rate": 4.660905303771859e-06, "loss": 0.0417, "step": 27269 }, { "epoch": 3.2337246531483457, "grad_norm": 0.39540061546662736, "learning_rate": 4.659509645908367e-06, "loss": 0.0197, "step": 27270 }, { "epoch": 3.233843234910471, "grad_norm": 0.8169935928637869, "learning_rate": 4.658114175559256e-06, "loss": 0.0586, "step": 27271 }, { "epoch": 3.2339618166725956, "grad_norm": 0.5817890802245697, "learning_rate": 4.656718892737383e-06, "loss": 0.0236, "step": 27272 }, { "epoch": 3.234080398434721, "grad_norm": 0.5273790105393465, "learning_rate": 4.655323797455624e-06, "loss": 0.0245, "step": 27273 }, { "epoch": 3.2341989801968456, "grad_norm": 0.5843239134491043, "learning_rate": 4.653928889726819e-06, "loss": 0.0284, "step": 27274 }, { "epoch": 3.234317561958971, "grad_norm": 0.4182482136212162, "learning_rate": 4.652534169563852e-06, "loss": 0.0262, "step": 27275 }, { "epoch": 3.2344361437210956, "grad_norm": 0.5094863693057446, "learning_rate": 4.651139636979563e-06, "loss": 0.0232, "step": 27276 }, { "epoch": 3.2345547254832208, "grad_norm": 0.3152121986058886, "learning_rate": 4.649745291986818e-06, "loss": 0.014, "step": 27277 }, { "epoch": 3.2346733072453455, "grad_norm": 0.7273817856099339, "learning_rate": 4.648351134598455e-06, "loss": 0.0247, "step": 27278 }, { "epoch": 3.2347918890074707, "grad_norm": 0.33676065867603083, "learning_rate": 4.646957164827348e-06, "loss": 0.0153, "step": 27279 }, { "epoch": 3.2349104707695955, "grad_norm": 0.3596976961187567, "learning_rate": 4.6455633826863326e-06, "loss": 0.0119, "step": 27280 }, { "epoch": 3.2350290525317207, "grad_norm": 0.5222262561324078, "learning_rate": 4.644169788188263e-06, "loss": 0.0272, "step": 27281 }, { "epoch": 3.2351476342938454, "grad_norm": 0.7135090957657931, "learning_rate": 4.642776381345987e-06, "loss": 0.0377, "step": 27282 }, { "epoch": 3.2352662160559706, "grad_norm": 0.401714610730151, "learning_rate": 4.641383162172347e-06, "loss": 0.0143, "step": 27283 }, { "epoch": 3.2353847978180954, "grad_norm": 0.46142739701940166, "learning_rate": 4.639990130680197e-06, "loss": 0.0216, "step": 27284 }, { "epoch": 3.2355033795802206, "grad_norm": 0.2994213547780573, "learning_rate": 4.638597286882354e-06, "loss": 0.017, "step": 27285 }, { "epoch": 3.2356219613423454, "grad_norm": 0.5836146720479091, "learning_rate": 4.6372046307916915e-06, "loss": 0.0306, "step": 27286 }, { "epoch": 3.2357405431044706, "grad_norm": 0.6575671371037373, "learning_rate": 4.635812162421024e-06, "loss": 0.0305, "step": 27287 }, { "epoch": 3.2358591248665953, "grad_norm": 0.5715480045699568, "learning_rate": 4.634419881783198e-06, "loss": 0.0307, "step": 27288 }, { "epoch": 3.2359777066287205, "grad_norm": 0.3954912849445684, "learning_rate": 4.633027788891045e-06, "loss": 0.0191, "step": 27289 }, { "epoch": 3.2360962883908453, "grad_norm": 0.4873538680900388, "learning_rate": 4.631635883757401e-06, "loss": 0.0256, "step": 27290 }, { "epoch": 3.2362148701529705, "grad_norm": 0.3966020594275511, "learning_rate": 4.630244166395103e-06, "loss": 0.0147, "step": 27291 }, { "epoch": 3.2363334519150957, "grad_norm": 0.40710454337668217, "learning_rate": 4.628852636816969e-06, "loss": 0.0209, "step": 27292 }, { "epoch": 3.2364520336772205, "grad_norm": 0.5272838559903921, "learning_rate": 4.627461295035835e-06, "loss": 0.0227, "step": 27293 }, { "epoch": 3.236570615439345, "grad_norm": 0.806128464936843, "learning_rate": 4.62607014106452e-06, "loss": 0.0504, "step": 27294 }, { "epoch": 3.2366891972014704, "grad_norm": 0.476165796579598, "learning_rate": 4.624679174915864e-06, "loss": 0.0241, "step": 27295 }, { "epoch": 3.2368077789635956, "grad_norm": 0.4484873541252045, "learning_rate": 4.623288396602673e-06, "loss": 0.0216, "step": 27296 }, { "epoch": 3.2369263607257204, "grad_norm": 0.4577438470960431, "learning_rate": 4.621897806137776e-06, "loss": 0.0211, "step": 27297 }, { "epoch": 3.237044942487845, "grad_norm": 0.4445869190818124, "learning_rate": 4.6205074035339915e-06, "loss": 0.0245, "step": 27298 }, { "epoch": 3.2371635242499703, "grad_norm": 0.6016173955031497, "learning_rate": 4.619117188804137e-06, "loss": 0.0278, "step": 27299 }, { "epoch": 3.2372821060120955, "grad_norm": 0.5010630120937563, "learning_rate": 4.6177271619610305e-06, "loss": 0.0283, "step": 27300 }, { "epoch": 3.2374006877742203, "grad_norm": 0.3388603095386597, "learning_rate": 4.616337323017484e-06, "loss": 0.0169, "step": 27301 }, { "epoch": 3.2375192695363455, "grad_norm": 0.6033642240666882, "learning_rate": 4.614947671986319e-06, "loss": 0.0345, "step": 27302 }, { "epoch": 3.2376378512984703, "grad_norm": 0.47106580384936847, "learning_rate": 4.613558208880331e-06, "loss": 0.0229, "step": 27303 }, { "epoch": 3.2377564330605955, "grad_norm": 0.5632848076030716, "learning_rate": 4.612168933712338e-06, "loss": 0.0296, "step": 27304 }, { "epoch": 3.23787501482272, "grad_norm": 0.533806084203638, "learning_rate": 4.610779846495145e-06, "loss": 0.0271, "step": 27305 }, { "epoch": 3.2379935965848454, "grad_norm": 0.8057453971406728, "learning_rate": 4.609390947241565e-06, "loss": 0.0479, "step": 27306 }, { "epoch": 3.23811217834697, "grad_norm": 1.1889861441133327, "learning_rate": 4.608002235964392e-06, "loss": 0.0742, "step": 27307 }, { "epoch": 3.2382307601090954, "grad_norm": 0.3645307030764082, "learning_rate": 4.60661371267643e-06, "loss": 0.0196, "step": 27308 }, { "epoch": 3.23834934187122, "grad_norm": 0.6487430304160718, "learning_rate": 4.605225377390482e-06, "loss": 0.0257, "step": 27309 }, { "epoch": 3.2384679236333453, "grad_norm": 0.5909204472800735, "learning_rate": 4.603837230119346e-06, "loss": 0.033, "step": 27310 }, { "epoch": 3.23858650539547, "grad_norm": 0.6163002612858551, "learning_rate": 4.60244927087582e-06, "loss": 0.0321, "step": 27311 }, { "epoch": 3.2387050871575953, "grad_norm": 0.4958752890752176, "learning_rate": 4.601061499672698e-06, "loss": 0.0252, "step": 27312 }, { "epoch": 3.23882366891972, "grad_norm": 0.322413057849458, "learning_rate": 4.599673916522781e-06, "loss": 0.0118, "step": 27313 }, { "epoch": 3.2389422506818453, "grad_norm": 0.5794683131841242, "learning_rate": 4.598286521438846e-06, "loss": 0.0295, "step": 27314 }, { "epoch": 3.23906083244397, "grad_norm": 0.3254165354660108, "learning_rate": 4.596899314433695e-06, "loss": 0.018, "step": 27315 }, { "epoch": 3.2391794142060952, "grad_norm": 0.5201244613664262, "learning_rate": 4.5955122955201076e-06, "loss": 0.0298, "step": 27316 }, { "epoch": 3.23929799596822, "grad_norm": 0.8370913953662976, "learning_rate": 4.594125464710886e-06, "loss": 0.0512, "step": 27317 }, { "epoch": 3.239416577730345, "grad_norm": 0.4324646141044177, "learning_rate": 4.592738822018794e-06, "loss": 0.0277, "step": 27318 }, { "epoch": 3.23953515949247, "grad_norm": 0.49818732220943585, "learning_rate": 4.591352367456628e-06, "loss": 0.0381, "step": 27319 }, { "epoch": 3.239653741254595, "grad_norm": 0.6096855063138534, "learning_rate": 4.589966101037163e-06, "loss": 0.0321, "step": 27320 }, { "epoch": 3.23977232301672, "grad_norm": 0.4185235780663556, "learning_rate": 4.588580022773184e-06, "loss": 0.023, "step": 27321 }, { "epoch": 3.239890904778845, "grad_norm": 0.7117572112539658, "learning_rate": 4.587194132677475e-06, "loss": 0.0423, "step": 27322 }, { "epoch": 3.24000948654097, "grad_norm": 0.4198776867915564, "learning_rate": 4.585808430762789e-06, "loss": 0.0196, "step": 27323 }, { "epoch": 3.240128068303095, "grad_norm": 0.49236310478793366, "learning_rate": 4.58442291704193e-06, "loss": 0.0218, "step": 27324 }, { "epoch": 3.24024665006522, "grad_norm": 0.720286817161152, "learning_rate": 4.58303759152765e-06, "loss": 0.0264, "step": 27325 }, { "epoch": 3.240365231827345, "grad_norm": 0.32826045755656735, "learning_rate": 4.581652454232727e-06, "loss": 0.0111, "step": 27326 }, { "epoch": 3.24048381358947, "grad_norm": 0.48291498865046056, "learning_rate": 4.580267505169928e-06, "loss": 0.0175, "step": 27327 }, { "epoch": 3.240602395351595, "grad_norm": 0.28362267793892687, "learning_rate": 4.578882744352028e-06, "loss": 0.0133, "step": 27328 }, { "epoch": 3.2407209771137198, "grad_norm": 0.5583341798906812, "learning_rate": 4.577498171791783e-06, "loss": 0.0235, "step": 27329 }, { "epoch": 3.240839558875845, "grad_norm": 0.4750956381021061, "learning_rate": 4.576113787501962e-06, "loss": 0.0237, "step": 27330 }, { "epoch": 3.2409581406379697, "grad_norm": 0.5637804753962082, "learning_rate": 4.574729591495324e-06, "loss": 0.0317, "step": 27331 }, { "epoch": 3.241076722400095, "grad_norm": 0.43710229275976853, "learning_rate": 4.573345583784633e-06, "loss": 0.0266, "step": 27332 }, { "epoch": 3.2411953041622197, "grad_norm": 0.6682334126310312, "learning_rate": 4.571961764382654e-06, "loss": 0.0284, "step": 27333 }, { "epoch": 3.241313885924345, "grad_norm": 0.7042824345659573, "learning_rate": 4.570578133302122e-06, "loss": 0.0475, "step": 27334 }, { "epoch": 3.2414324676864696, "grad_norm": 0.5996761122512324, "learning_rate": 4.569194690555822e-06, "loss": 0.0273, "step": 27335 }, { "epoch": 3.241551049448595, "grad_norm": 0.5098890458949079, "learning_rate": 4.567811436156485e-06, "loss": 0.0273, "step": 27336 }, { "epoch": 3.2416696312107196, "grad_norm": 0.6238107744147662, "learning_rate": 4.566428370116874e-06, "loss": 0.0263, "step": 27337 }, { "epoch": 3.241788212972845, "grad_norm": 0.46000537076243697, "learning_rate": 4.565045492449735e-06, "loss": 0.0187, "step": 27338 }, { "epoch": 3.2419067947349696, "grad_norm": 0.6176173352627707, "learning_rate": 4.563662803167815e-06, "loss": 0.0349, "step": 27339 }, { "epoch": 3.2420253764970948, "grad_norm": 0.6634864913685125, "learning_rate": 4.562280302283872e-06, "loss": 0.0376, "step": 27340 }, { "epoch": 3.2421439582592195, "grad_norm": 0.6307902602259513, "learning_rate": 4.560897989810628e-06, "loss": 0.0389, "step": 27341 }, { "epoch": 3.2422625400213447, "grad_norm": 0.4696488837713808, "learning_rate": 4.559515865760855e-06, "loss": 0.0345, "step": 27342 }, { "epoch": 3.2423811217834695, "grad_norm": 0.5894769902239515, "learning_rate": 4.558133930147273e-06, "loss": 0.0391, "step": 27343 }, { "epoch": 3.2424997035455947, "grad_norm": 0.38806531732752153, "learning_rate": 4.556752182982635e-06, "loss": 0.0248, "step": 27344 }, { "epoch": 3.24261828530772, "grad_norm": 0.5184335037237602, "learning_rate": 4.555370624279659e-06, "loss": 0.026, "step": 27345 }, { "epoch": 3.2427368670698447, "grad_norm": 0.4683361526217257, "learning_rate": 4.553989254051108e-06, "loss": 0.0222, "step": 27346 }, { "epoch": 3.2428554488319694, "grad_norm": 0.633745625212085, "learning_rate": 4.5526080723097e-06, "loss": 0.0369, "step": 27347 }, { "epoch": 3.2429740305940946, "grad_norm": 0.7835557348044226, "learning_rate": 4.551227079068169e-06, "loss": 0.0262, "step": 27348 }, { "epoch": 3.24309261235622, "grad_norm": 0.7625867256495316, "learning_rate": 4.549846274339248e-06, "loss": 0.0385, "step": 27349 }, { "epoch": 3.2432111941183446, "grad_norm": 0.4233362697504844, "learning_rate": 4.548465658135667e-06, "loss": 0.0241, "step": 27350 }, { "epoch": 3.24332977588047, "grad_norm": 0.7692601095033128, "learning_rate": 4.5470852304701575e-06, "loss": 0.0353, "step": 27351 }, { "epoch": 3.2434483576425945, "grad_norm": 0.6918448639330976, "learning_rate": 4.545704991355437e-06, "loss": 0.0534, "step": 27352 }, { "epoch": 3.2435669394047197, "grad_norm": 0.5677232084239886, "learning_rate": 4.544324940804234e-06, "loss": 0.0347, "step": 27353 }, { "epoch": 3.2436855211668445, "grad_norm": 0.8165486150843095, "learning_rate": 4.542945078829272e-06, "loss": 0.0398, "step": 27354 }, { "epoch": 3.2438041029289697, "grad_norm": 0.6523069173206353, "learning_rate": 4.541565405443274e-06, "loss": 0.0268, "step": 27355 }, { "epoch": 3.2439226846910945, "grad_norm": 0.6369354615631665, "learning_rate": 4.540185920658943e-06, "loss": 0.0301, "step": 27356 }, { "epoch": 3.2440412664532197, "grad_norm": 0.6356614147764452, "learning_rate": 4.5388066244890226e-06, "loss": 0.0362, "step": 27357 }, { "epoch": 3.2441598482153444, "grad_norm": 0.37369812507215877, "learning_rate": 4.537427516946208e-06, "loss": 0.0162, "step": 27358 }, { "epoch": 3.2442784299774696, "grad_norm": 0.5741404796001948, "learning_rate": 4.536048598043216e-06, "loss": 0.0252, "step": 27359 }, { "epoch": 3.2443970117395944, "grad_norm": 0.7944198066295567, "learning_rate": 4.534669867792763e-06, "loss": 0.0418, "step": 27360 }, { "epoch": 3.2445155935017196, "grad_norm": 0.32090548395512947, "learning_rate": 4.533291326207559e-06, "loss": 0.0139, "step": 27361 }, { "epoch": 3.2446341752638443, "grad_norm": 0.42480184792404996, "learning_rate": 4.531912973300315e-06, "loss": 0.0173, "step": 27362 }, { "epoch": 3.2447527570259695, "grad_norm": 0.4380770192852842, "learning_rate": 4.530534809083728e-06, "loss": 0.0292, "step": 27363 }, { "epoch": 3.2448713387880943, "grad_norm": 0.6234087563659148, "learning_rate": 4.529156833570508e-06, "loss": 0.0222, "step": 27364 }, { "epoch": 3.2449899205502195, "grad_norm": 0.6664669358081164, "learning_rate": 4.5277790467733585e-06, "loss": 0.0298, "step": 27365 }, { "epoch": 3.2451085023123443, "grad_norm": 0.31894014727263564, "learning_rate": 4.526401448704992e-06, "loss": 0.0209, "step": 27366 }, { "epoch": 3.2452270840744695, "grad_norm": 0.7590152596678472, "learning_rate": 4.525024039378087e-06, "loss": 0.0346, "step": 27367 }, { "epoch": 3.2453456658365942, "grad_norm": 0.565794962137157, "learning_rate": 4.523646818805355e-06, "loss": 0.0246, "step": 27368 }, { "epoch": 3.2454642475987194, "grad_norm": 0.5977421911653837, "learning_rate": 4.522269786999489e-06, "loss": 0.0255, "step": 27369 }, { "epoch": 3.245582829360844, "grad_norm": 0.48433850931694944, "learning_rate": 4.520892943973182e-06, "loss": 0.0313, "step": 27370 }, { "epoch": 3.2457014111229694, "grad_norm": 0.582899132317779, "learning_rate": 4.519516289739132e-06, "loss": 0.0281, "step": 27371 }, { "epoch": 3.245819992885094, "grad_norm": 0.5435044509677875, "learning_rate": 4.518139824310025e-06, "loss": 0.0309, "step": 27372 }, { "epoch": 3.2459385746472194, "grad_norm": 1.0326268253664608, "learning_rate": 4.516763547698558e-06, "loss": 0.0366, "step": 27373 }, { "epoch": 3.246057156409344, "grad_norm": 0.298111224145929, "learning_rate": 4.515387459917408e-06, "loss": 0.0183, "step": 27374 }, { "epoch": 3.2461757381714693, "grad_norm": 0.5111312494474423, "learning_rate": 4.514011560979267e-06, "loss": 0.0221, "step": 27375 }, { "epoch": 3.246294319933594, "grad_norm": 0.5804552797863295, "learning_rate": 4.512635850896816e-06, "loss": 0.0329, "step": 27376 }, { "epoch": 3.2464129016957193, "grad_norm": 0.5174700618264639, "learning_rate": 4.511260329682746e-06, "loss": 0.0242, "step": 27377 }, { "epoch": 3.246531483457844, "grad_norm": 0.534838604389928, "learning_rate": 4.5098849973497224e-06, "loss": 0.0295, "step": 27378 }, { "epoch": 3.2466500652199692, "grad_norm": 0.5900219704656952, "learning_rate": 4.5085098539104335e-06, "loss": 0.0321, "step": 27379 }, { "epoch": 3.246768646982094, "grad_norm": 0.5724684638199294, "learning_rate": 4.507134899377557e-06, "loss": 0.0269, "step": 27380 }, { "epoch": 3.246887228744219, "grad_norm": 0.41337218026176337, "learning_rate": 4.505760133763767e-06, "loss": 0.017, "step": 27381 }, { "epoch": 3.247005810506344, "grad_norm": 0.38169045347208264, "learning_rate": 4.504385557081742e-06, "loss": 0.0225, "step": 27382 }, { "epoch": 3.247124392268469, "grad_norm": 0.5941108042095287, "learning_rate": 4.503011169344138e-06, "loss": 0.0281, "step": 27383 }, { "epoch": 3.247242974030594, "grad_norm": 0.38735980539473686, "learning_rate": 4.501636970563644e-06, "loss": 0.0212, "step": 27384 }, { "epoch": 3.247361555792719, "grad_norm": 0.774212854758011, "learning_rate": 4.5002629607529185e-06, "loss": 0.0218, "step": 27385 }, { "epoch": 3.247480137554844, "grad_norm": 0.6301239610651268, "learning_rate": 4.498889139924628e-06, "loss": 0.036, "step": 27386 }, { "epoch": 3.247598719316969, "grad_norm": 0.2706665194422037, "learning_rate": 4.49751550809144e-06, "loss": 0.0154, "step": 27387 }, { "epoch": 3.247717301079094, "grad_norm": 0.9746812033701433, "learning_rate": 4.496142065266024e-06, "loss": 0.0369, "step": 27388 }, { "epoch": 3.247835882841219, "grad_norm": 0.3449711372556873, "learning_rate": 4.494768811461028e-06, "loss": 0.0198, "step": 27389 }, { "epoch": 3.247954464603344, "grad_norm": 0.5336246444327859, "learning_rate": 4.493395746689116e-06, "loss": 0.0286, "step": 27390 }, { "epoch": 3.248073046365469, "grad_norm": 0.473151080437943, "learning_rate": 4.492022870962953e-06, "loss": 0.0227, "step": 27391 }, { "epoch": 3.2481916281275938, "grad_norm": 0.48718309663628, "learning_rate": 4.4906501842951865e-06, "loss": 0.0193, "step": 27392 }, { "epoch": 3.248310209889719, "grad_norm": 0.40622114310778723, "learning_rate": 4.489277686698485e-06, "loss": 0.0219, "step": 27393 }, { "epoch": 3.248428791651844, "grad_norm": 0.9225840947491843, "learning_rate": 4.487905378185475e-06, "loss": 0.0614, "step": 27394 }, { "epoch": 3.248547373413969, "grad_norm": 0.6879308011312439, "learning_rate": 4.486533258768838e-06, "loss": 0.0324, "step": 27395 }, { "epoch": 3.2486659551760937, "grad_norm": 0.43505714360855335, "learning_rate": 4.4851613284612045e-06, "loss": 0.022, "step": 27396 }, { "epoch": 3.248784536938219, "grad_norm": 0.808959178777857, "learning_rate": 4.483789587275225e-06, "loss": 0.0436, "step": 27397 }, { "epoch": 3.248903118700344, "grad_norm": 0.4437904808254502, "learning_rate": 4.4824180352235455e-06, "loss": 0.0219, "step": 27398 }, { "epoch": 3.249021700462469, "grad_norm": 0.35209507786537586, "learning_rate": 4.481046672318815e-06, "loss": 0.0202, "step": 27399 }, { "epoch": 3.249140282224594, "grad_norm": 0.5583964971034012, "learning_rate": 4.479675498573676e-06, "loss": 0.0307, "step": 27400 }, { "epoch": 3.249258863986719, "grad_norm": 0.643103234627052, "learning_rate": 4.478304514000753e-06, "loss": 0.0345, "step": 27401 }, { "epoch": 3.249377445748844, "grad_norm": 0.39678451373725615, "learning_rate": 4.476933718612708e-06, "loss": 0.0246, "step": 27402 }, { "epoch": 3.2494960275109688, "grad_norm": 0.6052258564524259, "learning_rate": 4.475563112422163e-06, "loss": 0.0353, "step": 27403 }, { "epoch": 3.249614609273094, "grad_norm": 0.4866823179577112, "learning_rate": 4.474192695441765e-06, "loss": 0.0212, "step": 27404 }, { "epoch": 3.2497331910352187, "grad_norm": 0.4665782514470352, "learning_rate": 4.472822467684123e-06, "loss": 0.0182, "step": 27405 }, { "epoch": 3.249851772797344, "grad_norm": 0.5149479347549677, "learning_rate": 4.4714524291619015e-06, "loss": 0.0258, "step": 27406 }, { "epoch": 3.2499703545594687, "grad_norm": 0.6652262508351136, "learning_rate": 4.470082579887708e-06, "loss": 0.0354, "step": 27407 }, { "epoch": 3.250088936321594, "grad_norm": 0.9807370166508604, "learning_rate": 4.468712919874177e-06, "loss": 0.0443, "step": 27408 }, { "epoch": 3.2502075180837187, "grad_norm": 0.5383983770005903, "learning_rate": 4.467343449133937e-06, "loss": 0.0281, "step": 27409 }, { "epoch": 3.250326099845844, "grad_norm": 0.4319105292999387, "learning_rate": 4.465974167679609e-06, "loss": 0.0245, "step": 27410 }, { "epoch": 3.2504446816079686, "grad_norm": 0.4315315469292918, "learning_rate": 4.464605075523828e-06, "loss": 0.0194, "step": 27411 }, { "epoch": 3.250563263370094, "grad_norm": 0.5511434818096692, "learning_rate": 4.463236172679192e-06, "loss": 0.0257, "step": 27412 }, { "epoch": 3.2506818451322186, "grad_norm": 0.8480545764501413, "learning_rate": 4.461867459158348e-06, "loss": 0.0361, "step": 27413 }, { "epoch": 3.250800426894344, "grad_norm": 0.5942261056552267, "learning_rate": 4.460498934973892e-06, "loss": 0.0238, "step": 27414 }, { "epoch": 3.2509190086564685, "grad_norm": 0.4146816983189422, "learning_rate": 4.459130600138459e-06, "loss": 0.0215, "step": 27415 }, { "epoch": 3.2510375904185937, "grad_norm": 0.5043980642688954, "learning_rate": 4.457762454664638e-06, "loss": 0.0257, "step": 27416 }, { "epoch": 3.2511561721807185, "grad_norm": 0.6914993442939822, "learning_rate": 4.456394498565067e-06, "loss": 0.0235, "step": 27417 }, { "epoch": 3.2512747539428437, "grad_norm": 0.3282884167784285, "learning_rate": 4.455026731852344e-06, "loss": 0.0178, "step": 27418 }, { "epoch": 3.2513933357049685, "grad_norm": 0.8944123763727835, "learning_rate": 4.453659154539077e-06, "loss": 0.0479, "step": 27419 }, { "epoch": 3.2515119174670937, "grad_norm": 0.6623683644306712, "learning_rate": 4.452291766637881e-06, "loss": 0.0358, "step": 27420 }, { "epoch": 3.2516304992292184, "grad_norm": 0.5509851969184071, "learning_rate": 4.450924568161355e-06, "loss": 0.0241, "step": 27421 }, { "epoch": 3.2517490809913436, "grad_norm": 0.6253450639534796, "learning_rate": 4.449557559122111e-06, "loss": 0.027, "step": 27422 }, { "epoch": 3.2518676627534684, "grad_norm": 0.9448586315133146, "learning_rate": 4.448190739532743e-06, "loss": 0.0455, "step": 27423 }, { "epoch": 3.2519862445155936, "grad_norm": 0.6949222107265272, "learning_rate": 4.44682410940585e-06, "loss": 0.0341, "step": 27424 }, { "epoch": 3.2521048262777184, "grad_norm": 0.42859376115520054, "learning_rate": 4.445457668754038e-06, "loss": 0.0186, "step": 27425 }, { "epoch": 3.2522234080398436, "grad_norm": 0.41494245480329633, "learning_rate": 4.444091417589904e-06, "loss": 0.0289, "step": 27426 }, { "epoch": 3.2523419898019683, "grad_norm": 0.41923482180920324, "learning_rate": 4.442725355926031e-06, "loss": 0.022, "step": 27427 }, { "epoch": 3.2524605715640935, "grad_norm": 0.8585326740464695, "learning_rate": 4.441359483775032e-06, "loss": 0.0379, "step": 27428 }, { "epoch": 3.2525791533262183, "grad_norm": 0.624664669597386, "learning_rate": 4.439993801149481e-06, "loss": 0.0376, "step": 27429 }, { "epoch": 3.2526977350883435, "grad_norm": 0.7391954138222044, "learning_rate": 4.438628308061973e-06, "loss": 0.039, "step": 27430 }, { "epoch": 3.2528163168504682, "grad_norm": 0.9892812417590744, "learning_rate": 4.437263004525103e-06, "loss": 0.0517, "step": 27431 }, { "epoch": 3.2529348986125934, "grad_norm": 0.5382487305108428, "learning_rate": 4.435897890551446e-06, "loss": 0.0234, "step": 27432 }, { "epoch": 3.253053480374718, "grad_norm": 0.7663903048397577, "learning_rate": 4.4345329661536035e-06, "loss": 0.0555, "step": 27433 }, { "epoch": 3.2531720621368434, "grad_norm": 0.5272520530046483, "learning_rate": 4.4331682313441425e-06, "loss": 0.0331, "step": 27434 }, { "epoch": 3.253290643898968, "grad_norm": 0.27865109680579014, "learning_rate": 4.431803686135647e-06, "loss": 0.0117, "step": 27435 }, { "epoch": 3.2534092256610934, "grad_norm": 0.3517553719447311, "learning_rate": 4.4304393305407e-06, "loss": 0.0197, "step": 27436 }, { "epoch": 3.253527807423218, "grad_norm": 0.5579854093445203, "learning_rate": 4.429075164571886e-06, "loss": 0.0275, "step": 27437 }, { "epoch": 3.2536463891853433, "grad_norm": 0.5180311620271351, "learning_rate": 4.427711188241765e-06, "loss": 0.0221, "step": 27438 }, { "epoch": 3.2537649709474685, "grad_norm": 0.39920967138653324, "learning_rate": 4.426347401562922e-06, "loss": 0.0187, "step": 27439 }, { "epoch": 3.2538835527095933, "grad_norm": 0.3908766073094901, "learning_rate": 4.4249838045479245e-06, "loss": 0.0265, "step": 27440 }, { "epoch": 3.254002134471718, "grad_norm": 0.453343602388618, "learning_rate": 4.423620397209346e-06, "loss": 0.021, "step": 27441 }, { "epoch": 3.2541207162338432, "grad_norm": 0.6316820210284874, "learning_rate": 4.422257179559755e-06, "loss": 0.0309, "step": 27442 }, { "epoch": 3.2542392979959684, "grad_norm": 0.48454037168230085, "learning_rate": 4.420894151611721e-06, "loss": 0.0266, "step": 27443 }, { "epoch": 3.254357879758093, "grad_norm": 0.5048845638089743, "learning_rate": 4.419531313377809e-06, "loss": 0.0252, "step": 27444 }, { "epoch": 3.254476461520218, "grad_norm": 0.49654339170126044, "learning_rate": 4.418168664870578e-06, "loss": 0.0283, "step": 27445 }, { "epoch": 3.254595043282343, "grad_norm": 0.7965128061092743, "learning_rate": 4.4168062061025914e-06, "loss": 0.0641, "step": 27446 }, { "epoch": 3.2547136250444684, "grad_norm": 0.2951391176705984, "learning_rate": 4.415443937086411e-06, "loss": 0.0166, "step": 27447 }, { "epoch": 3.254832206806593, "grad_norm": 0.8224164649781752, "learning_rate": 4.4140818578345955e-06, "loss": 0.0483, "step": 27448 }, { "epoch": 3.254950788568718, "grad_norm": 0.4924167428517313, "learning_rate": 4.4127199683597075e-06, "loss": 0.0257, "step": 27449 }, { "epoch": 3.255069370330843, "grad_norm": 0.4331850706488214, "learning_rate": 4.4113582686742844e-06, "loss": 0.0202, "step": 27450 }, { "epoch": 3.2551879520929683, "grad_norm": 0.3856871251883508, "learning_rate": 4.409996758790899e-06, "loss": 0.019, "step": 27451 }, { "epoch": 3.255306533855093, "grad_norm": 0.4880504036834003, "learning_rate": 4.408635438722092e-06, "loss": 0.0243, "step": 27452 }, { "epoch": 3.2554251156172183, "grad_norm": 0.49520720819912356, "learning_rate": 4.407274308480419e-06, "loss": 0.0218, "step": 27453 }, { "epoch": 3.255543697379343, "grad_norm": 0.5883904733659263, "learning_rate": 4.405913368078416e-06, "loss": 0.0365, "step": 27454 }, { "epoch": 3.255662279141468, "grad_norm": 0.49430067601908184, "learning_rate": 4.404552617528646e-06, "loss": 0.0227, "step": 27455 }, { "epoch": 3.255780860903593, "grad_norm": 0.2685945345705816, "learning_rate": 4.4031920568436385e-06, "loss": 0.0089, "step": 27456 }, { "epoch": 3.255899442665718, "grad_norm": 0.44645219998441327, "learning_rate": 4.4018316860359454e-06, "loss": 0.0229, "step": 27457 }, { "epoch": 3.256018024427843, "grad_norm": 0.3976671457414399, "learning_rate": 4.400471505118103e-06, "loss": 0.0215, "step": 27458 }, { "epoch": 3.256136606189968, "grad_norm": 0.43174570856963485, "learning_rate": 4.399111514102655e-06, "loss": 0.0243, "step": 27459 }, { "epoch": 3.256255187952093, "grad_norm": 0.404817289182233, "learning_rate": 4.397751713002141e-06, "loss": 0.0169, "step": 27460 }, { "epoch": 3.256373769714218, "grad_norm": 0.4575109253054868, "learning_rate": 4.39639210182908e-06, "loss": 0.0193, "step": 27461 }, { "epoch": 3.256492351476343, "grad_norm": 0.6560206630339931, "learning_rate": 4.3950326805960285e-06, "loss": 0.0334, "step": 27462 }, { "epoch": 3.256610933238468, "grad_norm": 0.4161816220360348, "learning_rate": 4.393673449315505e-06, "loss": 0.0241, "step": 27463 }, { "epoch": 3.256729515000593, "grad_norm": 0.6185252030213831, "learning_rate": 4.3923144080000475e-06, "loss": 0.0314, "step": 27464 }, { "epoch": 3.256848096762718, "grad_norm": 0.5880207902676712, "learning_rate": 4.390955556662168e-06, "loss": 0.0299, "step": 27465 }, { "epoch": 3.256966678524843, "grad_norm": 0.6805509390063731, "learning_rate": 4.389596895314418e-06, "loss": 0.0277, "step": 27466 }, { "epoch": 3.257085260286968, "grad_norm": 0.48353745638704393, "learning_rate": 4.388238423969307e-06, "loss": 0.0258, "step": 27467 }, { "epoch": 3.2572038420490927, "grad_norm": 0.6229152534666167, "learning_rate": 4.3868801426393604e-06, "loss": 0.027, "step": 27468 }, { "epoch": 3.257322423811218, "grad_norm": 0.7407065867554066, "learning_rate": 4.385522051337099e-06, "loss": 0.0503, "step": 27469 }, { "epoch": 3.2574410055733427, "grad_norm": 0.5813011194619719, "learning_rate": 4.384164150075049e-06, "loss": 0.0316, "step": 27470 }, { "epoch": 3.257559587335468, "grad_norm": 0.7116677240370645, "learning_rate": 4.3828064388657274e-06, "loss": 0.0486, "step": 27471 }, { "epoch": 3.2576781690975927, "grad_norm": 0.398604772940103, "learning_rate": 4.381448917721637e-06, "loss": 0.0209, "step": 27472 }, { "epoch": 3.257796750859718, "grad_norm": 0.4414383057302515, "learning_rate": 4.380091586655316e-06, "loss": 0.0245, "step": 27473 }, { "epoch": 3.2579153326218426, "grad_norm": 0.6814042412884624, "learning_rate": 4.378734445679258e-06, "loss": 0.0377, "step": 27474 }, { "epoch": 3.258033914383968, "grad_norm": 0.5233947258855991, "learning_rate": 4.37737749480599e-06, "loss": 0.0176, "step": 27475 }, { "epoch": 3.2581524961460926, "grad_norm": 0.600634991853613, "learning_rate": 4.376020734047995e-06, "loss": 0.0362, "step": 27476 }, { "epoch": 3.258271077908218, "grad_norm": 0.5989517079428535, "learning_rate": 4.374664163417813e-06, "loss": 0.0286, "step": 27477 }, { "epoch": 3.2583896596703426, "grad_norm": 0.3409289829043572, "learning_rate": 4.3733077829279294e-06, "loss": 0.0174, "step": 27478 }, { "epoch": 3.2585082414324678, "grad_norm": 0.5073487364764008, "learning_rate": 4.371951592590853e-06, "loss": 0.0346, "step": 27479 }, { "epoch": 3.2586268231945925, "grad_norm": 0.49208102088161193, "learning_rate": 4.370595592419088e-06, "loss": 0.0253, "step": 27480 }, { "epoch": 3.2587454049567177, "grad_norm": 0.4238932909236821, "learning_rate": 4.369239782425133e-06, "loss": 0.0228, "step": 27481 }, { "epoch": 3.2588639867188425, "grad_norm": 0.6013816888425871, "learning_rate": 4.367884162621497e-06, "loss": 0.0324, "step": 27482 }, { "epoch": 3.2589825684809677, "grad_norm": 0.559372945908543, "learning_rate": 4.366528733020653e-06, "loss": 0.0304, "step": 27483 }, { "epoch": 3.2591011502430924, "grad_norm": 0.32586191497277933, "learning_rate": 4.365173493635124e-06, "loss": 0.0148, "step": 27484 }, { "epoch": 3.2592197320052176, "grad_norm": 0.6857722350690668, "learning_rate": 4.363818444477385e-06, "loss": 0.0355, "step": 27485 }, { "epoch": 3.2593383137673424, "grad_norm": 0.34961616325701544, "learning_rate": 4.362463585559942e-06, "loss": 0.0175, "step": 27486 }, { "epoch": 3.2594568955294676, "grad_norm": 0.5161976983702874, "learning_rate": 4.361108916895265e-06, "loss": 0.0329, "step": 27487 }, { "epoch": 3.2595754772915924, "grad_norm": 0.3485586314415198, "learning_rate": 4.359754438495867e-06, "loss": 0.017, "step": 27488 }, { "epoch": 3.2596940590537176, "grad_norm": 0.8292672034950662, "learning_rate": 4.3584001503742145e-06, "loss": 0.0461, "step": 27489 }, { "epoch": 3.2598126408158423, "grad_norm": 0.46736087449348834, "learning_rate": 4.357046052542801e-06, "loss": 0.023, "step": 27490 }, { "epoch": 3.2599312225779675, "grad_norm": 0.572638981901984, "learning_rate": 4.355692145014109e-06, "loss": 0.0377, "step": 27491 }, { "epoch": 3.2600498043400927, "grad_norm": 0.9144498748028626, "learning_rate": 4.3543384278006195e-06, "loss": 0.0304, "step": 27492 }, { "epoch": 3.2601683861022175, "grad_norm": 0.4149419992445855, "learning_rate": 4.35298490091482e-06, "loss": 0.0227, "step": 27493 }, { "epoch": 3.2602869678643422, "grad_norm": 0.3696008057527235, "learning_rate": 4.351631564369171e-06, "loss": 0.0181, "step": 27494 }, { "epoch": 3.2604055496264674, "grad_norm": 0.639151818160008, "learning_rate": 4.3502784181761625e-06, "loss": 0.0234, "step": 27495 }, { "epoch": 3.2605241313885927, "grad_norm": 0.6053286782244033, "learning_rate": 4.348925462348264e-06, "loss": 0.0354, "step": 27496 }, { "epoch": 3.2606427131507174, "grad_norm": 0.4896273952442982, "learning_rate": 4.3475726968979444e-06, "loss": 0.0347, "step": 27497 }, { "epoch": 3.260761294912842, "grad_norm": 0.47705584126866163, "learning_rate": 4.346220121837682e-06, "loss": 0.0235, "step": 27498 }, { "epoch": 3.2608798766749674, "grad_norm": 0.5137712480765328, "learning_rate": 4.344867737179944e-06, "loss": 0.0273, "step": 27499 }, { "epoch": 3.2609984584370926, "grad_norm": 0.6361111562050317, "learning_rate": 4.343515542937201e-06, "loss": 0.0278, "step": 27500 }, { "epoch": 3.2611170401992173, "grad_norm": 0.4917233911965857, "learning_rate": 4.342163539121907e-06, "loss": 0.0274, "step": 27501 }, { "epoch": 3.2612356219613425, "grad_norm": 0.28640298054872043, "learning_rate": 4.340811725746535e-06, "loss": 0.0125, "step": 27502 }, { "epoch": 3.2613542037234673, "grad_norm": 0.5927389163474427, "learning_rate": 4.339460102823542e-06, "loss": 0.0428, "step": 27503 }, { "epoch": 3.2614727854855925, "grad_norm": 0.37485249297835155, "learning_rate": 4.338108670365401e-06, "loss": 0.0178, "step": 27504 }, { "epoch": 3.2615913672477173, "grad_norm": 0.6575010603286068, "learning_rate": 4.336757428384553e-06, "loss": 0.0261, "step": 27505 }, { "epoch": 3.2617099490098425, "grad_norm": 0.6771885293329892, "learning_rate": 4.335406376893461e-06, "loss": 0.0357, "step": 27506 }, { "epoch": 3.261828530771967, "grad_norm": 0.45277598299350585, "learning_rate": 4.334055515904584e-06, "loss": 0.022, "step": 27507 }, { "epoch": 3.2619471125340924, "grad_norm": 0.6142434818157242, "learning_rate": 4.332704845430371e-06, "loss": 0.0402, "step": 27508 }, { "epoch": 3.262065694296217, "grad_norm": 0.2889210235062536, "learning_rate": 4.331354365483284e-06, "loss": 0.0154, "step": 27509 }, { "epoch": 3.2621842760583424, "grad_norm": 0.5994774610292379, "learning_rate": 4.33000407607575e-06, "loss": 0.0265, "step": 27510 }, { "epoch": 3.262302857820467, "grad_norm": 0.5957955563208144, "learning_rate": 4.328653977220243e-06, "loss": 0.0426, "step": 27511 }, { "epoch": 3.2624214395825923, "grad_norm": 0.37742456867804114, "learning_rate": 4.327304068929192e-06, "loss": 0.0153, "step": 27512 }, { "epoch": 3.262540021344717, "grad_norm": 0.5092078409694742, "learning_rate": 4.325954351215047e-06, "loss": 0.033, "step": 27513 }, { "epoch": 3.2626586031068423, "grad_norm": 0.31098454367864525, "learning_rate": 4.32460482409025e-06, "loss": 0.0146, "step": 27514 }, { "epoch": 3.262777184868967, "grad_norm": 0.6873067105879418, "learning_rate": 4.323255487567252e-06, "loss": 0.0333, "step": 27515 }, { "epoch": 3.2628957666310923, "grad_norm": 0.4764696150234353, "learning_rate": 4.321906341658472e-06, "loss": 0.0258, "step": 27516 }, { "epoch": 3.263014348393217, "grad_norm": 0.4166926236052506, "learning_rate": 4.320557386376361e-06, "loss": 0.0212, "step": 27517 }, { "epoch": 3.2631329301553422, "grad_norm": 0.6177278780077081, "learning_rate": 4.319208621733353e-06, "loss": 0.0266, "step": 27518 }, { "epoch": 3.263251511917467, "grad_norm": 0.7818841848663666, "learning_rate": 4.317860047741878e-06, "loss": 0.0343, "step": 27519 }, { "epoch": 3.263370093679592, "grad_norm": 0.40615298766792934, "learning_rate": 4.31651166441438e-06, "loss": 0.0177, "step": 27520 }, { "epoch": 3.263488675441717, "grad_norm": 0.4371795398615913, "learning_rate": 4.315163471763267e-06, "loss": 0.0269, "step": 27521 }, { "epoch": 3.263607257203842, "grad_norm": 0.702712954922298, "learning_rate": 4.313815469800994e-06, "loss": 0.0377, "step": 27522 }, { "epoch": 3.263725838965967, "grad_norm": 0.6670333974296963, "learning_rate": 4.31246765853997e-06, "loss": 0.0378, "step": 27523 }, { "epoch": 3.263844420728092, "grad_norm": 0.7610393599110505, "learning_rate": 4.31112003799263e-06, "loss": 0.0401, "step": 27524 }, { "epoch": 3.263963002490217, "grad_norm": 0.7162927234035701, "learning_rate": 4.309772608171383e-06, "loss": 0.0371, "step": 27525 }, { "epoch": 3.264081584252342, "grad_norm": 0.452119410483237, "learning_rate": 4.308425369088673e-06, "loss": 0.0236, "step": 27526 }, { "epoch": 3.264200166014467, "grad_norm": 0.5573616953009053, "learning_rate": 4.307078320756899e-06, "loss": 0.0286, "step": 27527 }, { "epoch": 3.264318747776592, "grad_norm": 0.5137920545947589, "learning_rate": 4.3057314631884864e-06, "loss": 0.0297, "step": 27528 }, { "epoch": 3.264437329538717, "grad_norm": 0.5941374684870109, "learning_rate": 4.304384796395855e-06, "loss": 0.0301, "step": 27529 }, { "epoch": 3.264555911300842, "grad_norm": 0.4305857069581418, "learning_rate": 4.303038320391417e-06, "loss": 0.0216, "step": 27530 }, { "epoch": 3.2646744930629668, "grad_norm": 0.3240217776047274, "learning_rate": 4.3016920351875905e-06, "loss": 0.0163, "step": 27531 }, { "epoch": 3.264793074825092, "grad_norm": 0.3815836540893597, "learning_rate": 4.300345940796771e-06, "loss": 0.0173, "step": 27532 }, { "epoch": 3.2649116565872167, "grad_norm": 0.6611722555063648, "learning_rate": 4.299000037231391e-06, "loss": 0.0491, "step": 27533 }, { "epoch": 3.265030238349342, "grad_norm": 0.5150464550686736, "learning_rate": 4.297654324503836e-06, "loss": 0.0232, "step": 27534 }, { "epoch": 3.2651488201114667, "grad_norm": 0.6167451474387654, "learning_rate": 4.296308802626531e-06, "loss": 0.0385, "step": 27535 }, { "epoch": 3.265267401873592, "grad_norm": 0.4045712305398629, "learning_rate": 4.294963471611854e-06, "loss": 0.0196, "step": 27536 }, { "epoch": 3.2653859836357166, "grad_norm": 0.7410824147261891, "learning_rate": 4.293618331472238e-06, "loss": 0.0265, "step": 27537 }, { "epoch": 3.265504565397842, "grad_norm": 0.5506868508489816, "learning_rate": 4.292273382220063e-06, "loss": 0.0305, "step": 27538 }, { "epoch": 3.2656231471599666, "grad_norm": 0.6239591609214893, "learning_rate": 4.2909286238677345e-06, "loss": 0.0349, "step": 27539 }, { "epoch": 3.265741728922092, "grad_norm": 0.606824917008572, "learning_rate": 4.289584056427648e-06, "loss": 0.0339, "step": 27540 }, { "epoch": 3.265860310684217, "grad_norm": 0.5970452030744962, "learning_rate": 4.288239679912201e-06, "loss": 0.0301, "step": 27541 }, { "epoch": 3.2659788924463418, "grad_norm": 0.6209764553749242, "learning_rate": 4.286895494333792e-06, "loss": 0.0263, "step": 27542 }, { "epoch": 3.2660974742084665, "grad_norm": 0.312877274120093, "learning_rate": 4.285551499704793e-06, "loss": 0.0178, "step": 27543 }, { "epoch": 3.2662160559705917, "grad_norm": 0.7628541817431468, "learning_rate": 4.2842076960376205e-06, "loss": 0.0504, "step": 27544 }, { "epoch": 3.266334637732717, "grad_norm": 0.31963281968597956, "learning_rate": 4.282864083344643e-06, "loss": 0.0147, "step": 27545 }, { "epoch": 3.2664532194948417, "grad_norm": 0.4168326656487008, "learning_rate": 4.281520661638253e-06, "loss": 0.0247, "step": 27546 }, { "epoch": 3.2665718012569664, "grad_norm": 0.6579037677426373, "learning_rate": 4.280177430930835e-06, "loss": 0.0319, "step": 27547 }, { "epoch": 3.2666903830190916, "grad_norm": 0.9389573444389527, "learning_rate": 4.278834391234776e-06, "loss": 0.0401, "step": 27548 }, { "epoch": 3.266808964781217, "grad_norm": 0.4593748995723798, "learning_rate": 4.277491542562456e-06, "loss": 0.0177, "step": 27549 }, { "epoch": 3.2669275465433416, "grad_norm": 0.3132922090935393, "learning_rate": 4.276148884926249e-06, "loss": 0.0139, "step": 27550 }, { "epoch": 3.267046128305467, "grad_norm": 0.7163484651250634, "learning_rate": 4.2748064183385365e-06, "loss": 0.0361, "step": 27551 }, { "epoch": 3.2671647100675916, "grad_norm": 0.7228543479727375, "learning_rate": 4.273464142811693e-06, "loss": 0.0366, "step": 27552 }, { "epoch": 3.2672832918297168, "grad_norm": 0.5897531251084762, "learning_rate": 4.2721220583581e-06, "loss": 0.025, "step": 27553 }, { "epoch": 3.2674018735918415, "grad_norm": 0.29706812898836005, "learning_rate": 4.270780164990115e-06, "loss": 0.0143, "step": 27554 }, { "epoch": 3.2675204553539667, "grad_norm": 0.7812462879803099, "learning_rate": 4.269438462720119e-06, "loss": 0.0306, "step": 27555 }, { "epoch": 3.2676390371160915, "grad_norm": 0.576972745123371, "learning_rate": 4.2680969515604795e-06, "loss": 0.0407, "step": 27556 }, { "epoch": 3.2677576188782167, "grad_norm": 0.6879410073480865, "learning_rate": 4.266755631523561e-06, "loss": 0.0305, "step": 27557 }, { "epoch": 3.2678762006403415, "grad_norm": 0.3629331393123434, "learning_rate": 4.265414502621734e-06, "loss": 0.0102, "step": 27558 }, { "epoch": 3.2679947824024667, "grad_norm": 0.4399623409838315, "learning_rate": 4.264073564867355e-06, "loss": 0.0188, "step": 27559 }, { "epoch": 3.2681133641645914, "grad_norm": 0.34694610684396254, "learning_rate": 4.2627328182728e-06, "loss": 0.0165, "step": 27560 }, { "epoch": 3.2682319459267166, "grad_norm": 0.3658092218422071, "learning_rate": 4.261392262850408e-06, "loss": 0.0161, "step": 27561 }, { "epoch": 3.2683505276888414, "grad_norm": 0.37342700688862956, "learning_rate": 4.260051898612552e-06, "loss": 0.025, "step": 27562 }, { "epoch": 3.2684691094509666, "grad_norm": 0.31995926267479696, "learning_rate": 4.258711725571582e-06, "loss": 0.0152, "step": 27563 }, { "epoch": 3.2685876912130913, "grad_norm": 0.7619027266502364, "learning_rate": 4.257371743739863e-06, "loss": 0.0357, "step": 27564 }, { "epoch": 3.2687062729752165, "grad_norm": 0.471043609957352, "learning_rate": 4.256031953129733e-06, "loss": 0.0176, "step": 27565 }, { "epoch": 3.2688248547373413, "grad_norm": 0.46301898044629225, "learning_rate": 4.254692353753551e-06, "loss": 0.0277, "step": 27566 }, { "epoch": 3.2689434364994665, "grad_norm": 0.5217453202892884, "learning_rate": 4.253352945623665e-06, "loss": 0.0295, "step": 27567 }, { "epoch": 3.2690620182615913, "grad_norm": 0.4210009910570425, "learning_rate": 4.252013728752424e-06, "loss": 0.0184, "step": 27568 }, { "epoch": 3.2691806000237165, "grad_norm": 0.6149979361791961, "learning_rate": 4.250674703152175e-06, "loss": 0.035, "step": 27569 }, { "epoch": 3.2692991817858412, "grad_norm": 0.7278667119439104, "learning_rate": 4.249335868835258e-06, "loss": 0.0396, "step": 27570 }, { "epoch": 3.2694177635479664, "grad_norm": 0.47475326103259835, "learning_rate": 4.247997225814027e-06, "loss": 0.0258, "step": 27571 }, { "epoch": 3.269536345310091, "grad_norm": 0.557910598745053, "learning_rate": 4.246658774100804e-06, "loss": 0.026, "step": 27572 }, { "epoch": 3.2696549270722164, "grad_norm": 1.0363389612712, "learning_rate": 4.24532051370794e-06, "loss": 0.0615, "step": 27573 }, { "epoch": 3.269773508834341, "grad_norm": 0.9215530412542449, "learning_rate": 4.243982444647771e-06, "loss": 0.0262, "step": 27574 }, { "epoch": 3.2698920905964663, "grad_norm": 0.9850574648068916, "learning_rate": 4.242644566932636e-06, "loss": 0.0302, "step": 27575 }, { "epoch": 3.270010672358591, "grad_norm": 0.8337972565305137, "learning_rate": 4.241306880574858e-06, "loss": 0.0396, "step": 27576 }, { "epoch": 3.2701292541207163, "grad_norm": 0.8187755464292139, "learning_rate": 4.239969385586773e-06, "loss": 0.0462, "step": 27577 }, { "epoch": 3.270247835882841, "grad_norm": 0.5217700517336539, "learning_rate": 4.238632081980714e-06, "loss": 0.024, "step": 27578 }, { "epoch": 3.2703664176449663, "grad_norm": 0.6239463199406439, "learning_rate": 4.237294969769009e-06, "loss": 0.0319, "step": 27579 }, { "epoch": 3.270484999407091, "grad_norm": 0.8317569086336254, "learning_rate": 4.23595804896399e-06, "loss": 0.0437, "step": 27580 }, { "epoch": 3.2706035811692162, "grad_norm": 0.5151762814881445, "learning_rate": 4.234621319577961e-06, "loss": 0.0253, "step": 27581 }, { "epoch": 3.270722162931341, "grad_norm": 0.6623125651074556, "learning_rate": 4.23328478162327e-06, "loss": 0.025, "step": 27582 }, { "epoch": 3.270840744693466, "grad_norm": 0.36951341046968245, "learning_rate": 4.231948435112223e-06, "loss": 0.0242, "step": 27583 }, { "epoch": 3.270959326455591, "grad_norm": 0.6202896253154944, "learning_rate": 4.2306122800571445e-06, "loss": 0.032, "step": 27584 }, { "epoch": 3.271077908217716, "grad_norm": 0.5012170902809104, "learning_rate": 4.229276316470351e-06, "loss": 0.0287, "step": 27585 }, { "epoch": 3.271196489979841, "grad_norm": 0.7457675802822706, "learning_rate": 4.227940544364167e-06, "loss": 0.0278, "step": 27586 }, { "epoch": 3.271315071741966, "grad_norm": 0.5459139428757235, "learning_rate": 4.226604963750891e-06, "loss": 0.0224, "step": 27587 }, { "epoch": 3.271433653504091, "grad_norm": 0.6720082601202649, "learning_rate": 4.225269574642843e-06, "loss": 0.0311, "step": 27588 }, { "epoch": 3.271552235266216, "grad_norm": 0.5711726096600208, "learning_rate": 4.223934377052336e-06, "loss": 0.02, "step": 27589 }, { "epoch": 3.2716708170283413, "grad_norm": 0.6547966606921288, "learning_rate": 4.222599370991676e-06, "loss": 0.0262, "step": 27590 }, { "epoch": 3.271789398790466, "grad_norm": 0.6222067998719052, "learning_rate": 4.221264556473176e-06, "loss": 0.033, "step": 27591 }, { "epoch": 3.271907980552591, "grad_norm": 0.6685532085809174, "learning_rate": 4.219929933509123e-06, "loss": 0.0322, "step": 27592 }, { "epoch": 3.272026562314716, "grad_norm": 0.4387945476706775, "learning_rate": 4.218595502111847e-06, "loss": 0.0246, "step": 27593 }, { "epoch": 3.272145144076841, "grad_norm": 0.4534282942433627, "learning_rate": 4.217261262293631e-06, "loss": 0.024, "step": 27594 }, { "epoch": 3.272263725838966, "grad_norm": 0.42240394801791253, "learning_rate": 4.215927214066786e-06, "loss": 0.0216, "step": 27595 }, { "epoch": 3.2723823076010907, "grad_norm": 0.4102414006956543, "learning_rate": 4.214593357443594e-06, "loss": 0.0239, "step": 27596 }, { "epoch": 3.272500889363216, "grad_norm": 0.3186082529883775, "learning_rate": 4.213259692436367e-06, "loss": 0.0157, "step": 27597 }, { "epoch": 3.272619471125341, "grad_norm": 0.5640467636711263, "learning_rate": 4.211926219057402e-06, "loss": 0.03, "step": 27598 }, { "epoch": 3.272738052887466, "grad_norm": 0.42813264365961584, "learning_rate": 4.210592937318975e-06, "loss": 0.0235, "step": 27599 }, { "epoch": 3.2728566346495906, "grad_norm": 0.7178717389345427, "learning_rate": 4.209259847233396e-06, "loss": 0.0355, "step": 27600 }, { "epoch": 3.272975216411716, "grad_norm": 0.6996770513165891, "learning_rate": 4.207926948812943e-06, "loss": 0.0287, "step": 27601 }, { "epoch": 3.273093798173841, "grad_norm": 0.4316505420066711, "learning_rate": 4.206594242069911e-06, "loss": 0.017, "step": 27602 }, { "epoch": 3.273212379935966, "grad_norm": 0.7311381281792692, "learning_rate": 4.205261727016571e-06, "loss": 0.0324, "step": 27603 }, { "epoch": 3.273330961698091, "grad_norm": 0.5146609717396601, "learning_rate": 4.203929403665227e-06, "loss": 0.0247, "step": 27604 }, { "epoch": 3.2734495434602158, "grad_norm": 0.4398504643338687, "learning_rate": 4.202597272028147e-06, "loss": 0.0205, "step": 27605 }, { "epoch": 3.273568125222341, "grad_norm": 0.6550494655096323, "learning_rate": 4.201265332117618e-06, "loss": 0.0283, "step": 27606 }, { "epoch": 3.2736867069844657, "grad_norm": 0.41263978727171396, "learning_rate": 4.199933583945917e-06, "loss": 0.0199, "step": 27607 }, { "epoch": 3.273805288746591, "grad_norm": 0.31091481613516364, "learning_rate": 4.198602027525323e-06, "loss": 0.0144, "step": 27608 }, { "epoch": 3.2739238705087157, "grad_norm": 0.350973207217307, "learning_rate": 4.197270662868113e-06, "loss": 0.0202, "step": 27609 }, { "epoch": 3.274042452270841, "grad_norm": 0.6256917674559739, "learning_rate": 4.195939489986553e-06, "loss": 0.037, "step": 27610 }, { "epoch": 3.2741610340329657, "grad_norm": 0.6081137236548672, "learning_rate": 4.19460850889292e-06, "loss": 0.0243, "step": 27611 }, { "epoch": 3.274279615795091, "grad_norm": 0.5623160960057183, "learning_rate": 4.193277719599484e-06, "loss": 0.0276, "step": 27612 }, { "epoch": 3.2743981975572156, "grad_norm": 0.5532387294916726, "learning_rate": 4.191947122118517e-06, "loss": 0.0377, "step": 27613 }, { "epoch": 3.274516779319341, "grad_norm": 0.33095411037069605, "learning_rate": 4.190616716462268e-06, "loss": 0.0154, "step": 27614 }, { "epoch": 3.2746353610814656, "grad_norm": 0.4994354259666392, "learning_rate": 4.189286502643028e-06, "loss": 0.0266, "step": 27615 }, { "epoch": 3.274753942843591, "grad_norm": 0.8914823230408414, "learning_rate": 4.187956480673039e-06, "loss": 0.0513, "step": 27616 }, { "epoch": 3.2748725246057155, "grad_norm": 0.4103752090470813, "learning_rate": 4.1866266505645735e-06, "loss": 0.0164, "step": 27617 }, { "epoch": 3.2749911063678407, "grad_norm": 0.7542518861801499, "learning_rate": 4.185297012329883e-06, "loss": 0.0308, "step": 27618 }, { "epoch": 3.2751096881299655, "grad_norm": 0.5824155363166574, "learning_rate": 4.183967565981231e-06, "loss": 0.0333, "step": 27619 }, { "epoch": 3.2752282698920907, "grad_norm": 0.669132380706832, "learning_rate": 4.182638311530879e-06, "loss": 0.0369, "step": 27620 }, { "epoch": 3.2753468516542155, "grad_norm": 0.7758817493087665, "learning_rate": 4.1813092489910664e-06, "loss": 0.0336, "step": 27621 }, { "epoch": 3.2754654334163407, "grad_norm": 0.7741616706257758, "learning_rate": 4.179980378374054e-06, "loss": 0.0455, "step": 27622 }, { "epoch": 3.2755840151784654, "grad_norm": 0.9856173853623642, "learning_rate": 4.178651699692091e-06, "loss": 0.0584, "step": 27623 }, { "epoch": 3.2757025969405906, "grad_norm": 0.39190650870230215, "learning_rate": 4.1773232129574316e-06, "loss": 0.02, "step": 27624 }, { "epoch": 3.2758211787027154, "grad_norm": 0.7197819911606649, "learning_rate": 4.175994918182314e-06, "loss": 0.0346, "step": 27625 }, { "epoch": 3.2759397604648406, "grad_norm": 0.345889754627339, "learning_rate": 4.174666815378986e-06, "loss": 0.0172, "step": 27626 }, { "epoch": 3.2760583422269653, "grad_norm": 0.6524784025737468, "learning_rate": 4.17333890455969e-06, "loss": 0.0321, "step": 27627 }, { "epoch": 3.2761769239890906, "grad_norm": 0.34003757512996713, "learning_rate": 4.172011185736674e-06, "loss": 0.0167, "step": 27628 }, { "epoch": 3.2762955057512153, "grad_norm": 0.5774918414914245, "learning_rate": 4.17068365892217e-06, "loss": 0.0367, "step": 27629 }, { "epoch": 3.2764140875133405, "grad_norm": 0.6646852089335636, "learning_rate": 4.1693563241284235e-06, "loss": 0.0397, "step": 27630 }, { "epoch": 3.2765326692754653, "grad_norm": 0.5211842791073708, "learning_rate": 4.168029181367672e-06, "loss": 0.0273, "step": 27631 }, { "epoch": 3.2766512510375905, "grad_norm": 0.640270896184452, "learning_rate": 4.166702230652139e-06, "loss": 0.0348, "step": 27632 }, { "epoch": 3.2767698327997152, "grad_norm": 0.44051121513001035, "learning_rate": 4.165375471994066e-06, "loss": 0.023, "step": 27633 }, { "epoch": 3.2768884145618404, "grad_norm": 0.4818542001877877, "learning_rate": 4.164048905405679e-06, "loss": 0.0226, "step": 27634 }, { "epoch": 3.277006996323965, "grad_norm": 0.5018849161755313, "learning_rate": 4.162722530899219e-06, "loss": 0.0248, "step": 27635 }, { "epoch": 3.2771255780860904, "grad_norm": 0.8262444333590893, "learning_rate": 4.161396348486895e-06, "loss": 0.0397, "step": 27636 }, { "epoch": 3.277244159848215, "grad_norm": 0.6433167318189187, "learning_rate": 4.160070358180945e-06, "loss": 0.0414, "step": 27637 }, { "epoch": 3.2773627416103404, "grad_norm": 0.7658351411955707, "learning_rate": 4.158744559993591e-06, "loss": 0.025, "step": 27638 }, { "epoch": 3.2774813233724656, "grad_norm": 0.6407528001087418, "learning_rate": 4.157418953937056e-06, "loss": 0.0336, "step": 27639 }, { "epoch": 3.2775999051345903, "grad_norm": 0.34166358482379533, "learning_rate": 4.156093540023559e-06, "loss": 0.0175, "step": 27640 }, { "epoch": 3.277718486896715, "grad_norm": 0.5133153843082404, "learning_rate": 4.154768318265317e-06, "loss": 0.0309, "step": 27641 }, { "epoch": 3.2778370686588403, "grad_norm": 0.5465299861174026, "learning_rate": 4.153443288674558e-06, "loss": 0.0198, "step": 27642 }, { "epoch": 3.2779556504209655, "grad_norm": 0.5994331055627089, "learning_rate": 4.152118451263479e-06, "loss": 0.028, "step": 27643 }, { "epoch": 3.2780742321830902, "grad_norm": 0.6191744671876762, "learning_rate": 4.1507938060443065e-06, "loss": 0.0333, "step": 27644 }, { "epoch": 3.278192813945215, "grad_norm": 0.5706617004853547, "learning_rate": 4.149469353029245e-06, "loss": 0.0223, "step": 27645 }, { "epoch": 3.27831139570734, "grad_norm": 0.4779353368012013, "learning_rate": 4.148145092230512e-06, "loss": 0.0245, "step": 27646 }, { "epoch": 3.2784299774694654, "grad_norm": 0.5175324109791112, "learning_rate": 4.1468210236603075e-06, "loss": 0.0311, "step": 27647 }, { "epoch": 3.27854855923159, "grad_norm": 0.2831581917262618, "learning_rate": 4.145497147330843e-06, "loss": 0.0117, "step": 27648 }, { "epoch": 3.278667140993715, "grad_norm": 0.3047512035179695, "learning_rate": 4.144173463254319e-06, "loss": 0.0137, "step": 27649 }, { "epoch": 3.27878572275584, "grad_norm": 0.38621876385161175, "learning_rate": 4.142849971442941e-06, "loss": 0.0218, "step": 27650 }, { "epoch": 3.2789043045179653, "grad_norm": 0.31895243193914274, "learning_rate": 4.141526671908915e-06, "loss": 0.0161, "step": 27651 }, { "epoch": 3.27902288628009, "grad_norm": 0.5092611959870055, "learning_rate": 4.1402035646644215e-06, "loss": 0.0292, "step": 27652 }, { "epoch": 3.2791414680422153, "grad_norm": 0.5718087583146946, "learning_rate": 4.1388806497216855e-06, "loss": 0.0281, "step": 27653 }, { "epoch": 3.27926004980434, "grad_norm": 0.33576564466761255, "learning_rate": 4.1375579270928835e-06, "loss": 0.0214, "step": 27654 }, { "epoch": 3.2793786315664653, "grad_norm": 1.0016903289680932, "learning_rate": 4.136235396790211e-06, "loss": 0.0645, "step": 27655 }, { "epoch": 3.27949721332859, "grad_norm": 0.6369415869141795, "learning_rate": 4.134913058825865e-06, "loss": 0.0301, "step": 27656 }, { "epoch": 3.279615795090715, "grad_norm": 0.567381667725607, "learning_rate": 4.133590913212032e-06, "loss": 0.0255, "step": 27657 }, { "epoch": 3.27973437685284, "grad_norm": 0.49417850150946663, "learning_rate": 4.132268959960911e-06, "loss": 0.0279, "step": 27658 }, { "epoch": 3.279852958614965, "grad_norm": 0.5649575334916819, "learning_rate": 4.13094719908467e-06, "loss": 0.0215, "step": 27659 }, { "epoch": 3.27997154037709, "grad_norm": 0.6099357536340307, "learning_rate": 4.129625630595513e-06, "loss": 0.0315, "step": 27660 }, { "epoch": 3.280090122139215, "grad_norm": 0.3478587190235009, "learning_rate": 4.12830425450561e-06, "loss": 0.0185, "step": 27661 }, { "epoch": 3.28020870390134, "grad_norm": 0.35622541647046807, "learning_rate": 4.126983070827153e-06, "loss": 0.0201, "step": 27662 }, { "epoch": 3.280327285663465, "grad_norm": 0.5774038817663291, "learning_rate": 4.125662079572304e-06, "loss": 0.0268, "step": 27663 }, { "epoch": 3.28044586742559, "grad_norm": 0.37295195069442144, "learning_rate": 4.124341280753266e-06, "loss": 0.0161, "step": 27664 }, { "epoch": 3.280564449187715, "grad_norm": 0.4981730602286615, "learning_rate": 4.123020674382194e-06, "loss": 0.0192, "step": 27665 }, { "epoch": 3.28068303094984, "grad_norm": 0.42672788283114627, "learning_rate": 4.1217002604712726e-06, "loss": 0.0227, "step": 27666 }, { "epoch": 3.280801612711965, "grad_norm": 0.6283968614900276, "learning_rate": 4.1203800390326706e-06, "loss": 0.0305, "step": 27667 }, { "epoch": 3.28092019447409, "grad_norm": 0.4574462688110259, "learning_rate": 4.119060010078563e-06, "loss": 0.018, "step": 27668 }, { "epoch": 3.281038776236215, "grad_norm": 0.450768164512053, "learning_rate": 4.117740173621118e-06, "loss": 0.0229, "step": 27669 }, { "epoch": 3.2811573579983397, "grad_norm": 0.5434316316582783, "learning_rate": 4.11642052967249e-06, "loss": 0.0288, "step": 27670 }, { "epoch": 3.281275939760465, "grad_norm": 0.4318013514490227, "learning_rate": 4.115101078244871e-06, "loss": 0.0167, "step": 27671 }, { "epoch": 3.2813945215225897, "grad_norm": 0.5655376558489232, "learning_rate": 4.113781819350399e-06, "loss": 0.0262, "step": 27672 }, { "epoch": 3.281513103284715, "grad_norm": 0.42811558697762947, "learning_rate": 4.112462753001256e-06, "loss": 0.0188, "step": 27673 }, { "epoch": 3.2816316850468397, "grad_norm": 0.730944409151821, "learning_rate": 4.111143879209578e-06, "loss": 0.034, "step": 27674 }, { "epoch": 3.281750266808965, "grad_norm": 0.7838866744250385, "learning_rate": 4.109825197987549e-06, "loss": 0.0263, "step": 27675 }, { "epoch": 3.2818688485710896, "grad_norm": 0.7032740331428541, "learning_rate": 4.108506709347309e-06, "loss": 0.0354, "step": 27676 }, { "epoch": 3.281987430333215, "grad_norm": 0.5017665679043052, "learning_rate": 4.1071884133010216e-06, "loss": 0.0271, "step": 27677 }, { "epoch": 3.2821060120953396, "grad_norm": 0.585310243650561, "learning_rate": 4.105870309860832e-06, "loss": 0.0326, "step": 27678 }, { "epoch": 3.282224593857465, "grad_norm": 0.7409760221376896, "learning_rate": 4.1045523990388994e-06, "loss": 0.0383, "step": 27679 }, { "epoch": 3.2823431756195895, "grad_norm": 0.33580102363846065, "learning_rate": 4.1032346808473755e-06, "loss": 0.0113, "step": 27680 }, { "epoch": 3.2824617573817148, "grad_norm": 0.7314912141816878, "learning_rate": 4.101917155298396e-06, "loss": 0.0449, "step": 27681 }, { "epoch": 3.2825803391438395, "grad_norm": 0.5633475260571601, "learning_rate": 4.100599822404114e-06, "loss": 0.0303, "step": 27682 }, { "epoch": 3.2826989209059647, "grad_norm": 0.5191714706796204, "learning_rate": 4.099282682176669e-06, "loss": 0.0195, "step": 27683 }, { "epoch": 3.2828175026680895, "grad_norm": 0.5088797374184142, "learning_rate": 4.097965734628217e-06, "loss": 0.0304, "step": 27684 }, { "epoch": 3.2829360844302147, "grad_norm": 0.6693199761313292, "learning_rate": 4.0966489797708785e-06, "loss": 0.0247, "step": 27685 }, { "epoch": 3.2830546661923394, "grad_norm": 0.646725299698644, "learning_rate": 4.0953324176168125e-06, "loss": 0.0277, "step": 27686 }, { "epoch": 3.2831732479544646, "grad_norm": 0.578931862593603, "learning_rate": 4.094016048178143e-06, "loss": 0.0356, "step": 27687 }, { "epoch": 3.28329182971659, "grad_norm": 0.47679338082421313, "learning_rate": 4.092699871467006e-06, "loss": 0.029, "step": 27688 }, { "epoch": 3.2834104114787146, "grad_norm": 0.6708136800449935, "learning_rate": 4.09138388749554e-06, "loss": 0.0237, "step": 27689 }, { "epoch": 3.2835289932408394, "grad_norm": 0.4300764409304966, "learning_rate": 4.090068096275876e-06, "loss": 0.0186, "step": 27690 }, { "epoch": 3.2836475750029646, "grad_norm": 0.6477583030576066, "learning_rate": 4.088752497820144e-06, "loss": 0.0334, "step": 27691 }, { "epoch": 3.2837661567650898, "grad_norm": 0.5242404972754818, "learning_rate": 4.08743709214047e-06, "loss": 0.0224, "step": 27692 }, { "epoch": 3.2838847385272145, "grad_norm": 0.47599322654465215, "learning_rate": 4.086121879248978e-06, "loss": 0.0265, "step": 27693 }, { "epoch": 3.2840033202893393, "grad_norm": 0.35063322836847793, "learning_rate": 4.084806859157797e-06, "loss": 0.0172, "step": 27694 }, { "epoch": 3.2841219020514645, "grad_norm": 0.3989833100381488, "learning_rate": 4.0834920318790546e-06, "loss": 0.0238, "step": 27695 }, { "epoch": 3.2842404838135897, "grad_norm": 0.5628632165092365, "learning_rate": 4.08217739742486e-06, "loss": 0.0294, "step": 27696 }, { "epoch": 3.2843590655757144, "grad_norm": 0.5335644767132095, "learning_rate": 4.080862955807341e-06, "loss": 0.023, "step": 27697 }, { "epoch": 3.284477647337839, "grad_norm": 0.4154810777261224, "learning_rate": 4.0795487070386106e-06, "loss": 0.0196, "step": 27698 }, { "epoch": 3.2845962290999644, "grad_norm": 0.547498946325707, "learning_rate": 4.0782346511307884e-06, "loss": 0.0195, "step": 27699 }, { "epoch": 3.2847148108620896, "grad_norm": 0.6672513271205129, "learning_rate": 4.0769207880959836e-06, "loss": 0.033, "step": 27700 }, { "epoch": 3.2848333926242144, "grad_norm": 0.2737417005163801, "learning_rate": 4.075607117946314e-06, "loss": 0.011, "step": 27701 }, { "epoch": 3.2849519743863396, "grad_norm": 0.6911142906498657, "learning_rate": 4.0742936406938935e-06, "loss": 0.03, "step": 27702 }, { "epoch": 3.2850705561484643, "grad_norm": 0.4486216751229737, "learning_rate": 4.072980356350819e-06, "loss": 0.0227, "step": 27703 }, { "epoch": 3.2851891379105895, "grad_norm": 0.40685175396021567, "learning_rate": 4.071667264929202e-06, "loss": 0.0166, "step": 27704 }, { "epoch": 3.2853077196727143, "grad_norm": 0.4205754036225102, "learning_rate": 4.070354366441151e-06, "loss": 0.0167, "step": 27705 }, { "epoch": 3.2854263014348395, "grad_norm": 0.37409449204555006, "learning_rate": 4.069041660898765e-06, "loss": 0.0211, "step": 27706 }, { "epoch": 3.2855448831969642, "grad_norm": 0.717188520785088, "learning_rate": 4.067729148314153e-06, "loss": 0.0327, "step": 27707 }, { "epoch": 3.2856634649590895, "grad_norm": 0.48834564287900045, "learning_rate": 4.066416828699399e-06, "loss": 0.0216, "step": 27708 }, { "epoch": 3.285782046721214, "grad_norm": 0.40461115718298907, "learning_rate": 4.065104702066625e-06, "loss": 0.0266, "step": 27709 }, { "epoch": 3.2859006284833394, "grad_norm": 0.629382263635639, "learning_rate": 4.063792768427904e-06, "loss": 0.022, "step": 27710 }, { "epoch": 3.286019210245464, "grad_norm": 0.7323934934875286, "learning_rate": 4.062481027795348e-06, "loss": 0.0355, "step": 27711 }, { "epoch": 3.2861377920075894, "grad_norm": 0.4667073063648745, "learning_rate": 4.061169480181029e-06, "loss": 0.0229, "step": 27712 }, { "epoch": 3.286256373769714, "grad_norm": 0.4648141881135656, "learning_rate": 4.059858125597063e-06, "loss": 0.0191, "step": 27713 }, { "epoch": 3.2863749555318393, "grad_norm": 0.5525607694964968, "learning_rate": 4.05854696405552e-06, "loss": 0.0281, "step": 27714 }, { "epoch": 3.286493537293964, "grad_norm": 0.6328632388367769, "learning_rate": 4.057235995568496e-06, "loss": 0.0335, "step": 27715 }, { "epoch": 3.2866121190560893, "grad_norm": 0.5503832911816732, "learning_rate": 4.0559252201480715e-06, "loss": 0.014, "step": 27716 }, { "epoch": 3.286730700818214, "grad_norm": 0.505830722730045, "learning_rate": 4.054614637806334e-06, "loss": 0.0222, "step": 27717 }, { "epoch": 3.2868492825803393, "grad_norm": 0.5006015906164181, "learning_rate": 4.05330424855537e-06, "loss": 0.0345, "step": 27718 }, { "epoch": 3.286967864342464, "grad_norm": 0.42648946465412635, "learning_rate": 4.051994052407243e-06, "loss": 0.0188, "step": 27719 }, { "epoch": 3.287086446104589, "grad_norm": 0.6382527540482905, "learning_rate": 4.050684049374054e-06, "loss": 0.0381, "step": 27720 }, { "epoch": 3.287205027866714, "grad_norm": 0.42272266541354075, "learning_rate": 4.049374239467865e-06, "loss": 0.0239, "step": 27721 }, { "epoch": 3.287323609628839, "grad_norm": 0.7279440286170767, "learning_rate": 4.048064622700756e-06, "loss": 0.0328, "step": 27722 }, { "epoch": 3.287442191390964, "grad_norm": 0.5991969001860454, "learning_rate": 4.0467551990847894e-06, "loss": 0.0256, "step": 27723 }, { "epoch": 3.287560773153089, "grad_norm": 0.4457397271028291, "learning_rate": 4.045445968632059e-06, "loss": 0.0231, "step": 27724 }, { "epoch": 3.287679354915214, "grad_norm": 0.6154214337233967, "learning_rate": 4.044136931354611e-06, "loss": 0.0283, "step": 27725 }, { "epoch": 3.287797936677339, "grad_norm": 0.5166235935904505, "learning_rate": 4.042828087264522e-06, "loss": 0.0342, "step": 27726 }, { "epoch": 3.287916518439464, "grad_norm": 0.4985362450061206, "learning_rate": 4.041519436373861e-06, "loss": 0.024, "step": 27727 }, { "epoch": 3.288035100201589, "grad_norm": 0.4834925224755562, "learning_rate": 4.040210978694689e-06, "loss": 0.022, "step": 27728 }, { "epoch": 3.288153681963714, "grad_norm": 0.5543407393156582, "learning_rate": 4.038902714239076e-06, "loss": 0.0294, "step": 27729 }, { "epoch": 3.288272263725839, "grad_norm": 0.7141239943008958, "learning_rate": 4.037594643019063e-06, "loss": 0.0314, "step": 27730 }, { "epoch": 3.288390845487964, "grad_norm": 0.4994459019681836, "learning_rate": 4.036286765046734e-06, "loss": 0.0181, "step": 27731 }, { "epoch": 3.288509427250089, "grad_norm": 0.5381956698100948, "learning_rate": 4.034979080334128e-06, "loss": 0.0295, "step": 27732 }, { "epoch": 3.2886280090122137, "grad_norm": 0.7172920446413101, "learning_rate": 4.033671588893309e-06, "loss": 0.0296, "step": 27733 }, { "epoch": 3.288746590774339, "grad_norm": 0.4681107019976971, "learning_rate": 4.032364290736318e-06, "loss": 0.0237, "step": 27734 }, { "epoch": 3.2888651725364637, "grad_norm": 0.558171957216608, "learning_rate": 4.031057185875228e-06, "loss": 0.0389, "step": 27735 }, { "epoch": 3.288983754298589, "grad_norm": 0.5732088618497633, "learning_rate": 4.029750274322072e-06, "loss": 0.0248, "step": 27736 }, { "epoch": 3.2891023360607137, "grad_norm": 0.7193319987409266, "learning_rate": 4.0284435560889015e-06, "loss": 0.0389, "step": 27737 }, { "epoch": 3.289220917822839, "grad_norm": 0.40962269014407476, "learning_rate": 4.027137031187764e-06, "loss": 0.0231, "step": 27738 }, { "epoch": 3.2893394995849636, "grad_norm": 0.4419452349022108, "learning_rate": 4.025830699630706e-06, "loss": 0.0197, "step": 27739 }, { "epoch": 3.289458081347089, "grad_norm": 0.5482065361147989, "learning_rate": 4.024524561429771e-06, "loss": 0.0305, "step": 27740 }, { "epoch": 3.289576663109214, "grad_norm": 0.6739027269242459, "learning_rate": 4.023218616596991e-06, "loss": 0.0254, "step": 27741 }, { "epoch": 3.289695244871339, "grad_norm": 0.4378387809516163, "learning_rate": 4.02191286514442e-06, "loss": 0.0186, "step": 27742 }, { "epoch": 3.2898138266334636, "grad_norm": 0.8011710770851576, "learning_rate": 4.020607307084082e-06, "loss": 0.0367, "step": 27743 }, { "epoch": 3.2899324083955888, "grad_norm": 0.5882009870077753, "learning_rate": 4.019301942428025e-06, "loss": 0.0186, "step": 27744 }, { "epoch": 3.290050990157714, "grad_norm": 0.4588106214575373, "learning_rate": 4.017996771188265e-06, "loss": 0.0246, "step": 27745 }, { "epoch": 3.2901695719198387, "grad_norm": 0.5268251972578644, "learning_rate": 4.016691793376854e-06, "loss": 0.0333, "step": 27746 }, { "epoch": 3.2902881536819635, "grad_norm": 0.8531328819088594, "learning_rate": 4.0153870090058084e-06, "loss": 0.0417, "step": 27747 }, { "epoch": 3.2904067354440887, "grad_norm": 0.7576885192581095, "learning_rate": 4.014082418087162e-06, "loss": 0.0316, "step": 27748 }, { "epoch": 3.290525317206214, "grad_norm": 0.4792500169002475, "learning_rate": 4.0127780206329405e-06, "loss": 0.0269, "step": 27749 }, { "epoch": 3.2906438989683386, "grad_norm": 0.5309099529758844, "learning_rate": 4.01147381665517e-06, "loss": 0.0265, "step": 27750 }, { "epoch": 3.290762480730464, "grad_norm": 0.8577059490088345, "learning_rate": 4.010169806165881e-06, "loss": 0.046, "step": 27751 }, { "epoch": 3.2908810624925886, "grad_norm": 0.5787632145403245, "learning_rate": 4.0088659891770765e-06, "loss": 0.0349, "step": 27752 }, { "epoch": 3.290999644254714, "grad_norm": 0.4153987498563949, "learning_rate": 4.00756236570079e-06, "loss": 0.0252, "step": 27753 }, { "epoch": 3.2911182260168386, "grad_norm": 0.6431968422997445, "learning_rate": 4.006258935749035e-06, "loss": 0.0313, "step": 27754 }, { "epoch": 3.2912368077789638, "grad_norm": 0.36997461368760215, "learning_rate": 4.0049556993338285e-06, "loss": 0.0166, "step": 27755 }, { "epoch": 3.2913553895410885, "grad_norm": 0.6996197469297629, "learning_rate": 4.003652656467182e-06, "loss": 0.0381, "step": 27756 }, { "epoch": 3.2914739713032137, "grad_norm": 0.7554093089386799, "learning_rate": 4.0023498071611155e-06, "loss": 0.0417, "step": 27757 }, { "epoch": 3.2915925530653385, "grad_norm": 0.3956308730411375, "learning_rate": 4.001047151427636e-06, "loss": 0.0174, "step": 27758 }, { "epoch": 3.2917111348274637, "grad_norm": 0.45252887840317685, "learning_rate": 3.999744689278747e-06, "loss": 0.0201, "step": 27759 }, { "epoch": 3.2918297165895885, "grad_norm": 0.5679354043166773, "learning_rate": 3.998442420726462e-06, "loss": 0.029, "step": 27760 }, { "epoch": 3.2919482983517137, "grad_norm": 0.5241444755891376, "learning_rate": 3.9971403457827825e-06, "loss": 0.0269, "step": 27761 }, { "epoch": 3.2920668801138384, "grad_norm": 0.3271798310423574, "learning_rate": 3.99583846445972e-06, "loss": 0.0138, "step": 27762 }, { "epoch": 3.2921854618759636, "grad_norm": 0.516197758786711, "learning_rate": 3.994536776769262e-06, "loss": 0.0299, "step": 27763 }, { "epoch": 3.2923040436380884, "grad_norm": 0.7365348110517009, "learning_rate": 3.993235282723418e-06, "loss": 0.0426, "step": 27764 }, { "epoch": 3.2924226254002136, "grad_norm": 0.5844491279414128, "learning_rate": 3.991933982334184e-06, "loss": 0.0363, "step": 27765 }, { "epoch": 3.2925412071623383, "grad_norm": 0.4696012415859917, "learning_rate": 3.990632875613556e-06, "loss": 0.0287, "step": 27766 }, { "epoch": 3.2926597889244635, "grad_norm": 0.6033985244232584, "learning_rate": 3.989331962573537e-06, "loss": 0.0293, "step": 27767 }, { "epoch": 3.2927783706865883, "grad_norm": 0.6298768333840329, "learning_rate": 3.988031243226098e-06, "loss": 0.0286, "step": 27768 }, { "epoch": 3.2928969524487135, "grad_norm": 0.6358744743746384, "learning_rate": 3.986730717583259e-06, "loss": 0.027, "step": 27769 }, { "epoch": 3.2930155342108383, "grad_norm": 0.4888039476023514, "learning_rate": 3.985430385656988e-06, "loss": 0.034, "step": 27770 }, { "epoch": 3.2931341159729635, "grad_norm": 0.4992387391927069, "learning_rate": 3.984130247459278e-06, "loss": 0.0247, "step": 27771 }, { "epoch": 3.293252697735088, "grad_norm": 0.9088829784387544, "learning_rate": 3.9828303030021164e-06, "loss": 0.0336, "step": 27772 }, { "epoch": 3.2933712794972134, "grad_norm": 0.37421942171223666, "learning_rate": 3.981530552297491e-06, "loss": 0.0191, "step": 27773 }, { "epoch": 3.293489861259338, "grad_norm": 0.7045991120675945, "learning_rate": 3.980230995357375e-06, "loss": 0.0447, "step": 27774 }, { "epoch": 3.2936084430214634, "grad_norm": 0.7241157190924399, "learning_rate": 3.9789316321937525e-06, "loss": 0.0402, "step": 27775 }, { "epoch": 3.293727024783588, "grad_norm": 0.5813389589631031, "learning_rate": 3.977632462818603e-06, "loss": 0.0309, "step": 27776 }, { "epoch": 3.2938456065457133, "grad_norm": 0.37030177295754885, "learning_rate": 3.976333487243905e-06, "loss": 0.0185, "step": 27777 }, { "epoch": 3.293964188307838, "grad_norm": 0.3900694423561358, "learning_rate": 3.975034705481634e-06, "loss": 0.0225, "step": 27778 }, { "epoch": 3.2940827700699633, "grad_norm": 0.62396544176592, "learning_rate": 3.97373611754375e-06, "loss": 0.037, "step": 27779 }, { "epoch": 3.294201351832088, "grad_norm": 0.6955383114054494, "learning_rate": 3.972437723442246e-06, "loss": 0.0301, "step": 27780 }, { "epoch": 3.2943199335942133, "grad_norm": 0.6273715858948584, "learning_rate": 3.971139523189074e-06, "loss": 0.0365, "step": 27781 }, { "epoch": 3.294438515356338, "grad_norm": 0.5429033772450947, "learning_rate": 3.969841516796216e-06, "loss": 0.0289, "step": 27782 }, { "epoch": 3.2945570971184632, "grad_norm": 0.6964604741675752, "learning_rate": 3.968543704275618e-06, "loss": 0.0439, "step": 27783 }, { "epoch": 3.294675678880588, "grad_norm": 0.46652734328904405, "learning_rate": 3.967246085639268e-06, "loss": 0.0234, "step": 27784 }, { "epoch": 3.294794260642713, "grad_norm": 0.46719942972454875, "learning_rate": 3.965948660899113e-06, "loss": 0.0234, "step": 27785 }, { "epoch": 3.294912842404838, "grad_norm": 0.42637428897915847, "learning_rate": 3.964651430067115e-06, "loss": 0.0213, "step": 27786 }, { "epoch": 3.295031424166963, "grad_norm": 0.426115472914542, "learning_rate": 3.963354393155236e-06, "loss": 0.025, "step": 27787 }, { "epoch": 3.295150005929088, "grad_norm": 0.6538535922915355, "learning_rate": 3.9620575501754325e-06, "loss": 0.0266, "step": 27788 }, { "epoch": 3.295268587691213, "grad_norm": 0.6305592307742108, "learning_rate": 3.960760901139668e-06, "loss": 0.0285, "step": 27789 }, { "epoch": 3.2953871694533383, "grad_norm": 0.6139089662571792, "learning_rate": 3.959464446059874e-06, "loss": 0.0356, "step": 27790 }, { "epoch": 3.295505751215463, "grad_norm": 0.8413465565421957, "learning_rate": 3.95816818494803e-06, "loss": 0.0503, "step": 27791 }, { "epoch": 3.295624332977588, "grad_norm": 0.4996793817948496, "learning_rate": 3.956872117816063e-06, "loss": 0.0325, "step": 27792 }, { "epoch": 3.295742914739713, "grad_norm": 0.4870672201191756, "learning_rate": 3.9555762446759404e-06, "loss": 0.0139, "step": 27793 }, { "epoch": 3.2958614965018382, "grad_norm": 0.5307753689449944, "learning_rate": 3.954280565539584e-06, "loss": 0.0168, "step": 27794 }, { "epoch": 3.295980078263963, "grad_norm": 0.5463900707835186, "learning_rate": 3.952985080418964e-06, "loss": 0.0293, "step": 27795 }, { "epoch": 3.2960986600260878, "grad_norm": 0.6614333417879408, "learning_rate": 3.951689789326008e-06, "loss": 0.0347, "step": 27796 }, { "epoch": 3.296217241788213, "grad_norm": 0.4445445687185322, "learning_rate": 3.95039469227266e-06, "loss": 0.022, "step": 27797 }, { "epoch": 3.296335823550338, "grad_norm": 0.6435618245224741, "learning_rate": 3.94909978927086e-06, "loss": 0.0386, "step": 27798 }, { "epoch": 3.296454405312463, "grad_norm": 0.5546876480754466, "learning_rate": 3.947805080332545e-06, "loss": 0.0258, "step": 27799 }, { "epoch": 3.2965729870745877, "grad_norm": 0.5836845455451529, "learning_rate": 3.946510565469658e-06, "loss": 0.0324, "step": 27800 }, { "epoch": 3.296691568836713, "grad_norm": 0.787731443007729, "learning_rate": 3.945216244694114e-06, "loss": 0.04, "step": 27801 }, { "epoch": 3.296810150598838, "grad_norm": 0.8305971375565421, "learning_rate": 3.94392211801787e-06, "loss": 0.0305, "step": 27802 }, { "epoch": 3.296928732360963, "grad_norm": 0.6903739967928753, "learning_rate": 3.942628185452838e-06, "loss": 0.0259, "step": 27803 }, { "epoch": 3.297047314123088, "grad_norm": 0.5452145986691702, "learning_rate": 3.9413344470109496e-06, "loss": 0.025, "step": 27804 }, { "epoch": 3.297165895885213, "grad_norm": 0.5112625128806721, "learning_rate": 3.940040902704134e-06, "loss": 0.0173, "step": 27805 }, { "epoch": 3.297284477647338, "grad_norm": 0.4514315198237018, "learning_rate": 3.938747552544317e-06, "loss": 0.0241, "step": 27806 }, { "epoch": 3.2974030594094628, "grad_norm": 0.5590753230831595, "learning_rate": 3.937454396543428e-06, "loss": 0.027, "step": 27807 }, { "epoch": 3.297521641171588, "grad_norm": 0.6412332804049922, "learning_rate": 3.936161434713373e-06, "loss": 0.0392, "step": 27808 }, { "epoch": 3.2976402229337127, "grad_norm": 0.36464768601307296, "learning_rate": 3.93486866706608e-06, "loss": 0.014, "step": 27809 }, { "epoch": 3.297758804695838, "grad_norm": 0.39195231466537633, "learning_rate": 3.933576093613464e-06, "loss": 0.0199, "step": 27810 }, { "epoch": 3.2978773864579627, "grad_norm": 0.6918742000877832, "learning_rate": 3.932283714367452e-06, "loss": 0.0333, "step": 27811 }, { "epoch": 3.297995968220088, "grad_norm": 0.5021265549619037, "learning_rate": 3.9309915293399366e-06, "loss": 0.0242, "step": 27812 }, { "epoch": 3.2981145499822127, "grad_norm": 0.6531807208778259, "learning_rate": 3.929699538542853e-06, "loss": 0.0388, "step": 27813 }, { "epoch": 3.298233131744338, "grad_norm": 0.4642456982570778, "learning_rate": 3.9284077419880965e-06, "loss": 0.0303, "step": 27814 }, { "epoch": 3.2983517135064626, "grad_norm": 0.41791802939588896, "learning_rate": 3.927116139687581e-06, "loss": 0.0235, "step": 27815 }, { "epoch": 3.298470295268588, "grad_norm": 0.6630778768567833, "learning_rate": 3.925824731653213e-06, "loss": 0.0325, "step": 27816 }, { "epoch": 3.2985888770307126, "grad_norm": 0.6353172743539326, "learning_rate": 3.9245335178969e-06, "loss": 0.0341, "step": 27817 }, { "epoch": 3.2987074587928378, "grad_norm": 0.3027266064393657, "learning_rate": 3.923242498430546e-06, "loss": 0.0143, "step": 27818 }, { "epoch": 3.2988260405549625, "grad_norm": 0.3426885946348873, "learning_rate": 3.921951673266047e-06, "loss": 0.016, "step": 27819 }, { "epoch": 3.2989446223170877, "grad_norm": 0.685958422015653, "learning_rate": 3.920661042415305e-06, "loss": 0.0357, "step": 27820 }, { "epoch": 3.2990632040792125, "grad_norm": 0.5238278143076072, "learning_rate": 3.919370605890218e-06, "loss": 0.027, "step": 27821 }, { "epoch": 3.2991817858413377, "grad_norm": 0.6175004488135559, "learning_rate": 3.918080363702692e-06, "loss": 0.0306, "step": 27822 }, { "epoch": 3.2993003676034625, "grad_norm": 0.9648412482625215, "learning_rate": 3.916790315864605e-06, "loss": 0.0337, "step": 27823 }, { "epoch": 3.2994189493655877, "grad_norm": 0.4329454126429439, "learning_rate": 3.915500462387858e-06, "loss": 0.0235, "step": 27824 }, { "epoch": 3.2995375311277124, "grad_norm": 0.6970629368650261, "learning_rate": 3.9142108032843405e-06, "loss": 0.0338, "step": 27825 }, { "epoch": 3.2996561128898376, "grad_norm": 0.6546710137985855, "learning_rate": 3.912921338565942e-06, "loss": 0.026, "step": 27826 }, { "epoch": 3.2997746946519624, "grad_norm": 0.6519829603204154, "learning_rate": 3.91163206824455e-06, "loss": 0.0312, "step": 27827 }, { "epoch": 3.2998932764140876, "grad_norm": 0.8271313249382385, "learning_rate": 3.91034299233205e-06, "loss": 0.0505, "step": 27828 }, { "epoch": 3.3000118581762123, "grad_norm": 0.5581486851027265, "learning_rate": 3.909054110840335e-06, "loss": 0.0278, "step": 27829 }, { "epoch": 3.3001304399383375, "grad_norm": 0.589399228632936, "learning_rate": 3.907765423781268e-06, "loss": 0.0196, "step": 27830 }, { "epoch": 3.3002490217004623, "grad_norm": 0.5999171419890557, "learning_rate": 3.9064769311667385e-06, "loss": 0.0213, "step": 27831 }, { "epoch": 3.3003676034625875, "grad_norm": 0.8983944146526531, "learning_rate": 3.905188633008627e-06, "loss": 0.0387, "step": 27832 }, { "epoch": 3.3004861852247123, "grad_norm": 0.3575783770805807, "learning_rate": 3.903900529318813e-06, "loss": 0.0179, "step": 27833 }, { "epoch": 3.3006047669868375, "grad_norm": 0.557083422906829, "learning_rate": 3.9026126201091615e-06, "loss": 0.0206, "step": 27834 }, { "epoch": 3.3007233487489622, "grad_norm": 0.3488686185056428, "learning_rate": 3.901324905391551e-06, "loss": 0.0161, "step": 27835 }, { "epoch": 3.3008419305110874, "grad_norm": 0.39234119417957475, "learning_rate": 3.9000373851778485e-06, "loss": 0.0208, "step": 27836 }, { "epoch": 3.300960512273212, "grad_norm": 0.378918766959862, "learning_rate": 3.898750059479931e-06, "loss": 0.0209, "step": 27837 }, { "epoch": 3.3010790940353374, "grad_norm": 0.5886313612775618, "learning_rate": 3.897462928309667e-06, "loss": 0.0281, "step": 27838 }, { "epoch": 3.3011976757974626, "grad_norm": 0.657825216180427, "learning_rate": 3.896175991678902e-06, "loss": 0.0307, "step": 27839 }, { "epoch": 3.3013162575595874, "grad_norm": 0.4632513820508234, "learning_rate": 3.894889249599529e-06, "loss": 0.024, "step": 27840 }, { "epoch": 3.301434839321712, "grad_norm": 0.4561223298058198, "learning_rate": 3.8936027020833905e-06, "loss": 0.0256, "step": 27841 }, { "epoch": 3.3015534210838373, "grad_norm": 0.31634680519244085, "learning_rate": 3.892316349142352e-06, "loss": 0.0213, "step": 27842 }, { "epoch": 3.3016720028459625, "grad_norm": 0.3333540942565808, "learning_rate": 3.891030190788275e-06, "loss": 0.0154, "step": 27843 }, { "epoch": 3.3017905846080873, "grad_norm": 0.47228257656736544, "learning_rate": 3.889744227033018e-06, "loss": 0.0273, "step": 27844 }, { "epoch": 3.301909166370212, "grad_norm": 0.6771176448394813, "learning_rate": 3.888458457888428e-06, "loss": 0.0384, "step": 27845 }, { "epoch": 3.3020277481323372, "grad_norm": 0.337499677670817, "learning_rate": 3.887172883366361e-06, "loss": 0.0181, "step": 27846 }, { "epoch": 3.3021463298944624, "grad_norm": 0.5533353744129451, "learning_rate": 3.885887503478669e-06, "loss": 0.0246, "step": 27847 }, { "epoch": 3.302264911656587, "grad_norm": 0.4888134317014093, "learning_rate": 3.884602318237204e-06, "loss": 0.0283, "step": 27848 }, { "epoch": 3.302383493418712, "grad_norm": 0.8566908497483087, "learning_rate": 3.883317327653818e-06, "loss": 0.0293, "step": 27849 }, { "epoch": 3.302502075180837, "grad_norm": 0.43326556096191937, "learning_rate": 3.88203253174034e-06, "loss": 0.0186, "step": 27850 }, { "epoch": 3.3026206569429624, "grad_norm": 0.3243749713762092, "learning_rate": 3.880747930508636e-06, "loss": 0.0115, "step": 27851 }, { "epoch": 3.302739238705087, "grad_norm": 0.6510938987848732, "learning_rate": 3.879463523970531e-06, "loss": 0.0388, "step": 27852 }, { "epoch": 3.3028578204672123, "grad_norm": 0.3866774220224474, "learning_rate": 3.878179312137881e-06, "loss": 0.0198, "step": 27853 }, { "epoch": 3.302976402229337, "grad_norm": 0.6894751952840321, "learning_rate": 3.876895295022504e-06, "loss": 0.0394, "step": 27854 }, { "epoch": 3.3030949839914623, "grad_norm": 0.6680036710259506, "learning_rate": 3.875611472636259e-06, "loss": 0.0372, "step": 27855 }, { "epoch": 3.303213565753587, "grad_norm": 0.6052998660033125, "learning_rate": 3.8743278449909685e-06, "loss": 0.0223, "step": 27856 }, { "epoch": 3.3033321475157122, "grad_norm": 0.7312785801166084, "learning_rate": 3.873044412098467e-06, "loss": 0.0387, "step": 27857 }, { "epoch": 3.303450729277837, "grad_norm": 0.543093589923459, "learning_rate": 3.871761173970589e-06, "loss": 0.0214, "step": 27858 }, { "epoch": 3.303569311039962, "grad_norm": 0.5001807847313702, "learning_rate": 3.870478130619165e-06, "loss": 0.03, "step": 27859 }, { "epoch": 3.303687892802087, "grad_norm": 0.745356787303011, "learning_rate": 3.869195282056029e-06, "loss": 0.039, "step": 27860 }, { "epoch": 3.303806474564212, "grad_norm": 0.7269263723598205, "learning_rate": 3.867912628292986e-06, "loss": 0.0493, "step": 27861 }, { "epoch": 3.303925056326337, "grad_norm": 0.5697591655543556, "learning_rate": 3.866630169341886e-06, "loss": 0.0287, "step": 27862 }, { "epoch": 3.304043638088462, "grad_norm": 0.4354555459311577, "learning_rate": 3.865347905214536e-06, "loss": 0.0245, "step": 27863 }, { "epoch": 3.304162219850587, "grad_norm": 0.3771851576491067, "learning_rate": 3.864065835922762e-06, "loss": 0.0169, "step": 27864 }, { "epoch": 3.304280801612712, "grad_norm": 0.6518929042036157, "learning_rate": 3.862783961478381e-06, "loss": 0.03, "step": 27865 }, { "epoch": 3.304399383374837, "grad_norm": 0.98086454723485, "learning_rate": 3.861502281893212e-06, "loss": 0.0588, "step": 27866 }, { "epoch": 3.304517965136962, "grad_norm": 0.3569510966945982, "learning_rate": 3.860220797179076e-06, "loss": 0.0207, "step": 27867 }, { "epoch": 3.304636546899087, "grad_norm": 0.7468901867727762, "learning_rate": 3.8589395073477755e-06, "loss": 0.0552, "step": 27868 }, { "epoch": 3.304755128661212, "grad_norm": 0.3973288725246763, "learning_rate": 3.857658412411128e-06, "loss": 0.0187, "step": 27869 }, { "epoch": 3.3048737104233368, "grad_norm": 0.624036059361594, "learning_rate": 3.8563775123809435e-06, "loss": 0.0234, "step": 27870 }, { "epoch": 3.304992292185462, "grad_norm": 0.40139781836275046, "learning_rate": 3.855096807269034e-06, "loss": 0.0152, "step": 27871 }, { "epoch": 3.3051108739475867, "grad_norm": 0.5227923725918305, "learning_rate": 3.853816297087196e-06, "loss": 0.0297, "step": 27872 }, { "epoch": 3.305229455709712, "grad_norm": 0.4140921587289125, "learning_rate": 3.852535981847246e-06, "loss": 0.023, "step": 27873 }, { "epoch": 3.3053480374718367, "grad_norm": 0.39666652516720674, "learning_rate": 3.851255861560979e-06, "loss": 0.0185, "step": 27874 }, { "epoch": 3.305466619233962, "grad_norm": 0.4747413066574263, "learning_rate": 3.849975936240199e-06, "loss": 0.0201, "step": 27875 }, { "epoch": 3.3055852009960867, "grad_norm": 0.4115964837761341, "learning_rate": 3.8486962058967044e-06, "loss": 0.0193, "step": 27876 }, { "epoch": 3.305703782758212, "grad_norm": 0.6734284232647656, "learning_rate": 3.847416670542292e-06, "loss": 0.0269, "step": 27877 }, { "epoch": 3.3058223645203366, "grad_norm": 0.6142894842098806, "learning_rate": 3.846137330188765e-06, "loss": 0.0269, "step": 27878 }, { "epoch": 3.305940946282462, "grad_norm": 0.3640761980053133, "learning_rate": 3.844858184847907e-06, "loss": 0.0188, "step": 27879 }, { "epoch": 3.3060595280445866, "grad_norm": 0.6992080665469463, "learning_rate": 3.843579234531514e-06, "loss": 0.0364, "step": 27880 }, { "epoch": 3.306178109806712, "grad_norm": 0.5935694884313321, "learning_rate": 3.842300479251376e-06, "loss": 0.0364, "step": 27881 }, { "epoch": 3.3062966915688365, "grad_norm": 0.4654099168897005, "learning_rate": 3.841021919019288e-06, "loss": 0.0257, "step": 27882 }, { "epoch": 3.3064152733309617, "grad_norm": 0.4537323713790174, "learning_rate": 3.839743553847025e-06, "loss": 0.0184, "step": 27883 }, { "epoch": 3.3065338550930865, "grad_norm": 0.560057861069202, "learning_rate": 3.838465383746378e-06, "loss": 0.0285, "step": 27884 }, { "epoch": 3.3066524368552117, "grad_norm": 0.5332051876442494, "learning_rate": 3.837187408729131e-06, "loss": 0.0302, "step": 27885 }, { "epoch": 3.3067710186173365, "grad_norm": 0.8356829350842514, "learning_rate": 3.835909628807066e-06, "loss": 0.0474, "step": 27886 }, { "epoch": 3.3068896003794617, "grad_norm": 0.7482376414644991, "learning_rate": 3.834632043991959e-06, "loss": 0.0425, "step": 27887 }, { "epoch": 3.307008182141587, "grad_norm": 0.5329230226977014, "learning_rate": 3.83335465429559e-06, "loss": 0.0316, "step": 27888 }, { "epoch": 3.3071267639037116, "grad_norm": 0.585591377876086, "learning_rate": 3.832077459729741e-06, "loss": 0.0354, "step": 27889 }, { "epoch": 3.3072453456658364, "grad_norm": 0.47452468268278597, "learning_rate": 3.830800460306175e-06, "loss": 0.0311, "step": 27890 }, { "epoch": 3.3073639274279616, "grad_norm": 0.35063622290476326, "learning_rate": 3.829523656036668e-06, "loss": 0.0158, "step": 27891 }, { "epoch": 3.307482509190087, "grad_norm": 0.8340636896561271, "learning_rate": 3.828247046932992e-06, "loss": 0.0537, "step": 27892 }, { "epoch": 3.3076010909522116, "grad_norm": 0.4909588225640828, "learning_rate": 3.826970633006924e-06, "loss": 0.02, "step": 27893 }, { "epoch": 3.3077196727143363, "grad_norm": 0.38574762295601633, "learning_rate": 3.8256944142702144e-06, "loss": 0.0203, "step": 27894 }, { "epoch": 3.3078382544764615, "grad_norm": 0.6968905900207709, "learning_rate": 3.824418390734635e-06, "loss": 0.0331, "step": 27895 }, { "epoch": 3.3079568362385867, "grad_norm": 0.6818185629619743, "learning_rate": 3.823142562411955e-06, "loss": 0.0353, "step": 27896 }, { "epoch": 3.3080754180007115, "grad_norm": 0.6181435335328476, "learning_rate": 3.821866929313928e-06, "loss": 0.0278, "step": 27897 }, { "epoch": 3.3081939997628362, "grad_norm": 0.6927571052664493, "learning_rate": 3.820591491452319e-06, "loss": 0.0292, "step": 27898 }, { "epoch": 3.3083125815249614, "grad_norm": 0.4047346786667762, "learning_rate": 3.819316248838886e-06, "loss": 0.0224, "step": 27899 }, { "epoch": 3.3084311632870866, "grad_norm": 0.8144580383826798, "learning_rate": 3.818041201485389e-06, "loss": 0.0369, "step": 27900 }, { "epoch": 3.3085497450492114, "grad_norm": 0.527573287767143, "learning_rate": 3.816766349403569e-06, "loss": 0.0358, "step": 27901 }, { "epoch": 3.3086683268113366, "grad_norm": 0.6595568572185324, "learning_rate": 3.815491692605189e-06, "loss": 0.0396, "step": 27902 }, { "epoch": 3.3087869085734614, "grad_norm": 0.6385850560888783, "learning_rate": 3.8142172311019965e-06, "loss": 0.0343, "step": 27903 }, { "epoch": 3.3089054903355866, "grad_norm": 0.5776590529038925, "learning_rate": 3.8129429649057467e-06, "loss": 0.0255, "step": 27904 }, { "epoch": 3.3090240720977113, "grad_norm": 0.6524259125783789, "learning_rate": 3.811668894028178e-06, "loss": 0.0231, "step": 27905 }, { "epoch": 3.3091426538598365, "grad_norm": 0.46160850857623853, "learning_rate": 3.8103950184810363e-06, "loss": 0.0251, "step": 27906 }, { "epoch": 3.3092612356219613, "grad_norm": 0.5261313736746881, "learning_rate": 3.80912133827607e-06, "loss": 0.032, "step": 27907 }, { "epoch": 3.3093798173840865, "grad_norm": 0.23434591027801066, "learning_rate": 3.80784785342502e-06, "loss": 0.0113, "step": 27908 }, { "epoch": 3.3094983991462112, "grad_norm": 0.5415793872298806, "learning_rate": 3.806574563939627e-06, "loss": 0.0267, "step": 27909 }, { "epoch": 3.3096169809083364, "grad_norm": 0.7621216440034788, "learning_rate": 3.8053014698316207e-06, "loss": 0.0371, "step": 27910 }, { "epoch": 3.309735562670461, "grad_norm": 0.48411387642646014, "learning_rate": 3.8040285711127527e-06, "loss": 0.018, "step": 27911 }, { "epoch": 3.3098541444325864, "grad_norm": 0.6568984415784147, "learning_rate": 3.802755867794744e-06, "loss": 0.0339, "step": 27912 }, { "epoch": 3.309972726194711, "grad_norm": 0.722707309946253, "learning_rate": 3.8014833598893323e-06, "loss": 0.0334, "step": 27913 }, { "epoch": 3.3100913079568364, "grad_norm": 0.8729296192925622, "learning_rate": 3.8002110474082474e-06, "loss": 0.046, "step": 27914 }, { "epoch": 3.310209889718961, "grad_norm": 0.627558006627391, "learning_rate": 3.798938930363222e-06, "loss": 0.0289, "step": 27915 }, { "epoch": 3.3103284714810863, "grad_norm": 0.832709869288307, "learning_rate": 3.797667008765987e-06, "loss": 0.0317, "step": 27916 }, { "epoch": 3.310447053243211, "grad_norm": 0.7570380161351903, "learning_rate": 3.7963952826282484e-06, "loss": 0.0353, "step": 27917 }, { "epoch": 3.3105656350053363, "grad_norm": 0.5126721161391322, "learning_rate": 3.7951237519617574e-06, "loss": 0.0302, "step": 27918 }, { "epoch": 3.310684216767461, "grad_norm": 0.5091880228970953, "learning_rate": 3.7938524167782153e-06, "loss": 0.0221, "step": 27919 }, { "epoch": 3.3108027985295863, "grad_norm": 0.5127165599117681, "learning_rate": 3.792581277089355e-06, "loss": 0.0247, "step": 27920 }, { "epoch": 3.310921380291711, "grad_norm": 0.3711855818167647, "learning_rate": 3.7913103329068778e-06, "loss": 0.0177, "step": 27921 }, { "epoch": 3.311039962053836, "grad_norm": 0.565958552658857, "learning_rate": 3.7900395842425247e-06, "loss": 0.0286, "step": 27922 }, { "epoch": 3.311158543815961, "grad_norm": 0.5180750146375289, "learning_rate": 3.7887690311079887e-06, "loss": 0.0335, "step": 27923 }, { "epoch": 3.311277125578086, "grad_norm": 0.6490747241949512, "learning_rate": 3.7874986735149942e-06, "loss": 0.0302, "step": 27924 }, { "epoch": 3.311395707340211, "grad_norm": 0.5256419334847869, "learning_rate": 3.7862285114752457e-06, "loss": 0.0263, "step": 27925 }, { "epoch": 3.311514289102336, "grad_norm": 0.6091180760840573, "learning_rate": 3.7849585450004583e-06, "loss": 0.0322, "step": 27926 }, { "epoch": 3.311632870864461, "grad_norm": 0.5576979669178772, "learning_rate": 3.7836887741023453e-06, "loss": 0.0325, "step": 27927 }, { "epoch": 3.311751452626586, "grad_norm": 0.4160367442812694, "learning_rate": 3.7824191987925916e-06, "loss": 0.0179, "step": 27928 }, { "epoch": 3.311870034388711, "grad_norm": 0.6040453182187787, "learning_rate": 3.781149819082924e-06, "loss": 0.0362, "step": 27929 }, { "epoch": 3.311988616150836, "grad_norm": 0.4860110262950691, "learning_rate": 3.7798806349850306e-06, "loss": 0.0219, "step": 27930 }, { "epoch": 3.312107197912961, "grad_norm": 0.6134605242545235, "learning_rate": 3.7786116465106213e-06, "loss": 0.0414, "step": 27931 }, { "epoch": 3.312225779675086, "grad_norm": 0.5148964931884741, "learning_rate": 3.7773428536713784e-06, "loss": 0.0264, "step": 27932 }, { "epoch": 3.312344361437211, "grad_norm": 0.38317398980751277, "learning_rate": 3.7760742564790204e-06, "loss": 0.0155, "step": 27933 }, { "epoch": 3.312462943199336, "grad_norm": 0.45294860618672217, "learning_rate": 3.7748058549452237e-06, "loss": 0.0186, "step": 27934 }, { "epoch": 3.3125815249614607, "grad_norm": 0.7672103858899045, "learning_rate": 3.773537649081693e-06, "loss": 0.0322, "step": 27935 }, { "epoch": 3.312700106723586, "grad_norm": 0.3688690698581376, "learning_rate": 3.772269638900111e-06, "loss": 0.022, "step": 27936 }, { "epoch": 3.3128186884857107, "grad_norm": 0.5910304009546803, "learning_rate": 3.7710018244121762e-06, "loss": 0.0371, "step": 27937 }, { "epoch": 3.312937270247836, "grad_norm": 0.6568927116830952, "learning_rate": 3.769734205629574e-06, "loss": 0.0271, "step": 27938 }, { "epoch": 3.3130558520099607, "grad_norm": 0.39636780096759283, "learning_rate": 3.768466782563984e-06, "loss": 0.0203, "step": 27939 }, { "epoch": 3.313174433772086, "grad_norm": 0.3724161498945175, "learning_rate": 3.767199555227094e-06, "loss": 0.0162, "step": 27940 }, { "epoch": 3.313293015534211, "grad_norm": 0.461646269002203, "learning_rate": 3.7659325236305887e-06, "loss": 0.0163, "step": 27941 }, { "epoch": 3.313411597296336, "grad_norm": 0.5201020385226258, "learning_rate": 3.7646656877861506e-06, "loss": 0.0229, "step": 27942 }, { "epoch": 3.3135301790584606, "grad_norm": 0.9601974688238142, "learning_rate": 3.763399047705443e-06, "loss": 0.0402, "step": 27943 }, { "epoch": 3.313648760820586, "grad_norm": 0.4007365487725, "learning_rate": 3.7621326034001648e-06, "loss": 0.0193, "step": 27944 }, { "epoch": 3.313767342582711, "grad_norm": 0.5455009808621921, "learning_rate": 3.7608663548819756e-06, "loss": 0.0341, "step": 27945 }, { "epoch": 3.3138859243448358, "grad_norm": 0.8105290835436278, "learning_rate": 3.7596003021625524e-06, "loss": 0.0383, "step": 27946 }, { "epoch": 3.3140045061069605, "grad_norm": 0.6134451411182563, "learning_rate": 3.7583344452535695e-06, "loss": 0.0304, "step": 27947 }, { "epoch": 3.3141230878690857, "grad_norm": 0.39208792806985315, "learning_rate": 3.7570687841666925e-06, "loss": 0.0198, "step": 27948 }, { "epoch": 3.314241669631211, "grad_norm": 0.6280270950561221, "learning_rate": 3.7558033189136005e-06, "loss": 0.0286, "step": 27949 }, { "epoch": 3.3143602513933357, "grad_norm": 0.5295908016233951, "learning_rate": 3.754538049505943e-06, "loss": 0.0269, "step": 27950 }, { "epoch": 3.314478833155461, "grad_norm": 0.4430885723135758, "learning_rate": 3.753272975955391e-06, "loss": 0.0235, "step": 27951 }, { "epoch": 3.3145974149175856, "grad_norm": 0.4565798859297612, "learning_rate": 3.752008098273607e-06, "loss": 0.0222, "step": 27952 }, { "epoch": 3.314715996679711, "grad_norm": 0.452390423435813, "learning_rate": 3.7507434164722606e-06, "loss": 0.0271, "step": 27953 }, { "epoch": 3.3148345784418356, "grad_norm": 0.5491389616672491, "learning_rate": 3.749478930562997e-06, "loss": 0.0268, "step": 27954 }, { "epoch": 3.314953160203961, "grad_norm": 0.6327876353527695, "learning_rate": 3.7482146405574766e-06, "loss": 0.031, "step": 27955 }, { "epoch": 3.3150717419660856, "grad_norm": 0.5262766835145923, "learning_rate": 3.7469505464673566e-06, "loss": 0.0274, "step": 27956 }, { "epoch": 3.3151903237282108, "grad_norm": 0.5342180136252416, "learning_rate": 3.7456866483042918e-06, "loss": 0.0295, "step": 27957 }, { "epoch": 3.3153089054903355, "grad_norm": 0.7203904943065375, "learning_rate": 3.7444229460799312e-06, "loss": 0.0413, "step": 27958 }, { "epoch": 3.3154274872524607, "grad_norm": 0.5521678039143647, "learning_rate": 3.743159439805924e-06, "loss": 0.0301, "step": 27959 }, { "epoch": 3.3155460690145855, "grad_norm": 0.8986933295897125, "learning_rate": 3.741896129493927e-06, "loss": 0.0311, "step": 27960 }, { "epoch": 3.3156646507767107, "grad_norm": 0.7510228054718425, "learning_rate": 3.7406330151555756e-06, "loss": 0.0338, "step": 27961 }, { "epoch": 3.3157832325388354, "grad_norm": 0.507020557803987, "learning_rate": 3.7393700968025164e-06, "loss": 0.0232, "step": 27962 }, { "epoch": 3.3159018143009606, "grad_norm": 0.4047937163101113, "learning_rate": 3.7381073744463924e-06, "loss": 0.021, "step": 27963 }, { "epoch": 3.3160203960630854, "grad_norm": 0.5645040765000825, "learning_rate": 3.736844848098847e-06, "loss": 0.021, "step": 27964 }, { "epoch": 3.3161389778252106, "grad_norm": 0.2990751095477559, "learning_rate": 3.7355825177715215e-06, "loss": 0.0176, "step": 27965 }, { "epoch": 3.3162575595873354, "grad_norm": 0.4515612505105922, "learning_rate": 3.734320383476039e-06, "loss": 0.0259, "step": 27966 }, { "epoch": 3.3163761413494606, "grad_norm": 0.44923047929978716, "learning_rate": 3.7330584452240553e-06, "loss": 0.0241, "step": 27967 }, { "epoch": 3.3164947231115853, "grad_norm": 0.6248955303979373, "learning_rate": 3.7317967030271876e-06, "loss": 0.0341, "step": 27968 }, { "epoch": 3.3166133048737105, "grad_norm": 0.3768532408233588, "learning_rate": 3.730535156897075e-06, "loss": 0.0181, "step": 27969 }, { "epoch": 3.3167318866358353, "grad_norm": 0.52098556363743, "learning_rate": 3.7292738068453457e-06, "loss": 0.0288, "step": 27970 }, { "epoch": 3.3168504683979605, "grad_norm": 0.6402990543803196, "learning_rate": 3.7280126528836335e-06, "loss": 0.0366, "step": 27971 }, { "epoch": 3.3169690501600853, "grad_norm": 0.34162692921760895, "learning_rate": 3.7267516950235533e-06, "loss": 0.0179, "step": 27972 }, { "epoch": 3.3170876319222105, "grad_norm": 0.6495491965102689, "learning_rate": 3.7254909332767348e-06, "loss": 0.0355, "step": 27973 }, { "epoch": 3.317206213684335, "grad_norm": 0.3856662679671859, "learning_rate": 3.7242303676548018e-06, "loss": 0.0181, "step": 27974 }, { "epoch": 3.3173247954464604, "grad_norm": 0.5192057491326741, "learning_rate": 3.722969998169373e-06, "loss": 0.0369, "step": 27975 }, { "epoch": 3.317443377208585, "grad_norm": 0.5499691820784068, "learning_rate": 3.7217098248320785e-06, "loss": 0.0309, "step": 27976 }, { "epoch": 3.3175619589707104, "grad_norm": 0.44292906587810515, "learning_rate": 3.7204498476545113e-06, "loss": 0.0198, "step": 27977 }, { "epoch": 3.317680540732835, "grad_norm": 0.3642732347154108, "learning_rate": 3.7191900666483174e-06, "loss": 0.0184, "step": 27978 }, { "epoch": 3.3177991224949603, "grad_norm": 0.6392097452913126, "learning_rate": 3.7179304818250854e-06, "loss": 0.0288, "step": 27979 }, { "epoch": 3.317917704257085, "grad_norm": 0.37353774655458777, "learning_rate": 3.7166710931964443e-06, "loss": 0.0178, "step": 27980 }, { "epoch": 3.3180362860192103, "grad_norm": 0.6568021049640583, "learning_rate": 3.7154119007739823e-06, "loss": 0.0356, "step": 27981 }, { "epoch": 3.318154867781335, "grad_norm": 0.6719948972887031, "learning_rate": 3.7141529045693372e-06, "loss": 0.0247, "step": 27982 }, { "epoch": 3.3182734495434603, "grad_norm": 0.48846611067661794, "learning_rate": 3.712894104594092e-06, "loss": 0.0257, "step": 27983 }, { "epoch": 3.318392031305585, "grad_norm": 0.3645375297764137, "learning_rate": 3.7116355008598585e-06, "loss": 0.0221, "step": 27984 }, { "epoch": 3.3185106130677102, "grad_norm": 0.4016251333855859, "learning_rate": 3.710377093378242e-06, "loss": 0.0212, "step": 27985 }, { "epoch": 3.318629194829835, "grad_norm": 0.5803359385969297, "learning_rate": 3.709118882160839e-06, "loss": 0.0315, "step": 27986 }, { "epoch": 3.31874777659196, "grad_norm": 0.5182117272106874, "learning_rate": 3.7078608672192593e-06, "loss": 0.0299, "step": 27987 }, { "epoch": 3.318866358354085, "grad_norm": 0.8642325597230283, "learning_rate": 3.70660304856508e-06, "loss": 0.0333, "step": 27988 }, { "epoch": 3.31898494011621, "grad_norm": 0.7072066805686882, "learning_rate": 3.705345426209919e-06, "loss": 0.0229, "step": 27989 }, { "epoch": 3.3191035218783353, "grad_norm": 0.5188054324533439, "learning_rate": 3.7040880001653564e-06, "loss": 0.0271, "step": 27990 }, { "epoch": 3.31922210364046, "grad_norm": 0.8404015351481868, "learning_rate": 3.7028307704429914e-06, "loss": 0.045, "step": 27991 }, { "epoch": 3.319340685402585, "grad_norm": 0.5761565366378449, "learning_rate": 3.701573737054401e-06, "loss": 0.0254, "step": 27992 }, { "epoch": 3.31945926716471, "grad_norm": 0.5668059123847877, "learning_rate": 3.7003169000111942e-06, "loss": 0.0228, "step": 27993 }, { "epoch": 3.3195778489268353, "grad_norm": 0.3838966081566302, "learning_rate": 3.6990602593249407e-06, "loss": 0.0192, "step": 27994 }, { "epoch": 3.31969643068896, "grad_norm": 0.7357055343700173, "learning_rate": 3.697803815007228e-06, "loss": 0.0447, "step": 27995 }, { "epoch": 3.319815012451085, "grad_norm": 0.4847369706902986, "learning_rate": 3.6965475670696443e-06, "loss": 0.0226, "step": 27996 }, { "epoch": 3.31993359421321, "grad_norm": 0.5773935482585504, "learning_rate": 3.695291515523766e-06, "loss": 0.0258, "step": 27997 }, { "epoch": 3.320052175975335, "grad_norm": 0.7005667581402969, "learning_rate": 3.6940356603811817e-06, "loss": 0.0461, "step": 27998 }, { "epoch": 3.32017075773746, "grad_norm": 0.7088733035395607, "learning_rate": 3.692780001653451e-06, "loss": 0.0302, "step": 27999 }, { "epoch": 3.320289339499585, "grad_norm": 0.7014771802458767, "learning_rate": 3.6915245393521674e-06, "loss": 0.0397, "step": 28000 }, { "epoch": 3.32040792126171, "grad_norm": 0.6710692705087972, "learning_rate": 3.6902692734888946e-06, "loss": 0.031, "step": 28001 }, { "epoch": 3.320526503023835, "grad_norm": 0.5804125193351594, "learning_rate": 3.689014204075214e-06, "loss": 0.0236, "step": 28002 }, { "epoch": 3.32064508478596, "grad_norm": 0.5786323265658855, "learning_rate": 3.6877593311226754e-06, "loss": 0.0376, "step": 28003 }, { "epoch": 3.320763666548085, "grad_norm": 0.4680126713698062, "learning_rate": 3.686504654642875e-06, "loss": 0.0234, "step": 28004 }, { "epoch": 3.32088224831021, "grad_norm": 0.3739822594621729, "learning_rate": 3.685250174647356e-06, "loss": 0.0169, "step": 28005 }, { "epoch": 3.321000830072335, "grad_norm": 0.6005542144108119, "learning_rate": 3.6839958911476957e-06, "loss": 0.0293, "step": 28006 }, { "epoch": 3.32111941183446, "grad_norm": 0.600122151768237, "learning_rate": 3.682741804155454e-06, "loss": 0.0204, "step": 28007 }, { "epoch": 3.321237993596585, "grad_norm": 0.5169456174965069, "learning_rate": 3.6814879136821914e-06, "loss": 0.0267, "step": 28008 }, { "epoch": 3.3213565753587098, "grad_norm": 0.41952371030024277, "learning_rate": 3.68023421973947e-06, "loss": 0.0251, "step": 28009 }, { "epoch": 3.321475157120835, "grad_norm": 0.613380403844406, "learning_rate": 3.678980722338843e-06, "loss": 0.0248, "step": 28010 }, { "epoch": 3.3215937388829597, "grad_norm": 0.41477745565639795, "learning_rate": 3.6777274214918667e-06, "loss": 0.0205, "step": 28011 }, { "epoch": 3.321712320645085, "grad_norm": 0.5767983827835985, "learning_rate": 3.676474317210099e-06, "loss": 0.0323, "step": 28012 }, { "epoch": 3.3218309024072097, "grad_norm": 0.527543279785441, "learning_rate": 3.675221409505086e-06, "loss": 0.0265, "step": 28013 }, { "epoch": 3.321949484169335, "grad_norm": 0.5263532042670281, "learning_rate": 3.6739686983883833e-06, "loss": 0.0253, "step": 28014 }, { "epoch": 3.3220680659314596, "grad_norm": 0.5003301499720302, "learning_rate": 3.6727161838715384e-06, "loss": 0.0347, "step": 28015 }, { "epoch": 3.322186647693585, "grad_norm": 0.4526756548145851, "learning_rate": 3.6714638659661017e-06, "loss": 0.0204, "step": 28016 }, { "epoch": 3.3223052294557096, "grad_norm": 0.34054247255804443, "learning_rate": 3.6702117446836075e-06, "loss": 0.0191, "step": 28017 }, { "epoch": 3.322423811217835, "grad_norm": 0.5299170046057428, "learning_rate": 3.6689598200356028e-06, "loss": 0.029, "step": 28018 }, { "epoch": 3.3225423929799596, "grad_norm": 0.6543489115925588, "learning_rate": 3.6677080920336337e-06, "loss": 0.0274, "step": 28019 }, { "epoch": 3.3226609747420848, "grad_norm": 0.3786048111735766, "learning_rate": 3.666456560689241e-06, "loss": 0.0136, "step": 28020 }, { "epoch": 3.3227795565042095, "grad_norm": 0.565295433327078, "learning_rate": 3.6652052260139512e-06, "loss": 0.024, "step": 28021 }, { "epoch": 3.3228981382663347, "grad_norm": 0.6161892910005843, "learning_rate": 3.6639540880193084e-06, "loss": 0.0414, "step": 28022 }, { "epoch": 3.3230167200284595, "grad_norm": 0.7606252932461626, "learning_rate": 3.6627031467168447e-06, "loss": 0.0301, "step": 28023 }, { "epoch": 3.3231353017905847, "grad_norm": 0.5706944668441977, "learning_rate": 3.661452402118093e-06, "loss": 0.02, "step": 28024 }, { "epoch": 3.3232538835527095, "grad_norm": 0.399520030938864, "learning_rate": 3.6602018542345878e-06, "loss": 0.0194, "step": 28025 }, { "epoch": 3.3233724653148347, "grad_norm": 0.5906620495133359, "learning_rate": 3.6589515030778427e-06, "loss": 0.0343, "step": 28026 }, { "epoch": 3.3234910470769594, "grad_norm": 0.3771141678485449, "learning_rate": 3.6577013486594037e-06, "loss": 0.0204, "step": 28027 }, { "epoch": 3.3236096288390846, "grad_norm": 0.5426691128695411, "learning_rate": 3.656451390990784e-06, "loss": 0.0202, "step": 28028 }, { "epoch": 3.3237282106012094, "grad_norm": 0.40352428287080816, "learning_rate": 3.6552016300835072e-06, "loss": 0.0293, "step": 28029 }, { "epoch": 3.3238467923633346, "grad_norm": 0.46783888485327807, "learning_rate": 3.6539520659490955e-06, "loss": 0.023, "step": 28030 }, { "epoch": 3.3239653741254593, "grad_norm": 0.438977499987937, "learning_rate": 3.652702698599078e-06, "loss": 0.0188, "step": 28031 }, { "epoch": 3.3240839558875845, "grad_norm": 0.8397915316957508, "learning_rate": 3.6514535280449568e-06, "loss": 0.038, "step": 28032 }, { "epoch": 3.3242025376497093, "grad_norm": 0.5300869601500785, "learning_rate": 3.650204554298256e-06, "loss": 0.0282, "step": 28033 }, { "epoch": 3.3243211194118345, "grad_norm": 0.5841538624461043, "learning_rate": 3.648955777370486e-06, "loss": 0.0259, "step": 28034 }, { "epoch": 3.3244397011739593, "grad_norm": 0.6893586953947631, "learning_rate": 3.6477071972731618e-06, "loss": 0.0249, "step": 28035 }, { "epoch": 3.3245582829360845, "grad_norm": 0.919266362745719, "learning_rate": 3.6464588140178e-06, "loss": 0.0451, "step": 28036 }, { "epoch": 3.3246768646982092, "grad_norm": 0.31234468306981, "learning_rate": 3.6452106276158916e-06, "loss": 0.0164, "step": 28037 }, { "epoch": 3.3247954464603344, "grad_norm": 0.46134009630138384, "learning_rate": 3.6439626380789655e-06, "loss": 0.0225, "step": 28038 }, { "epoch": 3.3249140282224596, "grad_norm": 0.5894183127593473, "learning_rate": 3.6427148454185074e-06, "loss": 0.0359, "step": 28039 }, { "epoch": 3.3250326099845844, "grad_norm": 0.5077930595401028, "learning_rate": 3.6414672496460357e-06, "loss": 0.0314, "step": 28040 }, { "epoch": 3.325151191746709, "grad_norm": 0.3203834881904532, "learning_rate": 3.640219850773033e-06, "loss": 0.0142, "step": 28041 }, { "epoch": 3.3252697735088343, "grad_norm": 0.5648886628427189, "learning_rate": 3.638972648811023e-06, "loss": 0.0333, "step": 28042 }, { "epoch": 3.3253883552709596, "grad_norm": 0.45560795236507723, "learning_rate": 3.6377256437714834e-06, "loss": 0.0268, "step": 28043 }, { "epoch": 3.3255069370330843, "grad_norm": 0.5407681446324131, "learning_rate": 3.636478835665916e-06, "loss": 0.0253, "step": 28044 }, { "epoch": 3.325625518795209, "grad_norm": 0.5845661656153494, "learning_rate": 3.6352322245058192e-06, "loss": 0.029, "step": 28045 }, { "epoch": 3.3257441005573343, "grad_norm": 0.3730653408019406, "learning_rate": 3.6339858103026786e-06, "loss": 0.0196, "step": 28046 }, { "epoch": 3.3258626823194595, "grad_norm": 0.4115972939778285, "learning_rate": 3.6327395930679963e-06, "loss": 0.0211, "step": 28047 }, { "epoch": 3.3259812640815842, "grad_norm": 0.6232534330903102, "learning_rate": 3.631493572813241e-06, "loss": 0.0267, "step": 28048 }, { "epoch": 3.326099845843709, "grad_norm": 0.7927292965195416, "learning_rate": 3.6302477495499225e-06, "loss": 0.0514, "step": 28049 }, { "epoch": 3.326218427605834, "grad_norm": 0.3276090554670096, "learning_rate": 3.629002123289507e-06, "loss": 0.0211, "step": 28050 }, { "epoch": 3.3263370093679594, "grad_norm": 0.4115501736728005, "learning_rate": 3.627756694043491e-06, "loss": 0.0218, "step": 28051 }, { "epoch": 3.326455591130084, "grad_norm": 0.4794069091680505, "learning_rate": 3.6265114618233396e-06, "loss": 0.0191, "step": 28052 }, { "epoch": 3.3265741728922094, "grad_norm": 0.7565726390411996, "learning_rate": 3.6252664266405558e-06, "loss": 0.047, "step": 28053 }, { "epoch": 3.326692754654334, "grad_norm": 0.3491216550689933, "learning_rate": 3.624021588506596e-06, "loss": 0.0192, "step": 28054 }, { "epoch": 3.3268113364164593, "grad_norm": 0.4610518970494569, "learning_rate": 3.622776947432946e-06, "loss": 0.0277, "step": 28055 }, { "epoch": 3.326929918178584, "grad_norm": 0.6109379171361768, "learning_rate": 3.6215325034310744e-06, "loss": 0.0303, "step": 28056 }, { "epoch": 3.3270484999407093, "grad_norm": 0.3677642104906407, "learning_rate": 3.620288256512461e-06, "loss": 0.0252, "step": 28057 }, { "epoch": 3.327167081702834, "grad_norm": 0.48913753054323195, "learning_rate": 3.6190442066885764e-06, "loss": 0.0292, "step": 28058 }, { "epoch": 3.3272856634649592, "grad_norm": 0.41172662800093573, "learning_rate": 3.6178003539708736e-06, "loss": 0.0248, "step": 28059 }, { "epoch": 3.327404245227084, "grad_norm": 0.5996753738938778, "learning_rate": 3.6165566983708434e-06, "loss": 0.0199, "step": 28060 }, { "epoch": 3.327522826989209, "grad_norm": 0.43481290664754274, "learning_rate": 3.615313239899931e-06, "loss": 0.0167, "step": 28061 }, { "epoch": 3.327641408751334, "grad_norm": 0.6214964218063276, "learning_rate": 3.6140699785696093e-06, "loss": 0.0253, "step": 28062 }, { "epoch": 3.327759990513459, "grad_norm": 0.7815446503245452, "learning_rate": 3.6128269143913347e-06, "loss": 0.0353, "step": 28063 }, { "epoch": 3.327878572275584, "grad_norm": 0.51755396997207, "learning_rate": 3.6115840473765706e-06, "loss": 0.0262, "step": 28064 }, { "epoch": 3.327997154037709, "grad_norm": 0.3830294818192797, "learning_rate": 3.610341377536777e-06, "loss": 0.0197, "step": 28065 }, { "epoch": 3.328115735799834, "grad_norm": 0.6815812274689803, "learning_rate": 3.609098904883404e-06, "loss": 0.0473, "step": 28066 }, { "epoch": 3.328234317561959, "grad_norm": 0.5163129764345256, "learning_rate": 3.6078566294279053e-06, "loss": 0.0288, "step": 28067 }, { "epoch": 3.328352899324084, "grad_norm": 0.4489444154681484, "learning_rate": 3.6066145511817363e-06, "loss": 0.0195, "step": 28068 }, { "epoch": 3.328471481086209, "grad_norm": 0.6444009358173334, "learning_rate": 3.605372670156354e-06, "loss": 0.0295, "step": 28069 }, { "epoch": 3.328590062848334, "grad_norm": 0.5561095939441943, "learning_rate": 3.6041309863631856e-06, "loss": 0.0301, "step": 28070 }, { "epoch": 3.328708644610459, "grad_norm": 0.4936679653552854, "learning_rate": 3.602889499813705e-06, "loss": 0.0241, "step": 28071 }, { "epoch": 3.3288272263725838, "grad_norm": 0.5673202536835673, "learning_rate": 3.60164821051934e-06, "loss": 0.0313, "step": 28072 }, { "epoch": 3.328945808134709, "grad_norm": 0.6326998938234467, "learning_rate": 3.6004071184915362e-06, "loss": 0.0243, "step": 28073 }, { "epoch": 3.3290643898968337, "grad_norm": 0.41470148879702035, "learning_rate": 3.599166223741737e-06, "loss": 0.0183, "step": 28074 }, { "epoch": 3.329182971658959, "grad_norm": 0.4031317866663014, "learning_rate": 3.5979255262813818e-06, "loss": 0.0249, "step": 28075 }, { "epoch": 3.3293015534210837, "grad_norm": 0.29308393475607697, "learning_rate": 3.5966850261219155e-06, "loss": 0.0195, "step": 28076 }, { "epoch": 3.329420135183209, "grad_norm": 0.585692206951662, "learning_rate": 3.59544472327476e-06, "loss": 0.0216, "step": 28077 }, { "epoch": 3.3295387169453337, "grad_norm": 0.45450503984381924, "learning_rate": 3.5942046177513567e-06, "loss": 0.0167, "step": 28078 }, { "epoch": 3.329657298707459, "grad_norm": 0.9707911779093686, "learning_rate": 3.5929647095631375e-06, "loss": 0.0245, "step": 28079 }, { "epoch": 3.3297758804695836, "grad_norm": 0.47294574095832953, "learning_rate": 3.591724998721538e-06, "loss": 0.0241, "step": 28080 }, { "epoch": 3.329894462231709, "grad_norm": 0.6231830627285894, "learning_rate": 3.590485485237974e-06, "loss": 0.0276, "step": 28081 }, { "epoch": 3.3300130439938336, "grad_norm": 0.6248541162297487, "learning_rate": 3.5892461691238806e-06, "loss": 0.0451, "step": 28082 }, { "epoch": 3.330131625755959, "grad_norm": 0.7587986412801594, "learning_rate": 3.5880070503906822e-06, "loss": 0.0369, "step": 28083 }, { "epoch": 3.3302502075180835, "grad_norm": 0.4578101648548543, "learning_rate": 3.586768129049803e-06, "loss": 0.0209, "step": 28084 }, { "epoch": 3.3303687892802087, "grad_norm": 0.8573893538332871, "learning_rate": 3.5855294051126614e-06, "loss": 0.0446, "step": 28085 }, { "epoch": 3.3304873710423335, "grad_norm": 0.262722341495648, "learning_rate": 3.584290878590679e-06, "loss": 0.0088, "step": 28086 }, { "epoch": 3.3306059528044587, "grad_norm": 0.6404630914846108, "learning_rate": 3.5830525494952803e-06, "loss": 0.0303, "step": 28087 }, { "epoch": 3.330724534566584, "grad_norm": 0.33515964144961946, "learning_rate": 3.5818144178378645e-06, "loss": 0.0161, "step": 28088 }, { "epoch": 3.3308431163287087, "grad_norm": 0.5179544145299119, "learning_rate": 3.580576483629858e-06, "loss": 0.0204, "step": 28089 }, { "epoch": 3.3309616980908334, "grad_norm": 0.6688358466838135, "learning_rate": 3.5793387468826684e-06, "loss": 0.0318, "step": 28090 }, { "epoch": 3.3310802798529586, "grad_norm": 0.5676513853555679, "learning_rate": 3.5781012076077147e-06, "loss": 0.0326, "step": 28091 }, { "epoch": 3.331198861615084, "grad_norm": 0.4341343819812523, "learning_rate": 3.5768638658163933e-06, "loss": 0.0234, "step": 28092 }, { "epoch": 3.3313174433772086, "grad_norm": 0.6747039097722346, "learning_rate": 3.575626721520117e-06, "loss": 0.029, "step": 28093 }, { "epoch": 3.3314360251393333, "grad_norm": 0.6010494040445135, "learning_rate": 3.5743897747302885e-06, "loss": 0.0295, "step": 28094 }, { "epoch": 3.3315546069014585, "grad_norm": 0.5279676563012841, "learning_rate": 3.573153025458312e-06, "loss": 0.0329, "step": 28095 }, { "epoch": 3.3316731886635838, "grad_norm": 0.5850467769433929, "learning_rate": 3.5719164737155946e-06, "loss": 0.0304, "step": 28096 }, { "epoch": 3.3317917704257085, "grad_norm": 0.5723921264600655, "learning_rate": 3.5706801195135196e-06, "loss": 0.0217, "step": 28097 }, { "epoch": 3.3319103521878333, "grad_norm": 0.425707664748483, "learning_rate": 3.5694439628635083e-06, "loss": 0.0231, "step": 28098 }, { "epoch": 3.3320289339499585, "grad_norm": 0.5068019911973674, "learning_rate": 3.568208003776938e-06, "loss": 0.0245, "step": 28099 }, { "epoch": 3.3321475157120837, "grad_norm": 0.5053680555584119, "learning_rate": 3.566972242265207e-06, "loss": 0.0209, "step": 28100 }, { "epoch": 3.3322660974742084, "grad_norm": 0.9782055387383342, "learning_rate": 3.5657366783397096e-06, "loss": 0.0416, "step": 28101 }, { "epoch": 3.3323846792363336, "grad_norm": 0.7590311539037063, "learning_rate": 3.564501312011842e-06, "loss": 0.0405, "step": 28102 }, { "epoch": 3.3325032609984584, "grad_norm": 0.47014093349953795, "learning_rate": 3.563266143292979e-06, "loss": 0.0177, "step": 28103 }, { "epoch": 3.3326218427605836, "grad_norm": 0.4654670820014841, "learning_rate": 3.562031172194516e-06, "loss": 0.0275, "step": 28104 }, { "epoch": 3.3327404245227084, "grad_norm": 0.5769480322795792, "learning_rate": 3.5607963987278365e-06, "loss": 0.0291, "step": 28105 }, { "epoch": 3.3328590062848336, "grad_norm": 0.6771737861561109, "learning_rate": 3.5595618229043224e-06, "loss": 0.027, "step": 28106 }, { "epoch": 3.3329775880469583, "grad_norm": 0.5253928546738901, "learning_rate": 3.5583274447353655e-06, "loss": 0.0226, "step": 28107 }, { "epoch": 3.3330961698090835, "grad_norm": 0.5746649306775725, "learning_rate": 3.5570932642323224e-06, "loss": 0.0215, "step": 28108 }, { "epoch": 3.3332147515712083, "grad_norm": 0.3147747205986944, "learning_rate": 3.5558592814065954e-06, "loss": 0.016, "step": 28109 }, { "epoch": 3.3333333333333335, "grad_norm": 0.8348174857758743, "learning_rate": 3.5546254962695424e-06, "loss": 0.0176, "step": 28110 }, { "epoch": 3.3334519150954582, "grad_norm": 0.6678627128345974, "learning_rate": 3.5533919088325512e-06, "loss": 0.0344, "step": 28111 }, { "epoch": 3.3335704968575834, "grad_norm": 0.5152558499657043, "learning_rate": 3.552158519106977e-06, "loss": 0.032, "step": 28112 }, { "epoch": 3.333689078619708, "grad_norm": 0.47597827539886134, "learning_rate": 3.55092532710421e-06, "loss": 0.0231, "step": 28113 }, { "epoch": 3.3338076603818334, "grad_norm": 0.3453121272420612, "learning_rate": 3.5496923328356025e-06, "loss": 0.0156, "step": 28114 }, { "epoch": 3.333926242143958, "grad_norm": 0.4920706437033826, "learning_rate": 3.548459536312529e-06, "loss": 0.0281, "step": 28115 }, { "epoch": 3.3340448239060834, "grad_norm": 0.49169109796493404, "learning_rate": 3.5472269375463522e-06, "loss": 0.0262, "step": 28116 }, { "epoch": 3.334163405668208, "grad_norm": 0.5203376014657446, "learning_rate": 3.5459945365484387e-06, "loss": 0.0233, "step": 28117 }, { "epoch": 3.3342819874303333, "grad_norm": 0.6001043844221207, "learning_rate": 3.5447623333301484e-06, "loss": 0.036, "step": 28118 }, { "epoch": 3.334400569192458, "grad_norm": 0.7681787758018628, "learning_rate": 3.5435303279028304e-06, "loss": 0.0513, "step": 28119 }, { "epoch": 3.3345191509545833, "grad_norm": 0.6154166445130955, "learning_rate": 3.542298520277862e-06, "loss": 0.0204, "step": 28120 }, { "epoch": 3.334637732716708, "grad_norm": 0.4382384387240891, "learning_rate": 3.5410669104665812e-06, "loss": 0.0236, "step": 28121 }, { "epoch": 3.3347563144788333, "grad_norm": 0.5801112478509725, "learning_rate": 3.5398354984803513e-06, "loss": 0.0283, "step": 28122 }, { "epoch": 3.334874896240958, "grad_norm": 0.5480644680053263, "learning_rate": 3.5386042843305184e-06, "loss": 0.0148, "step": 28123 }, { "epoch": 3.334993478003083, "grad_norm": 0.34893208255103386, "learning_rate": 3.5373732680284406e-06, "loss": 0.025, "step": 28124 }, { "epoch": 3.335112059765208, "grad_norm": 1.052235588902905, "learning_rate": 3.5361424495854637e-06, "loss": 0.0308, "step": 28125 }, { "epoch": 3.335230641527333, "grad_norm": 0.8706279129193831, "learning_rate": 3.53491182901293e-06, "loss": 0.0525, "step": 28126 }, { "epoch": 3.335349223289458, "grad_norm": 0.36105536489882395, "learning_rate": 3.533681406322184e-06, "loss": 0.0146, "step": 28127 }, { "epoch": 3.335467805051583, "grad_norm": 0.4947296149863641, "learning_rate": 3.5324511815245738e-06, "loss": 0.0216, "step": 28128 }, { "epoch": 3.335586386813708, "grad_norm": 0.5136903866008311, "learning_rate": 3.531221154631445e-06, "loss": 0.024, "step": 28129 }, { "epoch": 3.335704968575833, "grad_norm": 0.6793228375537872, "learning_rate": 3.529991325654117e-06, "loss": 0.036, "step": 28130 }, { "epoch": 3.335823550337958, "grad_norm": 0.5338875034102847, "learning_rate": 3.528761694603955e-06, "loss": 0.0209, "step": 28131 }, { "epoch": 3.335942132100083, "grad_norm": 0.5862167220335227, "learning_rate": 3.5275322614922724e-06, "loss": 0.0214, "step": 28132 }, { "epoch": 3.336060713862208, "grad_norm": 0.6480100057532668, "learning_rate": 3.5263030263304104e-06, "loss": 0.0413, "step": 28133 }, { "epoch": 3.336179295624333, "grad_norm": 0.6003407440108759, "learning_rate": 3.5250739891297014e-06, "loss": 0.0256, "step": 28134 }, { "epoch": 3.3362978773864578, "grad_norm": 0.3843561991177664, "learning_rate": 3.523845149901478e-06, "loss": 0.0171, "step": 28135 }, { "epoch": 3.336416459148583, "grad_norm": 0.4323950652925302, "learning_rate": 3.522616508657073e-06, "loss": 0.0265, "step": 28136 }, { "epoch": 3.3365350409107077, "grad_norm": 0.43568400628398984, "learning_rate": 3.5213880654077965e-06, "loss": 0.0201, "step": 28137 }, { "epoch": 3.336653622672833, "grad_norm": 0.3827981640825874, "learning_rate": 3.5201598201649865e-06, "loss": 0.0169, "step": 28138 }, { "epoch": 3.3367722044349577, "grad_norm": 0.6505375407955951, "learning_rate": 3.518931772939962e-06, "loss": 0.0262, "step": 28139 }, { "epoch": 3.336890786197083, "grad_norm": 0.6144644519321392, "learning_rate": 3.517703923744051e-06, "loss": 0.0343, "step": 28140 }, { "epoch": 3.337009367959208, "grad_norm": 0.49083967986302557, "learning_rate": 3.5164762725885537e-06, "loss": 0.0202, "step": 28141 }, { "epoch": 3.337127949721333, "grad_norm": 0.4664760165563314, "learning_rate": 3.515248819484812e-06, "loss": 0.0244, "step": 28142 }, { "epoch": 3.3372465314834576, "grad_norm": 0.5187731389267322, "learning_rate": 3.5140215644441257e-06, "loss": 0.0238, "step": 28143 }, { "epoch": 3.337365113245583, "grad_norm": 0.5505640223032221, "learning_rate": 3.5127945074778123e-06, "loss": 0.0265, "step": 28144 }, { "epoch": 3.337483695007708, "grad_norm": 0.5460465033822745, "learning_rate": 3.511567648597183e-06, "loss": 0.0204, "step": 28145 }, { "epoch": 3.337602276769833, "grad_norm": 0.679222728203132, "learning_rate": 3.5103409878135512e-06, "loss": 0.0246, "step": 28146 }, { "epoch": 3.3377208585319575, "grad_norm": 0.5812425900420349, "learning_rate": 3.509114525138227e-06, "loss": 0.0258, "step": 28147 }, { "epoch": 3.3378394402940827, "grad_norm": 0.5783232244920313, "learning_rate": 3.5078882605825063e-06, "loss": 0.027, "step": 28148 }, { "epoch": 3.337958022056208, "grad_norm": 0.6811948586624061, "learning_rate": 3.5066621941577032e-06, "loss": 0.037, "step": 28149 }, { "epoch": 3.3380766038183327, "grad_norm": 0.4999890305271869, "learning_rate": 3.5054363258751166e-06, "loss": 0.0255, "step": 28150 }, { "epoch": 3.338195185580458, "grad_norm": 0.431087261683757, "learning_rate": 3.504210655746054e-06, "loss": 0.0211, "step": 28151 }, { "epoch": 3.3383137673425827, "grad_norm": 0.5176278894171434, "learning_rate": 3.502985183781804e-06, "loss": 0.0285, "step": 28152 }, { "epoch": 3.338432349104708, "grad_norm": 0.6703594464056748, "learning_rate": 3.501759909993668e-06, "loss": 0.0246, "step": 28153 }, { "epoch": 3.3385509308668326, "grad_norm": 0.35800519513987067, "learning_rate": 3.5005348343929434e-06, "loss": 0.0129, "step": 28154 }, { "epoch": 3.338669512628958, "grad_norm": 0.4615562480378645, "learning_rate": 3.4993099569909233e-06, "loss": 0.026, "step": 28155 }, { "epoch": 3.3387880943910826, "grad_norm": 0.4370132220629775, "learning_rate": 3.4980852777989014e-06, "loss": 0.0205, "step": 28156 }, { "epoch": 3.338906676153208, "grad_norm": 0.6119920583473725, "learning_rate": 3.4968607968281636e-06, "loss": 0.031, "step": 28157 }, { "epoch": 3.3390252579153326, "grad_norm": 0.8388948435316774, "learning_rate": 3.495636514090006e-06, "loss": 0.0447, "step": 28158 }, { "epoch": 3.3391438396774578, "grad_norm": 0.6066256916276969, "learning_rate": 3.494412429595706e-06, "loss": 0.0263, "step": 28159 }, { "epoch": 3.3392624214395825, "grad_norm": 0.4469551643054615, "learning_rate": 3.4931885433565487e-06, "loss": 0.0112, "step": 28160 }, { "epoch": 3.3393810032017077, "grad_norm": 0.7365513839262728, "learning_rate": 3.4919648553838223e-06, "loss": 0.044, "step": 28161 }, { "epoch": 3.3394995849638325, "grad_norm": 0.39811602381549427, "learning_rate": 3.490741365688807e-06, "loss": 0.0187, "step": 28162 }, { "epoch": 3.3396181667259577, "grad_norm": 0.45403447739426084, "learning_rate": 3.4895180742827772e-06, "loss": 0.0251, "step": 28163 }, { "epoch": 3.3397367484880824, "grad_norm": 0.42748700205440665, "learning_rate": 3.488294981177012e-06, "loss": 0.0168, "step": 28164 }, { "epoch": 3.3398553302502076, "grad_norm": 0.4324414018590449, "learning_rate": 3.4870720863827892e-06, "loss": 0.0144, "step": 28165 }, { "epoch": 3.3399739120123324, "grad_norm": 0.5446977498032737, "learning_rate": 3.4858493899113804e-06, "loss": 0.0341, "step": 28166 }, { "epoch": 3.3400924937744576, "grad_norm": 0.5072348671264669, "learning_rate": 3.484626891774062e-06, "loss": 0.0221, "step": 28167 }, { "epoch": 3.3402110755365824, "grad_norm": 0.39573862127927545, "learning_rate": 3.4834045919820896e-06, "loss": 0.0245, "step": 28168 }, { "epoch": 3.3403296572987076, "grad_norm": 0.3123242508177821, "learning_rate": 3.482182490546751e-06, "loss": 0.0123, "step": 28169 }, { "epoch": 3.3404482390608323, "grad_norm": 0.44129500391893756, "learning_rate": 3.4809605874792983e-06, "loss": 0.0194, "step": 28170 }, { "epoch": 3.3405668208229575, "grad_norm": 0.8298588746452855, "learning_rate": 3.4797388827910006e-06, "loss": 0.0407, "step": 28171 }, { "epoch": 3.3406854025850823, "grad_norm": 0.5176232142187198, "learning_rate": 3.4785173764931207e-06, "loss": 0.0313, "step": 28172 }, { "epoch": 3.3408039843472075, "grad_norm": 0.6224652256119334, "learning_rate": 3.4772960685969192e-06, "loss": 0.0279, "step": 28173 }, { "epoch": 3.3409225661093322, "grad_norm": 0.7424210238117682, "learning_rate": 3.476074959113662e-06, "loss": 0.0363, "step": 28174 }, { "epoch": 3.3410411478714575, "grad_norm": 0.5250708697314654, "learning_rate": 3.474854048054585e-06, "loss": 0.0332, "step": 28175 }, { "epoch": 3.341159729633582, "grad_norm": 0.49578679792123215, "learning_rate": 3.473633335430973e-06, "loss": 0.0336, "step": 28176 }, { "epoch": 3.3412783113957074, "grad_norm": 0.7874190527711381, "learning_rate": 3.4724128212540563e-06, "loss": 0.0348, "step": 28177 }, { "epoch": 3.341396893157832, "grad_norm": 0.771815688609307, "learning_rate": 3.471192505535098e-06, "loss": 0.0311, "step": 28178 }, { "epoch": 3.3415154749199574, "grad_norm": 0.5518377335865029, "learning_rate": 3.469972388285336e-06, "loss": 0.0239, "step": 28179 }, { "epoch": 3.341634056682082, "grad_norm": 0.7384662469267087, "learning_rate": 3.4687524695160375e-06, "loss": 0.0299, "step": 28180 }, { "epoch": 3.3417526384442073, "grad_norm": 0.6198869956149893, "learning_rate": 3.4675327492384334e-06, "loss": 0.0272, "step": 28181 }, { "epoch": 3.341871220206332, "grad_norm": 0.6633283820599924, "learning_rate": 3.4663132274637717e-06, "loss": 0.0299, "step": 28182 }, { "epoch": 3.3419898019684573, "grad_norm": 0.6970979278826498, "learning_rate": 3.4650939042032955e-06, "loss": 0.0414, "step": 28183 }, { "epoch": 3.342108383730582, "grad_norm": 0.31735932413214174, "learning_rate": 3.463874779468246e-06, "loss": 0.0161, "step": 28184 }, { "epoch": 3.3422269654927073, "grad_norm": 0.297029650320158, "learning_rate": 3.462655853269864e-06, "loss": 0.0124, "step": 28185 }, { "epoch": 3.342345547254832, "grad_norm": 0.421756469928052, "learning_rate": 3.4614371256193766e-06, "loss": 0.025, "step": 28186 }, { "epoch": 3.342464129016957, "grad_norm": 0.4182433039941755, "learning_rate": 3.4602185965280366e-06, "loss": 0.0184, "step": 28187 }, { "epoch": 3.342582710779082, "grad_norm": 0.41626635490778385, "learning_rate": 3.459000266007062e-06, "loss": 0.0242, "step": 28188 }, { "epoch": 3.342701292541207, "grad_norm": 0.4107922159423503, "learning_rate": 3.457782134067697e-06, "loss": 0.0214, "step": 28189 }, { "epoch": 3.3428198743033324, "grad_norm": 0.46892582271562294, "learning_rate": 3.4565642007211493e-06, "loss": 0.0209, "step": 28190 }, { "epoch": 3.342938456065457, "grad_norm": 0.6913526791788209, "learning_rate": 3.455346465978676e-06, "loss": 0.0309, "step": 28191 }, { "epoch": 3.343057037827582, "grad_norm": 0.6507970778573601, "learning_rate": 3.45412892985148e-06, "loss": 0.0344, "step": 28192 }, { "epoch": 3.343175619589707, "grad_norm": 0.7055543410222099, "learning_rate": 3.4529115923507937e-06, "loss": 0.0401, "step": 28193 }, { "epoch": 3.3432942013518323, "grad_norm": 0.9199076220500583, "learning_rate": 3.4516944534878414e-06, "loss": 0.0323, "step": 28194 }, { "epoch": 3.343412783113957, "grad_norm": 0.4713967122509957, "learning_rate": 3.4504775132738423e-06, "loss": 0.0257, "step": 28195 }, { "epoch": 3.343531364876082, "grad_norm": 0.43834867512691517, "learning_rate": 3.4492607717200198e-06, "loss": 0.0177, "step": 28196 }, { "epoch": 3.343649946638207, "grad_norm": 0.5692708131064051, "learning_rate": 3.44804422883758e-06, "loss": 0.0323, "step": 28197 }, { "epoch": 3.3437685284003322, "grad_norm": 0.6414294748845712, "learning_rate": 3.4468278846377435e-06, "loss": 0.0297, "step": 28198 }, { "epoch": 3.343887110162457, "grad_norm": 0.3716064267615827, "learning_rate": 3.445611739131724e-06, "loss": 0.0138, "step": 28199 }, { "epoch": 3.344005691924582, "grad_norm": 0.4221718005718703, "learning_rate": 3.4443957923307376e-06, "loss": 0.0211, "step": 28200 }, { "epoch": 3.344124273686707, "grad_norm": 0.5930673146564932, "learning_rate": 3.443180044245978e-06, "loss": 0.029, "step": 28201 }, { "epoch": 3.344242855448832, "grad_norm": 0.8560265164156348, "learning_rate": 3.4419644948886748e-06, "loss": 0.0304, "step": 28202 }, { "epoch": 3.344361437210957, "grad_norm": 0.6882309866297076, "learning_rate": 3.4407491442700196e-06, "loss": 0.0336, "step": 28203 }, { "epoch": 3.344480018973082, "grad_norm": 0.3099078721293835, "learning_rate": 3.439533992401217e-06, "loss": 0.0146, "step": 28204 }, { "epoch": 3.344598600735207, "grad_norm": 0.5778606235366471, "learning_rate": 3.4383190392934693e-06, "loss": 0.0328, "step": 28205 }, { "epoch": 3.344717182497332, "grad_norm": 0.43267152790615265, "learning_rate": 3.4371042849579845e-06, "loss": 0.0205, "step": 28206 }, { "epoch": 3.344835764259457, "grad_norm": 0.3736479895656624, "learning_rate": 3.4358897294059582e-06, "loss": 0.0218, "step": 28207 }, { "epoch": 3.344954346021582, "grad_norm": 0.4059210344631899, "learning_rate": 3.43467537264858e-06, "loss": 0.0229, "step": 28208 }, { "epoch": 3.345072927783707, "grad_norm": 0.31038804618952504, "learning_rate": 3.433461214697051e-06, "loss": 0.0136, "step": 28209 }, { "epoch": 3.345191509545832, "grad_norm": 0.7357423986705598, "learning_rate": 3.4322472555625624e-06, "loss": 0.043, "step": 28210 }, { "epoch": 3.3453100913079568, "grad_norm": 0.5754216981827954, "learning_rate": 3.4310334952563112e-06, "loss": 0.0331, "step": 28211 }, { "epoch": 3.345428673070082, "grad_norm": 0.393781598787282, "learning_rate": 3.4298199337894685e-06, "loss": 0.0177, "step": 28212 }, { "epoch": 3.3455472548322067, "grad_norm": 0.5349602623237609, "learning_rate": 3.4286065711732486e-06, "loss": 0.0357, "step": 28213 }, { "epoch": 3.345665836594332, "grad_norm": 0.41427778777841306, "learning_rate": 3.427393407418816e-06, "loss": 0.017, "step": 28214 }, { "epoch": 3.3457844183564567, "grad_norm": 0.6126424996321812, "learning_rate": 3.426180442537366e-06, "loss": 0.0344, "step": 28215 }, { "epoch": 3.345903000118582, "grad_norm": 0.5297127237189342, "learning_rate": 3.4249676765400723e-06, "loss": 0.02, "step": 28216 }, { "epoch": 3.3460215818807066, "grad_norm": 0.7270795082679435, "learning_rate": 3.423755109438123e-06, "loss": 0.0348, "step": 28217 }, { "epoch": 3.346140163642832, "grad_norm": 0.4665862566807893, "learning_rate": 3.422542741242696e-06, "loss": 0.025, "step": 28218 }, { "epoch": 3.3462587454049566, "grad_norm": 0.398971612099775, "learning_rate": 3.421330571964959e-06, "loss": 0.0163, "step": 28219 }, { "epoch": 3.346377327167082, "grad_norm": 0.37470988849226766, "learning_rate": 3.420118601616096e-06, "loss": 0.0193, "step": 28220 }, { "epoch": 3.3464959089292066, "grad_norm": 0.637888952899598, "learning_rate": 3.4189068302072745e-06, "loss": 0.0326, "step": 28221 }, { "epoch": 3.3466144906913318, "grad_norm": 0.5589683634580547, "learning_rate": 3.4176952577496673e-06, "loss": 0.0244, "step": 28222 }, { "epoch": 3.3467330724534565, "grad_norm": 0.5538158794332383, "learning_rate": 3.4164838842544485e-06, "loss": 0.0299, "step": 28223 }, { "epoch": 3.3468516542155817, "grad_norm": 0.4858986764339397, "learning_rate": 3.4152727097327726e-06, "loss": 0.0242, "step": 28224 }, { "epoch": 3.3469702359777065, "grad_norm": 0.4365959285271395, "learning_rate": 3.4140617341958225e-06, "loss": 0.0197, "step": 28225 }, { "epoch": 3.3470888177398317, "grad_norm": 0.688776736755611, "learning_rate": 3.412850957654748e-06, "loss": 0.0263, "step": 28226 }, { "epoch": 3.3472073995019564, "grad_norm": 0.821301857466055, "learning_rate": 3.411640380120715e-06, "loss": 0.05, "step": 28227 }, { "epoch": 3.3473259812640817, "grad_norm": 0.5875648660628294, "learning_rate": 3.4104300016048835e-06, "loss": 0.027, "step": 28228 }, { "epoch": 3.3474445630262064, "grad_norm": 0.37481771779192163, "learning_rate": 3.4092198221184203e-06, "loss": 0.0218, "step": 28229 }, { "epoch": 3.3475631447883316, "grad_norm": 0.6710333324951044, "learning_rate": 3.408009841672466e-06, "loss": 0.0307, "step": 28230 }, { "epoch": 3.3476817265504564, "grad_norm": 0.7815898042557843, "learning_rate": 3.406800060278184e-06, "loss": 0.0605, "step": 28231 }, { "epoch": 3.3478003083125816, "grad_norm": 0.3620847863955289, "learning_rate": 3.4055904779467267e-06, "loss": 0.0189, "step": 28232 }, { "epoch": 3.3479188900747063, "grad_norm": 0.5697120988659441, "learning_rate": 3.4043810946892436e-06, "loss": 0.0279, "step": 28233 }, { "epoch": 3.3480374718368315, "grad_norm": 0.7277329855884003, "learning_rate": 3.403171910516892e-06, "loss": 0.033, "step": 28234 }, { "epoch": 3.3481560535989563, "grad_norm": 0.48107042099226494, "learning_rate": 3.401962925440799e-06, "loss": 0.0233, "step": 28235 }, { "epoch": 3.3482746353610815, "grad_norm": 0.814924486188585, "learning_rate": 3.4007541394721314e-06, "loss": 0.0348, "step": 28236 }, { "epoch": 3.3483932171232063, "grad_norm": 0.3300562348123815, "learning_rate": 3.3995455526220215e-06, "loss": 0.0167, "step": 28237 }, { "epoch": 3.3485117988853315, "grad_norm": 0.5334158980364886, "learning_rate": 3.398337164901619e-06, "loss": 0.0247, "step": 28238 }, { "epoch": 3.3486303806474567, "grad_norm": 0.6884143914578076, "learning_rate": 3.397128976322045e-06, "loss": 0.0386, "step": 28239 }, { "epoch": 3.3487489624095814, "grad_norm": 0.3016313024102435, "learning_rate": 3.395920986894463e-06, "loss": 0.0155, "step": 28240 }, { "epoch": 3.348867544171706, "grad_norm": 0.5251405141079009, "learning_rate": 3.394713196629992e-06, "loss": 0.0306, "step": 28241 }, { "epoch": 3.3489861259338314, "grad_norm": 0.500267518370771, "learning_rate": 3.3935056055397706e-06, "loss": 0.0227, "step": 28242 }, { "epoch": 3.3491047076959566, "grad_norm": 0.48006827956788833, "learning_rate": 3.392298213634931e-06, "loss": 0.0285, "step": 28243 }, { "epoch": 3.3492232894580813, "grad_norm": 0.9194337498369566, "learning_rate": 3.3910910209266037e-06, "loss": 0.0517, "step": 28244 }, { "epoch": 3.349341871220206, "grad_norm": 0.617679520410546, "learning_rate": 3.389884027425927e-06, "loss": 0.0348, "step": 28245 }, { "epoch": 3.3494604529823313, "grad_norm": 0.46605060881192145, "learning_rate": 3.388677233144008e-06, "loss": 0.0155, "step": 28246 }, { "epoch": 3.3495790347444565, "grad_norm": 0.3551502916553192, "learning_rate": 3.387470638091994e-06, "loss": 0.027, "step": 28247 }, { "epoch": 3.3496976165065813, "grad_norm": 0.8831668822300798, "learning_rate": 3.38626424228099e-06, "loss": 0.0487, "step": 28248 }, { "epoch": 3.349816198268706, "grad_norm": 0.4704423394553142, "learning_rate": 3.385058045722134e-06, "loss": 0.0242, "step": 28249 }, { "epoch": 3.3499347800308312, "grad_norm": 0.387456952230621, "learning_rate": 3.383852048426525e-06, "loss": 0.0211, "step": 28250 }, { "epoch": 3.3500533617929564, "grad_norm": 0.7397142725057542, "learning_rate": 3.3826462504053026e-06, "loss": 0.0289, "step": 28251 }, { "epoch": 3.350171943555081, "grad_norm": 0.41774932061219394, "learning_rate": 3.381440651669568e-06, "loss": 0.0161, "step": 28252 }, { "epoch": 3.3502905253172064, "grad_norm": 0.45560828738774256, "learning_rate": 3.3802352522304377e-06, "loss": 0.0216, "step": 28253 }, { "epoch": 3.350409107079331, "grad_norm": 0.7691938157595236, "learning_rate": 3.3790300520990304e-06, "loss": 0.0216, "step": 28254 }, { "epoch": 3.3505276888414564, "grad_norm": 0.7069236850242283, "learning_rate": 3.3778250512864513e-06, "loss": 0.0502, "step": 28255 }, { "epoch": 3.350646270603581, "grad_norm": 0.7503153169472407, "learning_rate": 3.3766202498038136e-06, "loss": 0.0337, "step": 28256 }, { "epoch": 3.3507648523657063, "grad_norm": 0.742307100488776, "learning_rate": 3.3754156476622112e-06, "loss": 0.0338, "step": 28257 }, { "epoch": 3.350883434127831, "grad_norm": 0.5482687384236419, "learning_rate": 3.3742112448727713e-06, "loss": 0.0185, "step": 28258 }, { "epoch": 3.3510020158899563, "grad_norm": 0.4111845962248236, "learning_rate": 3.373007041446577e-06, "loss": 0.0167, "step": 28259 }, { "epoch": 3.351120597652081, "grad_norm": 0.5923859132309279, "learning_rate": 3.371803037394744e-06, "loss": 0.0305, "step": 28260 }, { "epoch": 3.3512391794142062, "grad_norm": 0.4291911402438124, "learning_rate": 3.3705992327283523e-06, "loss": 0.0235, "step": 28261 }, { "epoch": 3.351357761176331, "grad_norm": 0.7255319151473992, "learning_rate": 3.369395627458524e-06, "loss": 0.0341, "step": 28262 }, { "epoch": 3.351476342938456, "grad_norm": 0.43760583108322504, "learning_rate": 3.3681922215963364e-06, "loss": 0.0174, "step": 28263 }, { "epoch": 3.351594924700581, "grad_norm": 0.4573631757499597, "learning_rate": 3.366989015152891e-06, "loss": 0.0218, "step": 28264 }, { "epoch": 3.351713506462706, "grad_norm": 0.4527573300220819, "learning_rate": 3.3657860081392794e-06, "loss": 0.0187, "step": 28265 }, { "epoch": 3.351832088224831, "grad_norm": 0.4182659925900208, "learning_rate": 3.36458320056659e-06, "loss": 0.0201, "step": 28266 }, { "epoch": 3.351950669986956, "grad_norm": 0.3959800589725202, "learning_rate": 3.3633805924459164e-06, "loss": 0.018, "step": 28267 }, { "epoch": 3.352069251749081, "grad_norm": 0.5786280379278526, "learning_rate": 3.3621781837883365e-06, "loss": 0.0345, "step": 28268 }, { "epoch": 3.352187833511206, "grad_norm": 0.5227350376922945, "learning_rate": 3.3609759746049407e-06, "loss": 0.0228, "step": 28269 }, { "epoch": 3.352306415273331, "grad_norm": 0.7540154162815684, "learning_rate": 3.3597739649068123e-06, "loss": 0.0437, "step": 28270 }, { "epoch": 3.352424997035456, "grad_norm": 0.39977722437406304, "learning_rate": 3.358572154705028e-06, "loss": 0.0135, "step": 28271 }, { "epoch": 3.352543578797581, "grad_norm": 0.6107006057939457, "learning_rate": 3.357370544010671e-06, "loss": 0.0317, "step": 28272 }, { "epoch": 3.352662160559706, "grad_norm": 0.9037926939413755, "learning_rate": 3.3561691328348187e-06, "loss": 0.045, "step": 28273 }, { "epoch": 3.3527807423218308, "grad_norm": 0.3476388378877911, "learning_rate": 3.3549679211885483e-06, "loss": 0.0233, "step": 28274 }, { "epoch": 3.352899324083956, "grad_norm": 0.3751134512711658, "learning_rate": 3.353766909082928e-06, "loss": 0.0162, "step": 28275 }, { "epoch": 3.3530179058460807, "grad_norm": 0.6389063091231484, "learning_rate": 3.3525660965290307e-06, "loss": 0.0284, "step": 28276 }, { "epoch": 3.353136487608206, "grad_norm": 0.8694600952881567, "learning_rate": 3.35136548353793e-06, "loss": 0.04, "step": 28277 }, { "epoch": 3.3532550693703307, "grad_norm": 0.5785908495121403, "learning_rate": 3.350165070120698e-06, "loss": 0.0277, "step": 28278 }, { "epoch": 3.353373651132456, "grad_norm": 0.6922427362905814, "learning_rate": 3.3489648562883864e-06, "loss": 0.0281, "step": 28279 }, { "epoch": 3.3534922328945806, "grad_norm": 0.3948664622085124, "learning_rate": 3.34776484205207e-06, "loss": 0.02, "step": 28280 }, { "epoch": 3.353610814656706, "grad_norm": 0.45422896888424386, "learning_rate": 3.3465650274228127e-06, "loss": 0.0197, "step": 28281 }, { "epoch": 3.3537293964188306, "grad_norm": 0.375289348074269, "learning_rate": 3.345365412411669e-06, "loss": 0.0178, "step": 28282 }, { "epoch": 3.353847978180956, "grad_norm": 0.5028740842507217, "learning_rate": 3.344165997029711e-06, "loss": 0.0235, "step": 28283 }, { "epoch": 3.3539665599430806, "grad_norm": 0.5904841985632487, "learning_rate": 3.3429667812879706e-06, "loss": 0.021, "step": 28284 }, { "epoch": 3.3540851417052058, "grad_norm": 0.6903163965935314, "learning_rate": 3.341767765197534e-06, "loss": 0.0402, "step": 28285 }, { "epoch": 3.3542037234673305, "grad_norm": 0.48220343875443233, "learning_rate": 3.340568948769432e-06, "loss": 0.0176, "step": 28286 }, { "epoch": 3.3543223052294557, "grad_norm": 0.4369683633055692, "learning_rate": 3.339370332014724e-06, "loss": 0.0204, "step": 28287 }, { "epoch": 3.354440886991581, "grad_norm": 0.7765882707750833, "learning_rate": 3.33817191494446e-06, "loss": 0.0413, "step": 28288 }, { "epoch": 3.3545594687537057, "grad_norm": 0.3875280823679829, "learning_rate": 3.3369736975696925e-06, "loss": 0.0222, "step": 28289 }, { "epoch": 3.3546780505158305, "grad_norm": 0.35713168227977854, "learning_rate": 3.335775679901457e-06, "loss": 0.0144, "step": 28290 }, { "epoch": 3.3547966322779557, "grad_norm": 0.6828633275817672, "learning_rate": 3.3345778619508024e-06, "loss": 0.0307, "step": 28291 }, { "epoch": 3.354915214040081, "grad_norm": 0.6983418686527744, "learning_rate": 3.3333802437287733e-06, "loss": 0.0383, "step": 28292 }, { "epoch": 3.3550337958022056, "grad_norm": 0.44862152932031757, "learning_rate": 3.3321828252464077e-06, "loss": 0.0164, "step": 28293 }, { "epoch": 3.3551523775643304, "grad_norm": 0.296317269117406, "learning_rate": 3.3309856065147526e-06, "loss": 0.0157, "step": 28294 }, { "epoch": 3.3552709593264556, "grad_norm": 0.5663689096481667, "learning_rate": 3.329788587544827e-06, "loss": 0.0337, "step": 28295 }, { "epoch": 3.355389541088581, "grad_norm": 0.5112536538973177, "learning_rate": 3.328591768347686e-06, "loss": 0.0251, "step": 28296 }, { "epoch": 3.3555081228507055, "grad_norm": 0.5742272987044011, "learning_rate": 3.327395148934351e-06, "loss": 0.0351, "step": 28297 }, { "epoch": 3.3556267046128303, "grad_norm": 0.4700243949169704, "learning_rate": 3.3261987293158547e-06, "loss": 0.0234, "step": 28298 }, { "epoch": 3.3557452863749555, "grad_norm": 0.6300278638954473, "learning_rate": 3.325002509503228e-06, "loss": 0.0274, "step": 28299 }, { "epoch": 3.3558638681370807, "grad_norm": 0.6884447473363666, "learning_rate": 3.3238064895075027e-06, "loss": 0.0342, "step": 28300 }, { "epoch": 3.3559824498992055, "grad_norm": 0.733897120409, "learning_rate": 3.3226106693396956e-06, "loss": 0.0423, "step": 28301 }, { "epoch": 3.3561010316613307, "grad_norm": 0.40880298227747935, "learning_rate": 3.321415049010837e-06, "loss": 0.0159, "step": 28302 }, { "epoch": 3.3562196134234554, "grad_norm": 0.6712995291648227, "learning_rate": 3.320219628531945e-06, "loss": 0.0554, "step": 28303 }, { "epoch": 3.3563381951855806, "grad_norm": 0.37541223100322735, "learning_rate": 3.319024407914045e-06, "loss": 0.0152, "step": 28304 }, { "epoch": 3.3564567769477054, "grad_norm": 0.5588199146739184, "learning_rate": 3.3178293871681587e-06, "loss": 0.0284, "step": 28305 }, { "epoch": 3.3565753587098306, "grad_norm": 0.564553830834946, "learning_rate": 3.316634566305285e-06, "loss": 0.0291, "step": 28306 }, { "epoch": 3.3566939404719554, "grad_norm": 0.8319811555024731, "learning_rate": 3.3154399453364597e-06, "loss": 0.0368, "step": 28307 }, { "epoch": 3.3568125222340806, "grad_norm": 0.41711500013233127, "learning_rate": 3.3142455242726827e-06, "loss": 0.0228, "step": 28308 }, { "epoch": 3.3569311039962053, "grad_norm": 0.5840921157854478, "learning_rate": 3.3130513031249756e-06, "loss": 0.0248, "step": 28309 }, { "epoch": 3.3570496857583305, "grad_norm": 0.3116702604707591, "learning_rate": 3.311857281904329e-06, "loss": 0.0153, "step": 28310 }, { "epoch": 3.3571682675204553, "grad_norm": 0.23759318195728477, "learning_rate": 3.3106634606217767e-06, "loss": 0.0166, "step": 28311 }, { "epoch": 3.3572868492825805, "grad_norm": 0.5914615049529913, "learning_rate": 3.309469839288301e-06, "loss": 0.0238, "step": 28312 }, { "epoch": 3.3574054310447052, "grad_norm": 0.9034112453667742, "learning_rate": 3.3082764179149155e-06, "loss": 0.0395, "step": 28313 }, { "epoch": 3.3575240128068304, "grad_norm": 0.6999090978603308, "learning_rate": 3.3070831965126227e-06, "loss": 0.0333, "step": 28314 }, { "epoch": 3.357642594568955, "grad_norm": 0.2973270249284338, "learning_rate": 3.3058901750924185e-06, "loss": 0.0175, "step": 28315 }, { "epoch": 3.3577611763310804, "grad_norm": 0.5279634400338282, "learning_rate": 3.304697353665312e-06, "loss": 0.0238, "step": 28316 }, { "epoch": 3.357879758093205, "grad_norm": 0.5494786807672036, "learning_rate": 3.30350473224228e-06, "loss": 0.0287, "step": 28317 }, { "epoch": 3.3579983398553304, "grad_norm": 0.4623087306515806, "learning_rate": 3.3023123108343377e-06, "loss": 0.0243, "step": 28318 }, { "epoch": 3.358116921617455, "grad_norm": 0.614307465774112, "learning_rate": 3.301120089452464e-06, "loss": 0.0396, "step": 28319 }, { "epoch": 3.3582355033795803, "grad_norm": 0.6861971562801845, "learning_rate": 3.2999280681076576e-06, "loss": 0.0431, "step": 28320 }, { "epoch": 3.358354085141705, "grad_norm": 0.47554542646041204, "learning_rate": 3.2987362468108962e-06, "loss": 0.0273, "step": 28321 }, { "epoch": 3.3584726669038303, "grad_norm": 0.4194668112933761, "learning_rate": 3.2975446255731844e-06, "loss": 0.0195, "step": 28322 }, { "epoch": 3.358591248665955, "grad_norm": 0.3759538621645294, "learning_rate": 3.2963532044054916e-06, "loss": 0.0174, "step": 28323 }, { "epoch": 3.3587098304280802, "grad_norm": 0.6566019076367076, "learning_rate": 3.2951619833188093e-06, "loss": 0.0351, "step": 28324 }, { "epoch": 3.358828412190205, "grad_norm": 0.5353165260980502, "learning_rate": 3.2939709623241168e-06, "loss": 0.0296, "step": 28325 }, { "epoch": 3.35894699395233, "grad_norm": 0.6298458865649835, "learning_rate": 3.2927801414323916e-06, "loss": 0.0326, "step": 28326 }, { "epoch": 3.359065575714455, "grad_norm": 0.52118428891993, "learning_rate": 3.2915895206546227e-06, "loss": 0.0201, "step": 28327 }, { "epoch": 3.35918415747658, "grad_norm": 0.6077360486698166, "learning_rate": 3.2903991000017678e-06, "loss": 0.0304, "step": 28328 }, { "epoch": 3.359302739238705, "grad_norm": 0.6339115303663793, "learning_rate": 3.2892088794848204e-06, "loss": 0.0327, "step": 28329 }, { "epoch": 3.35942132100083, "grad_norm": 0.5415655500471742, "learning_rate": 3.288018859114736e-06, "loss": 0.0257, "step": 28330 }, { "epoch": 3.359539902762955, "grad_norm": 0.7228887635321692, "learning_rate": 3.2868290389024947e-06, "loss": 0.0312, "step": 28331 }, { "epoch": 3.35965848452508, "grad_norm": 0.5098700609539045, "learning_rate": 3.2856394188590655e-06, "loss": 0.0282, "step": 28332 }, { "epoch": 3.359777066287205, "grad_norm": 0.3709009305161092, "learning_rate": 3.284449998995409e-06, "loss": 0.0132, "step": 28333 }, { "epoch": 3.35989564804933, "grad_norm": 0.4035985589407649, "learning_rate": 3.283260779322503e-06, "loss": 0.0199, "step": 28334 }, { "epoch": 3.360014229811455, "grad_norm": 0.5082633641489566, "learning_rate": 3.2820717598512936e-06, "loss": 0.0228, "step": 28335 }, { "epoch": 3.36013281157358, "grad_norm": 0.24543513422896332, "learning_rate": 3.2808829405927525e-06, "loss": 0.011, "step": 28336 }, { "epoch": 3.360251393335705, "grad_norm": 0.8444372617941103, "learning_rate": 3.2796943215578357e-06, "loss": 0.0381, "step": 28337 }, { "epoch": 3.36036997509783, "grad_norm": 0.4035033745690534, "learning_rate": 3.278505902757506e-06, "loss": 0.0168, "step": 28338 }, { "epoch": 3.3604885568599547, "grad_norm": 0.3906168972241103, "learning_rate": 3.277317684202713e-06, "loss": 0.0168, "step": 28339 }, { "epoch": 3.36060713862208, "grad_norm": 0.4043237033032577, "learning_rate": 3.276129665904412e-06, "loss": 0.0254, "step": 28340 }, { "epoch": 3.360725720384205, "grad_norm": 0.46609714245986206, "learning_rate": 3.2749418478735557e-06, "loss": 0.0198, "step": 28341 }, { "epoch": 3.36084430214633, "grad_norm": 0.5850114594795462, "learning_rate": 3.2737542301210928e-06, "loss": 0.0284, "step": 28342 }, { "epoch": 3.3609628839084547, "grad_norm": 0.6194569099358317, "learning_rate": 3.272566812657973e-06, "loss": 0.0309, "step": 28343 }, { "epoch": 3.36108146567058, "grad_norm": 0.6296536219313433, "learning_rate": 3.271379595495147e-06, "loss": 0.0207, "step": 28344 }, { "epoch": 3.361200047432705, "grad_norm": 0.3002777906276834, "learning_rate": 3.270192578643558e-06, "loss": 0.0178, "step": 28345 }, { "epoch": 3.36131862919483, "grad_norm": 0.36538319137322256, "learning_rate": 3.269005762114144e-06, "loss": 0.0202, "step": 28346 }, { "epoch": 3.3614372109569546, "grad_norm": 0.6875228571399811, "learning_rate": 3.267819145917847e-06, "loss": 0.0274, "step": 28347 }, { "epoch": 3.36155579271908, "grad_norm": 0.48708531276649436, "learning_rate": 3.2666327300656054e-06, "loss": 0.0213, "step": 28348 }, { "epoch": 3.361674374481205, "grad_norm": 0.6233920366427661, "learning_rate": 3.265446514568368e-06, "loss": 0.0294, "step": 28349 }, { "epoch": 3.3617929562433297, "grad_norm": 0.485936129631044, "learning_rate": 3.2642604994370523e-06, "loss": 0.017, "step": 28350 }, { "epoch": 3.361911538005455, "grad_norm": 0.6490520041181146, "learning_rate": 3.263074684682604e-06, "loss": 0.0349, "step": 28351 }, { "epoch": 3.3620301197675797, "grad_norm": 0.3778299589979411, "learning_rate": 3.261889070315949e-06, "loss": 0.018, "step": 28352 }, { "epoch": 3.362148701529705, "grad_norm": 0.24763201856572672, "learning_rate": 3.2607036563480214e-06, "loss": 0.01, "step": 28353 }, { "epoch": 3.3622672832918297, "grad_norm": 0.6222852681457076, "learning_rate": 3.2595184427897524e-06, "loss": 0.0313, "step": 28354 }, { "epoch": 3.362385865053955, "grad_norm": 0.38295087602238503, "learning_rate": 3.2583334296520526e-06, "loss": 0.0274, "step": 28355 }, { "epoch": 3.3625044468160796, "grad_norm": 0.8306044752864288, "learning_rate": 3.257148616945868e-06, "loss": 0.0362, "step": 28356 }, { "epoch": 3.362623028578205, "grad_norm": 0.6050457322072268, "learning_rate": 3.2559640046821047e-06, "loss": 0.0263, "step": 28357 }, { "epoch": 3.3627416103403296, "grad_norm": 0.49553428361638757, "learning_rate": 3.2547795928716874e-06, "loss": 0.0203, "step": 28358 }, { "epoch": 3.362860192102455, "grad_norm": 0.6923794698993894, "learning_rate": 3.25359538152554e-06, "loss": 0.0345, "step": 28359 }, { "epoch": 3.3629787738645796, "grad_norm": 0.371948471771343, "learning_rate": 3.252411370654579e-06, "loss": 0.0215, "step": 28360 }, { "epoch": 3.3630973556267048, "grad_norm": 0.9802008723587586, "learning_rate": 3.251227560269712e-06, "loss": 0.0463, "step": 28361 }, { "epoch": 3.3632159373888295, "grad_norm": 0.41142018496002064, "learning_rate": 3.250043950381856e-06, "loss": 0.0253, "step": 28362 }, { "epoch": 3.3633345191509547, "grad_norm": 0.24279926466844917, "learning_rate": 3.2488605410019247e-06, "loss": 0.0108, "step": 28363 }, { "epoch": 3.3634531009130795, "grad_norm": 0.5211991166016366, "learning_rate": 3.247677332140825e-06, "loss": 0.0221, "step": 28364 }, { "epoch": 3.3635716826752047, "grad_norm": 0.4058903933030355, "learning_rate": 3.246494323809471e-06, "loss": 0.0235, "step": 28365 }, { "epoch": 3.3636902644373294, "grad_norm": 0.4727023635819523, "learning_rate": 3.2453115160187537e-06, "loss": 0.0237, "step": 28366 }, { "epoch": 3.3638088461994546, "grad_norm": 0.5214287888246492, "learning_rate": 3.244128908779595e-06, "loss": 0.0205, "step": 28367 }, { "epoch": 3.3639274279615794, "grad_norm": 0.3063117908460914, "learning_rate": 3.2429465021028864e-06, "loss": 0.0144, "step": 28368 }, { "epoch": 3.3640460097237046, "grad_norm": 0.5487428350876885, "learning_rate": 3.241764295999536e-06, "loss": 0.0352, "step": 28369 }, { "epoch": 3.3641645914858294, "grad_norm": 0.33563162550274317, "learning_rate": 3.2405822904804234e-06, "loss": 0.0151, "step": 28370 }, { "epoch": 3.3642831732479546, "grad_norm": 0.4709968675504259, "learning_rate": 3.2394004855564738e-06, "loss": 0.0251, "step": 28371 }, { "epoch": 3.3644017550100793, "grad_norm": 0.3893529879935164, "learning_rate": 3.2382188812385583e-06, "loss": 0.0183, "step": 28372 }, { "epoch": 3.3645203367722045, "grad_norm": 0.7769089957808121, "learning_rate": 3.23703747753758e-06, "loss": 0.0444, "step": 28373 }, { "epoch": 3.3646389185343293, "grad_norm": 0.30520676194137253, "learning_rate": 3.23585627446443e-06, "loss": 0.0126, "step": 28374 }, { "epoch": 3.3647575002964545, "grad_norm": 0.5260896824323504, "learning_rate": 3.234675272029994e-06, "loss": 0.02, "step": 28375 }, { "epoch": 3.3648760820585792, "grad_norm": 0.6397910708141028, "learning_rate": 3.2334944702451687e-06, "loss": 0.0311, "step": 28376 }, { "epoch": 3.3649946638207044, "grad_norm": 0.574580948635534, "learning_rate": 3.2323138691208205e-06, "loss": 0.021, "step": 28377 }, { "epoch": 3.365113245582829, "grad_norm": 0.6402744039577356, "learning_rate": 3.2311334686678572e-06, "loss": 0.0241, "step": 28378 }, { "epoch": 3.3652318273449544, "grad_norm": 0.5493922805226412, "learning_rate": 3.229953268897143e-06, "loss": 0.0339, "step": 28379 }, { "epoch": 3.365350409107079, "grad_norm": 0.7148063384692843, "learning_rate": 3.2287732698195626e-06, "loss": 0.0227, "step": 28380 }, { "epoch": 3.3654689908692044, "grad_norm": 0.5831210903309609, "learning_rate": 3.227593471445994e-06, "loss": 0.0313, "step": 28381 }, { "epoch": 3.365587572631329, "grad_norm": 0.5182779706377152, "learning_rate": 3.226413873787318e-06, "loss": 0.023, "step": 28382 }, { "epoch": 3.3657061543934543, "grad_norm": 0.7266698346250949, "learning_rate": 3.225234476854408e-06, "loss": 0.0516, "step": 28383 }, { "epoch": 3.365824736155579, "grad_norm": 0.37294944838555166, "learning_rate": 3.2240552806581227e-06, "loss": 0.015, "step": 28384 }, { "epoch": 3.3659433179177043, "grad_norm": 0.7225756434909976, "learning_rate": 3.2228762852093585e-06, "loss": 0.0349, "step": 28385 }, { "epoch": 3.366061899679829, "grad_norm": 0.5849529573635673, "learning_rate": 3.2216974905189627e-06, "loss": 0.0317, "step": 28386 }, { "epoch": 3.3661804814419543, "grad_norm": 0.7224602217793022, "learning_rate": 3.2205188965978154e-06, "loss": 0.035, "step": 28387 }, { "epoch": 3.366299063204079, "grad_norm": 0.5856139729928586, "learning_rate": 3.2193405034567637e-06, "loss": 0.0269, "step": 28388 }, { "epoch": 3.366417644966204, "grad_norm": 0.5514711247898894, "learning_rate": 3.2181623111066954e-06, "loss": 0.0348, "step": 28389 }, { "epoch": 3.3665362267283294, "grad_norm": 0.47791182279855193, "learning_rate": 3.2169843195584554e-06, "loss": 0.028, "step": 28390 }, { "epoch": 3.366654808490454, "grad_norm": 0.4659678552073218, "learning_rate": 3.2158065288229066e-06, "loss": 0.0276, "step": 28391 }, { "epoch": 3.366773390252579, "grad_norm": 0.5579796158504952, "learning_rate": 3.2146289389109075e-06, "loss": 0.0302, "step": 28392 }, { "epoch": 3.366891972014704, "grad_norm": 0.6652894891985357, "learning_rate": 3.213451549833316e-06, "loss": 0.0333, "step": 28393 }, { "epoch": 3.3670105537768293, "grad_norm": 0.5923615910784152, "learning_rate": 3.212274361600989e-06, "loss": 0.0271, "step": 28394 }, { "epoch": 3.367129135538954, "grad_norm": 0.9496742970484627, "learning_rate": 3.2110973742247725e-06, "loss": 0.0412, "step": 28395 }, { "epoch": 3.367247717301079, "grad_norm": 0.46517234064011076, "learning_rate": 3.2099205877155148e-06, "loss": 0.0228, "step": 28396 }, { "epoch": 3.367366299063204, "grad_norm": 0.4596104134516196, "learning_rate": 3.2087440020840714e-06, "loss": 0.0214, "step": 28397 }, { "epoch": 3.3674848808253293, "grad_norm": 0.4636169274473629, "learning_rate": 3.2075676173412892e-06, "loss": 0.021, "step": 28398 }, { "epoch": 3.367603462587454, "grad_norm": 0.7865933778335288, "learning_rate": 3.2063914334979987e-06, "loss": 0.0452, "step": 28399 }, { "epoch": 3.3677220443495792, "grad_norm": 0.3817054006028995, "learning_rate": 3.205215450565069e-06, "loss": 0.0237, "step": 28400 }, { "epoch": 3.367840626111704, "grad_norm": 0.7577335077109038, "learning_rate": 3.204039668553316e-06, "loss": 0.0415, "step": 28401 }, { "epoch": 3.367959207873829, "grad_norm": 0.3921403080608631, "learning_rate": 3.202864087473592e-06, "loss": 0.0185, "step": 28402 }, { "epoch": 3.368077789635954, "grad_norm": 0.418327651407074, "learning_rate": 3.2016887073367307e-06, "loss": 0.0149, "step": 28403 }, { "epoch": 3.368196371398079, "grad_norm": 0.4551825217157843, "learning_rate": 3.2005135281535705e-06, "loss": 0.0243, "step": 28404 }, { "epoch": 3.368314953160204, "grad_norm": 0.5975004323497811, "learning_rate": 3.1993385499349465e-06, "loss": 0.0237, "step": 28405 }, { "epoch": 3.368433534922329, "grad_norm": 0.5470517835276452, "learning_rate": 3.198163772691681e-06, "loss": 0.0282, "step": 28406 }, { "epoch": 3.368552116684454, "grad_norm": 0.36391297975092624, "learning_rate": 3.1969891964346133e-06, "loss": 0.0242, "step": 28407 }, { "epoch": 3.368670698446579, "grad_norm": 0.4382320821050168, "learning_rate": 3.195814821174567e-06, "loss": 0.0203, "step": 28408 }, { "epoch": 3.368789280208704, "grad_norm": 0.6135033731861153, "learning_rate": 3.1946406469223754e-06, "loss": 0.0287, "step": 28409 }, { "epoch": 3.368907861970829, "grad_norm": 0.552983449461249, "learning_rate": 3.1934666736888526e-06, "loss": 0.0279, "step": 28410 }, { "epoch": 3.369026443732954, "grad_norm": 0.5532775749500256, "learning_rate": 3.192292901484828e-06, "loss": 0.0272, "step": 28411 }, { "epoch": 3.369145025495079, "grad_norm": 0.6528065539001182, "learning_rate": 3.1911193303211185e-06, "loss": 0.0211, "step": 28412 }, { "epoch": 3.3692636072572038, "grad_norm": 0.4817292502055002, "learning_rate": 3.189945960208543e-06, "loss": 0.0234, "step": 28413 }, { "epoch": 3.369382189019329, "grad_norm": 0.36256497051687425, "learning_rate": 3.1887727911579233e-06, "loss": 0.0188, "step": 28414 }, { "epoch": 3.3695007707814537, "grad_norm": 0.4700065915498995, "learning_rate": 3.187599823180071e-06, "loss": 0.0244, "step": 28415 }, { "epoch": 3.369619352543579, "grad_norm": 0.8898319715785457, "learning_rate": 3.186427056285804e-06, "loss": 0.0134, "step": 28416 }, { "epoch": 3.3697379343057037, "grad_norm": 0.4802155988614911, "learning_rate": 3.185254490485928e-06, "loss": 0.0286, "step": 28417 }, { "epoch": 3.369856516067829, "grad_norm": 0.549676768963708, "learning_rate": 3.1840821257912516e-06, "loss": 0.0288, "step": 28418 }, { "epoch": 3.3699750978299536, "grad_norm": 0.4917155585101706, "learning_rate": 3.182909962212588e-06, "loss": 0.0294, "step": 28419 }, { "epoch": 3.370093679592079, "grad_norm": 0.6074538745944409, "learning_rate": 3.181737999760745e-06, "loss": 0.0302, "step": 28420 }, { "epoch": 3.3702122613542036, "grad_norm": 0.42688868180646494, "learning_rate": 3.180566238446514e-06, "loss": 0.0238, "step": 28421 }, { "epoch": 3.370330843116329, "grad_norm": 0.49622425942491805, "learning_rate": 3.179394678280709e-06, "loss": 0.0243, "step": 28422 }, { "epoch": 3.3704494248784536, "grad_norm": 0.7121674063510728, "learning_rate": 3.178223319274126e-06, "loss": 0.0461, "step": 28423 }, { "epoch": 3.3705680066405788, "grad_norm": 0.39433712911151714, "learning_rate": 3.177052161437566e-06, "loss": 0.0302, "step": 28424 }, { "epoch": 3.3706865884027035, "grad_norm": 1.02933401916156, "learning_rate": 3.1758812047818275e-06, "loss": 0.0408, "step": 28425 }, { "epoch": 3.3708051701648287, "grad_norm": 0.5824596302666162, "learning_rate": 3.1747104493176915e-06, "loss": 0.0344, "step": 28426 }, { "epoch": 3.3709237519269535, "grad_norm": 0.7127793958917581, "learning_rate": 3.1735398950559713e-06, "loss": 0.0343, "step": 28427 }, { "epoch": 3.3710423336890787, "grad_norm": 0.4823744436861155, "learning_rate": 3.1723695420074418e-06, "loss": 0.0277, "step": 28428 }, { "epoch": 3.3711609154512034, "grad_norm": 0.7032338123401534, "learning_rate": 3.1711993901828997e-06, "loss": 0.0261, "step": 28429 }, { "epoch": 3.3712794972133286, "grad_norm": 0.5143227126304037, "learning_rate": 3.170029439593131e-06, "loss": 0.0216, "step": 28430 }, { "epoch": 3.3713980789754534, "grad_norm": 0.33369514321936794, "learning_rate": 3.168859690248921e-06, "loss": 0.0117, "step": 28431 }, { "epoch": 3.3715166607375786, "grad_norm": 0.5760406734103235, "learning_rate": 3.167690142161059e-06, "loss": 0.0308, "step": 28432 }, { "epoch": 3.3716352424997034, "grad_norm": 0.5545315596725181, "learning_rate": 3.166520795340311e-06, "loss": 0.027, "step": 28433 }, { "epoch": 3.3717538242618286, "grad_norm": 0.36870387464145044, "learning_rate": 3.1653516497974765e-06, "loss": 0.0171, "step": 28434 }, { "epoch": 3.3718724060239533, "grad_norm": 0.6730036991692576, "learning_rate": 3.16418270554332e-06, "loss": 0.0286, "step": 28435 }, { "epoch": 3.3719909877860785, "grad_norm": 0.5376573924377342, "learning_rate": 3.163013962588629e-06, "loss": 0.0246, "step": 28436 }, { "epoch": 3.3721095695482033, "grad_norm": 0.5114635542827333, "learning_rate": 3.1618454209441567e-06, "loss": 0.02, "step": 28437 }, { "epoch": 3.3722281513103285, "grad_norm": 0.44257961798276235, "learning_rate": 3.1606770806207027e-06, "loss": 0.0225, "step": 28438 }, { "epoch": 3.3723467330724537, "grad_norm": 0.4420748220061696, "learning_rate": 3.159508941629019e-06, "loss": 0.0228, "step": 28439 }, { "epoch": 3.3724653148345785, "grad_norm": 0.5399353813283471, "learning_rate": 3.1583410039798813e-06, "loss": 0.0235, "step": 28440 }, { "epoch": 3.372583896596703, "grad_norm": 0.4596826377107692, "learning_rate": 3.157173267684055e-06, "loss": 0.0188, "step": 28441 }, { "epoch": 3.3727024783588284, "grad_norm": 0.5556010185875379, "learning_rate": 3.1560057327523067e-06, "loss": 0.0243, "step": 28442 }, { "epoch": 3.3728210601209536, "grad_norm": 0.4124598612697137, "learning_rate": 3.154838399195403e-06, "loss": 0.0188, "step": 28443 }, { "epoch": 3.3729396418830784, "grad_norm": 0.9292966510454848, "learning_rate": 3.153671267024089e-06, "loss": 0.0458, "step": 28444 }, { "epoch": 3.373058223645203, "grad_norm": 0.6636125021113544, "learning_rate": 3.1525043362491483e-06, "loss": 0.0348, "step": 28445 }, { "epoch": 3.3731768054073283, "grad_norm": 0.4417635986660614, "learning_rate": 3.1513376068813217e-06, "loss": 0.0194, "step": 28446 }, { "epoch": 3.3732953871694535, "grad_norm": 0.5292581327973719, "learning_rate": 3.1501710789313744e-06, "loss": 0.0267, "step": 28447 }, { "epoch": 3.3734139689315783, "grad_norm": 0.6896506311269708, "learning_rate": 3.1490047524100454e-06, "loss": 0.0351, "step": 28448 }, { "epoch": 3.3735325506937035, "grad_norm": 0.5166754080899334, "learning_rate": 3.1478386273281065e-06, "loss": 0.0287, "step": 28449 }, { "epoch": 3.3736511324558283, "grad_norm": 0.5190649859280394, "learning_rate": 3.1466727036962933e-06, "loss": 0.0289, "step": 28450 }, { "epoch": 3.3737697142179535, "grad_norm": 0.7088445789581067, "learning_rate": 3.145506981525362e-06, "loss": 0.037, "step": 28451 }, { "epoch": 3.3738882959800782, "grad_norm": 0.7678390554756007, "learning_rate": 3.144341460826053e-06, "loss": 0.0479, "step": 28452 }, { "epoch": 3.3740068777422034, "grad_norm": 0.688499594544058, "learning_rate": 3.1431761416091166e-06, "loss": 0.0236, "step": 28453 }, { "epoch": 3.374125459504328, "grad_norm": 0.4473603931090516, "learning_rate": 3.1420110238852994e-06, "loss": 0.0218, "step": 28454 }, { "epoch": 3.3742440412664534, "grad_norm": 0.8679182307745145, "learning_rate": 3.1408461076653297e-06, "loss": 0.0309, "step": 28455 }, { "epoch": 3.374362623028578, "grad_norm": 0.4122406273323114, "learning_rate": 3.139681392959956e-06, "loss": 0.0197, "step": 28456 }, { "epoch": 3.3744812047907033, "grad_norm": 0.6535763755778339, "learning_rate": 3.13851687977991e-06, "loss": 0.0352, "step": 28457 }, { "epoch": 3.374599786552828, "grad_norm": 0.6526485985418939, "learning_rate": 3.137352568135937e-06, "loss": 0.0243, "step": 28458 }, { "epoch": 3.3747183683149533, "grad_norm": 0.6067298192825716, "learning_rate": 3.1361884580387526e-06, "loss": 0.0368, "step": 28459 }, { "epoch": 3.374836950077078, "grad_norm": 0.527718591706365, "learning_rate": 3.1350245494991104e-06, "loss": 0.0261, "step": 28460 }, { "epoch": 3.3749555318392033, "grad_norm": 0.569320488075551, "learning_rate": 3.133860842527722e-06, "loss": 0.0261, "step": 28461 }, { "epoch": 3.375074113601328, "grad_norm": 0.5693536778135765, "learning_rate": 3.1326973371353263e-06, "loss": 0.0234, "step": 28462 }, { "epoch": 3.3751926953634532, "grad_norm": 1.4072890344695894, "learning_rate": 3.1315340333326422e-06, "loss": 0.0374, "step": 28463 }, { "epoch": 3.375311277125578, "grad_norm": 0.4142097093638965, "learning_rate": 3.1303709311303997e-06, "loss": 0.0195, "step": 28464 }, { "epoch": 3.375429858887703, "grad_norm": 0.43787847355136983, "learning_rate": 3.1292080305393213e-06, "loss": 0.0193, "step": 28465 }, { "epoch": 3.375548440649828, "grad_norm": 0.48339464682531585, "learning_rate": 3.1280453315701232e-06, "loss": 0.0357, "step": 28466 }, { "epoch": 3.375667022411953, "grad_norm": 0.26733116868304896, "learning_rate": 3.1268828342335243e-06, "loss": 0.0113, "step": 28467 }, { "epoch": 3.375785604174078, "grad_norm": 0.6215951816401758, "learning_rate": 3.1257205385402446e-06, "loss": 0.0374, "step": 28468 }, { "epoch": 3.375904185936203, "grad_norm": 0.43580281474752947, "learning_rate": 3.1245584445009994e-06, "loss": 0.022, "step": 28469 }, { "epoch": 3.376022767698328, "grad_norm": 0.23459955385165523, "learning_rate": 3.1233965521264923e-06, "loss": 0.0123, "step": 28470 }, { "epoch": 3.376141349460453, "grad_norm": 0.6369846488020747, "learning_rate": 3.1222348614274506e-06, "loss": 0.0307, "step": 28471 }, { "epoch": 3.376259931222578, "grad_norm": 0.3498761004309446, "learning_rate": 3.1210733724145735e-06, "loss": 0.0169, "step": 28472 }, { "epoch": 3.376378512984703, "grad_norm": 0.4570431395795493, "learning_rate": 3.1199120850985668e-06, "loss": 0.0319, "step": 28473 }, { "epoch": 3.376497094746828, "grad_norm": 0.4887829870089527, "learning_rate": 3.1187509994901415e-06, "loss": 0.023, "step": 28474 }, { "epoch": 3.376615676508953, "grad_norm": 0.6417993175401068, "learning_rate": 3.117590115599997e-06, "loss": 0.0344, "step": 28475 }, { "epoch": 3.3767342582710778, "grad_norm": 0.6244304186758831, "learning_rate": 3.1164294334388442e-06, "loss": 0.0237, "step": 28476 }, { "epoch": 3.376852840033203, "grad_norm": 0.3518242874507578, "learning_rate": 3.115268953017372e-06, "loss": 0.0155, "step": 28477 }, { "epoch": 3.3769714217953277, "grad_norm": 0.32975117877057153, "learning_rate": 3.1141086743462835e-06, "loss": 0.0129, "step": 28478 }, { "epoch": 3.377090003557453, "grad_norm": 0.47302257218793275, "learning_rate": 3.1129485974362744e-06, "loss": 0.0234, "step": 28479 }, { "epoch": 3.3772085853195777, "grad_norm": 0.6532258642357543, "learning_rate": 3.1117887222980395e-06, "loss": 0.0214, "step": 28480 }, { "epoch": 3.377327167081703, "grad_norm": 0.6454416432368102, "learning_rate": 3.110629048942276e-06, "loss": 0.0283, "step": 28481 }, { "epoch": 3.3774457488438276, "grad_norm": 0.5086395791158544, "learning_rate": 3.109469577379659e-06, "loss": 0.0294, "step": 28482 }, { "epoch": 3.377564330605953, "grad_norm": 0.4356813096903233, "learning_rate": 3.108310307620901e-06, "loss": 0.0169, "step": 28483 }, { "epoch": 3.3776829123680776, "grad_norm": 0.7241730774057775, "learning_rate": 3.10715123967667e-06, "loss": 0.0375, "step": 28484 }, { "epoch": 3.377801494130203, "grad_norm": 0.7138234509594876, "learning_rate": 3.105992373557659e-06, "loss": 0.0207, "step": 28485 }, { "epoch": 3.3779200758923276, "grad_norm": 0.5548472126614545, "learning_rate": 3.104833709274549e-06, "loss": 0.025, "step": 28486 }, { "epoch": 3.3780386576544528, "grad_norm": 0.4566426878968168, "learning_rate": 3.103675246838028e-06, "loss": 0.0152, "step": 28487 }, { "epoch": 3.378157239416578, "grad_norm": 0.43267343007422204, "learning_rate": 3.102516986258766e-06, "loss": 0.0223, "step": 28488 }, { "epoch": 3.3782758211787027, "grad_norm": 0.2683118039541651, "learning_rate": 3.101358927547443e-06, "loss": 0.0121, "step": 28489 }, { "epoch": 3.3783944029408275, "grad_norm": 0.36478009995873045, "learning_rate": 3.1002010707147394e-06, "loss": 0.0168, "step": 28490 }, { "epoch": 3.3785129847029527, "grad_norm": 0.5371761480370094, "learning_rate": 3.0990434157713242e-06, "loss": 0.0218, "step": 28491 }, { "epoch": 3.378631566465078, "grad_norm": 0.5675836006070906, "learning_rate": 3.0978859627278784e-06, "loss": 0.0325, "step": 28492 }, { "epoch": 3.3787501482272027, "grad_norm": 0.4616901656889574, "learning_rate": 3.0967287115950538e-06, "loss": 0.0185, "step": 28493 }, { "epoch": 3.3788687299893274, "grad_norm": 0.3669979475060291, "learning_rate": 3.0955716623835424e-06, "loss": 0.0165, "step": 28494 }, { "epoch": 3.3789873117514526, "grad_norm": 0.543638411427695, "learning_rate": 3.0944148151039966e-06, "loss": 0.0248, "step": 28495 }, { "epoch": 3.379105893513578, "grad_norm": 0.3697925606854395, "learning_rate": 3.093258169767085e-06, "loss": 0.0177, "step": 28496 }, { "epoch": 3.3792244752757026, "grad_norm": 0.8976596592572345, "learning_rate": 3.0921017263834617e-06, "loss": 0.043, "step": 28497 }, { "epoch": 3.3793430570378273, "grad_norm": 0.31517713460237234, "learning_rate": 3.090945484963803e-06, "loss": 0.0192, "step": 28498 }, { "epoch": 3.3794616387999525, "grad_norm": 0.6067885846577701, "learning_rate": 3.0897894455187566e-06, "loss": 0.0314, "step": 28499 }, { "epoch": 3.3795802205620777, "grad_norm": 0.6468297468193335, "learning_rate": 3.0886336080589856e-06, "loss": 0.0295, "step": 28500 }, { "epoch": 3.3796988023242025, "grad_norm": 0.4220850341505235, "learning_rate": 3.0874779725951404e-06, "loss": 0.0203, "step": 28501 }, { "epoch": 3.3798173840863277, "grad_norm": 0.5511033620321742, "learning_rate": 3.0863225391378785e-06, "loss": 0.0225, "step": 28502 }, { "epoch": 3.3799359658484525, "grad_norm": 0.5922528944882812, "learning_rate": 3.085167307697856e-06, "loss": 0.0279, "step": 28503 }, { "epoch": 3.3800545476105777, "grad_norm": 0.37008938199835095, "learning_rate": 3.084012278285706e-06, "loss": 0.0166, "step": 28504 }, { "epoch": 3.3801731293727024, "grad_norm": 0.4149916496624296, "learning_rate": 3.0828574509120967e-06, "loss": 0.0215, "step": 28505 }, { "epoch": 3.3802917111348276, "grad_norm": 0.5543020112087341, "learning_rate": 3.081702825587662e-06, "loss": 0.0244, "step": 28506 }, { "epoch": 3.3804102928969524, "grad_norm": 0.49438674061110593, "learning_rate": 3.0805484023230525e-06, "loss": 0.027, "step": 28507 }, { "epoch": 3.3805288746590776, "grad_norm": 0.6006308766364582, "learning_rate": 3.0793941811288973e-06, "loss": 0.0267, "step": 28508 }, { "epoch": 3.3806474564212023, "grad_norm": 0.44558145820396783, "learning_rate": 3.078240162015861e-06, "loss": 0.0168, "step": 28509 }, { "epoch": 3.3807660381833275, "grad_norm": 0.4928828141509471, "learning_rate": 3.077086344994559e-06, "loss": 0.0242, "step": 28510 }, { "epoch": 3.3808846199454523, "grad_norm": 0.6114988503180125, "learning_rate": 3.075932730075637e-06, "loss": 0.031, "step": 28511 }, { "epoch": 3.3810032017075775, "grad_norm": 0.38883852734764823, "learning_rate": 3.0747793172697326e-06, "loss": 0.0185, "step": 28512 }, { "epoch": 3.3811217834697023, "grad_norm": 0.7298093245026348, "learning_rate": 3.0736261065874768e-06, "loss": 0.0379, "step": 28513 }, { "epoch": 3.3812403652318275, "grad_norm": 0.7660670564277019, "learning_rate": 3.072473098039502e-06, "loss": 0.0426, "step": 28514 }, { "epoch": 3.3813589469939522, "grad_norm": 1.3600806465206503, "learning_rate": 3.0713202916364285e-06, "loss": 0.035, "step": 28515 }, { "epoch": 3.3814775287560774, "grad_norm": 0.4374321269039058, "learning_rate": 3.0701676873889e-06, "loss": 0.0204, "step": 28516 }, { "epoch": 3.381596110518202, "grad_norm": 0.5846536735631263, "learning_rate": 3.0690152853075306e-06, "loss": 0.0343, "step": 28517 }, { "epoch": 3.3817146922803274, "grad_norm": 0.5876342603898199, "learning_rate": 3.06786308540295e-06, "loss": 0.0273, "step": 28518 }, { "epoch": 3.381833274042452, "grad_norm": 0.5855117882747183, "learning_rate": 3.066711087685767e-06, "loss": 0.0302, "step": 28519 }, { "epoch": 3.3819518558045774, "grad_norm": 0.6912885486769447, "learning_rate": 3.06555929216662e-06, "loss": 0.0222, "step": 28520 }, { "epoch": 3.382070437566702, "grad_norm": 0.6877245691881002, "learning_rate": 3.0644076988561144e-06, "loss": 0.0251, "step": 28521 }, { "epoch": 3.3821890193288273, "grad_norm": 0.7840184341305153, "learning_rate": 3.0632563077648724e-06, "loss": 0.0404, "step": 28522 }, { "epoch": 3.382307601090952, "grad_norm": 0.5788076471788576, "learning_rate": 3.062105118903505e-06, "loss": 0.0262, "step": 28523 }, { "epoch": 3.3824261828530773, "grad_norm": 0.394066506606561, "learning_rate": 3.0609541322826257e-06, "loss": 0.0183, "step": 28524 }, { "epoch": 3.382544764615202, "grad_norm": 0.40551340727886437, "learning_rate": 3.059803347912851e-06, "loss": 0.0218, "step": 28525 }, { "epoch": 3.3826633463773272, "grad_norm": 0.6251794056244776, "learning_rate": 3.0586527658047814e-06, "loss": 0.033, "step": 28526 }, { "epoch": 3.382781928139452, "grad_norm": 0.7156211673781362, "learning_rate": 3.0575023859690273e-06, "loss": 0.03, "step": 28527 }, { "epoch": 3.382900509901577, "grad_norm": 0.6141573079290787, "learning_rate": 3.0563522084161913e-06, "loss": 0.0246, "step": 28528 }, { "epoch": 3.383019091663702, "grad_norm": 0.7519397938833405, "learning_rate": 3.0552022331568785e-06, "loss": 0.0374, "step": 28529 }, { "epoch": 3.383137673425827, "grad_norm": 0.68458664684316, "learning_rate": 3.054052460201692e-06, "loss": 0.0417, "step": 28530 }, { "epoch": 3.383256255187952, "grad_norm": 0.45910169399247563, "learning_rate": 3.0529028895612294e-06, "loss": 0.0276, "step": 28531 }, { "epoch": 3.383374836950077, "grad_norm": 0.6582651522704862, "learning_rate": 3.0517535212460953e-06, "loss": 0.0331, "step": 28532 }, { "epoch": 3.383493418712202, "grad_norm": 0.3792838573999262, "learning_rate": 3.050604355266873e-06, "loss": 0.0186, "step": 28533 }, { "epoch": 3.383612000474327, "grad_norm": 0.5153481532205052, "learning_rate": 3.0494553916341624e-06, "loss": 0.0154, "step": 28534 }, { "epoch": 3.383730582236452, "grad_norm": 0.6332568811313432, "learning_rate": 3.048306630358555e-06, "loss": 0.02, "step": 28535 }, { "epoch": 3.383849163998577, "grad_norm": 0.5699044294198763, "learning_rate": 3.047158071450648e-06, "loss": 0.0233, "step": 28536 }, { "epoch": 3.3839677457607023, "grad_norm": 0.4780191795962937, "learning_rate": 3.0460097149210166e-06, "loss": 0.0183, "step": 28537 }, { "epoch": 3.384086327522827, "grad_norm": 0.8017907093842397, "learning_rate": 3.0448615607802543e-06, "loss": 0.0352, "step": 28538 }, { "epoch": 3.3842049092849518, "grad_norm": 0.6495251710810469, "learning_rate": 3.0437136090389477e-06, "loss": 0.0428, "step": 28539 }, { "epoch": 3.384323491047077, "grad_norm": 0.39437320077632904, "learning_rate": 3.0425658597076747e-06, "loss": 0.0193, "step": 28540 }, { "epoch": 3.384442072809202, "grad_norm": 0.6892678525007567, "learning_rate": 3.0414183127970175e-06, "loss": 0.0266, "step": 28541 }, { "epoch": 3.384560654571327, "grad_norm": 0.4914058219162297, "learning_rate": 3.0402709683175574e-06, "loss": 0.0195, "step": 28542 }, { "epoch": 3.3846792363334517, "grad_norm": 0.8762415428217719, "learning_rate": 3.039123826279874e-06, "loss": 0.0422, "step": 28543 }, { "epoch": 3.384797818095577, "grad_norm": 0.46491471289713715, "learning_rate": 3.037976886694535e-06, "loss": 0.0176, "step": 28544 }, { "epoch": 3.384916399857702, "grad_norm": 0.6101522633312086, "learning_rate": 3.0368301495721173e-06, "loss": 0.0263, "step": 28545 }, { "epoch": 3.385034981619827, "grad_norm": 0.5173887925331193, "learning_rate": 3.035683614923193e-06, "loss": 0.0236, "step": 28546 }, { "epoch": 3.3851535633819516, "grad_norm": 0.468876658715073, "learning_rate": 3.0345372827583374e-06, "loss": 0.0206, "step": 28547 }, { "epoch": 3.385272145144077, "grad_norm": 0.6702770349196162, "learning_rate": 3.0333911530881054e-06, "loss": 0.0421, "step": 28548 }, { "epoch": 3.385390726906202, "grad_norm": 0.5032724958269145, "learning_rate": 3.0322452259230694e-06, "loss": 0.0263, "step": 28549 }, { "epoch": 3.3855093086683268, "grad_norm": 0.5221901842255368, "learning_rate": 3.031099501273793e-06, "loss": 0.0167, "step": 28550 }, { "epoch": 3.385627890430452, "grad_norm": 0.6018449247977332, "learning_rate": 3.0299539791508403e-06, "loss": 0.0288, "step": 28551 }, { "epoch": 3.3857464721925767, "grad_norm": 0.5978504920010301, "learning_rate": 3.0288086595647773e-06, "loss": 0.0225, "step": 28552 }, { "epoch": 3.385865053954702, "grad_norm": 0.28515297412779, "learning_rate": 3.0276635425261433e-06, "loss": 0.0142, "step": 28553 }, { "epoch": 3.3859836357168267, "grad_norm": 0.480453568934314, "learning_rate": 3.0265186280455187e-06, "loss": 0.0258, "step": 28554 }, { "epoch": 3.386102217478952, "grad_norm": 0.7541082327008836, "learning_rate": 3.025373916133439e-06, "loss": 0.0342, "step": 28555 }, { "epoch": 3.3862207992410767, "grad_norm": 0.37458512520233683, "learning_rate": 3.024229406800469e-06, "loss": 0.0143, "step": 28556 }, { "epoch": 3.386339381003202, "grad_norm": 0.9195313265089391, "learning_rate": 3.023085100057152e-06, "loss": 0.0399, "step": 28557 }, { "epoch": 3.3864579627653266, "grad_norm": 0.3366782488704065, "learning_rate": 3.0219409959140464e-06, "loss": 0.0144, "step": 28558 }, { "epoch": 3.386576544527452, "grad_norm": 0.6836191543354585, "learning_rate": 3.020797094381689e-06, "loss": 0.0285, "step": 28559 }, { "epoch": 3.3866951262895766, "grad_norm": 0.44249499486776117, "learning_rate": 3.0196533954706285e-06, "loss": 0.0273, "step": 28560 }, { "epoch": 3.386813708051702, "grad_norm": 0.5175912213418122, "learning_rate": 3.01850989919141e-06, "loss": 0.0307, "step": 28561 }, { "epoch": 3.3869322898138265, "grad_norm": 0.36283318955126653, "learning_rate": 3.0173666055545753e-06, "loss": 0.017, "step": 28562 }, { "epoch": 3.3870508715759517, "grad_norm": 0.6129970344475152, "learning_rate": 3.0162235145706706e-06, "loss": 0.0261, "step": 28563 }, { "epoch": 3.3871694533380765, "grad_norm": 0.6120879170777062, "learning_rate": 3.0150806262502135e-06, "loss": 0.0227, "step": 28564 }, { "epoch": 3.3872880351002017, "grad_norm": 0.649002866624606, "learning_rate": 3.0139379406037643e-06, "loss": 0.0277, "step": 28565 }, { "epoch": 3.3874066168623265, "grad_norm": 0.811578493061749, "learning_rate": 3.0127954576418427e-06, "loss": 0.0543, "step": 28566 }, { "epoch": 3.3875251986244517, "grad_norm": 0.4934331245213893, "learning_rate": 3.011653177374987e-06, "loss": 0.0227, "step": 28567 }, { "epoch": 3.3876437803865764, "grad_norm": 0.7799368960525657, "learning_rate": 3.0105110998137174e-06, "loss": 0.0437, "step": 28568 }, { "epoch": 3.3877623621487016, "grad_norm": 0.8159969418077485, "learning_rate": 3.009369224968578e-06, "loss": 0.032, "step": 28569 }, { "epoch": 3.3878809439108264, "grad_norm": 0.5669185020726558, "learning_rate": 3.0082275528500846e-06, "loss": 0.0288, "step": 28570 }, { "epoch": 3.3879995256729516, "grad_norm": 0.6318801417039888, "learning_rate": 3.0070860834687633e-06, "loss": 0.0314, "step": 28571 }, { "epoch": 3.3881181074350764, "grad_norm": 0.4438036568868938, "learning_rate": 3.0059448168351404e-06, "loss": 0.028, "step": 28572 }, { "epoch": 3.3882366891972016, "grad_norm": 0.42910173861823303, "learning_rate": 3.004803752959734e-06, "loss": 0.0258, "step": 28573 }, { "epoch": 3.3883552709593263, "grad_norm": 0.5552018690210471, "learning_rate": 3.003662891853071e-06, "loss": 0.0274, "step": 28574 }, { "epoch": 3.3884738527214515, "grad_norm": 0.47877168693822686, "learning_rate": 3.002522233525651e-06, "loss": 0.0211, "step": 28575 }, { "epoch": 3.3885924344835763, "grad_norm": 0.44307893959992223, "learning_rate": 3.001381777988013e-06, "loss": 0.0164, "step": 28576 }, { "epoch": 3.3887110162457015, "grad_norm": 0.6094306459838031, "learning_rate": 3.000241525250652e-06, "loss": 0.019, "step": 28577 }, { "epoch": 3.3888295980078262, "grad_norm": 0.6326983375447719, "learning_rate": 2.9991014753240928e-06, "loss": 0.0245, "step": 28578 }, { "epoch": 3.3889481797699514, "grad_norm": 0.48277089995279043, "learning_rate": 2.997961628218826e-06, "loss": 0.0195, "step": 28579 }, { "epoch": 3.389066761532076, "grad_norm": 0.4491159158360087, "learning_rate": 2.9968219839453833e-06, "loss": 0.0257, "step": 28580 }, { "epoch": 3.3891853432942014, "grad_norm": 0.6088936275573151, "learning_rate": 2.9956825425142553e-06, "loss": 0.0244, "step": 28581 }, { "epoch": 3.389303925056326, "grad_norm": 0.43700254742065653, "learning_rate": 2.994543303935951e-06, "loss": 0.0211, "step": 28582 }, { "epoch": 3.3894225068184514, "grad_norm": 0.5873547805583724, "learning_rate": 2.993404268220973e-06, "loss": 0.0261, "step": 28583 }, { "epoch": 3.389541088580576, "grad_norm": 0.5687995995867917, "learning_rate": 2.992265435379821e-06, "loss": 0.0212, "step": 28584 }, { "epoch": 3.3896596703427013, "grad_norm": 0.33213835616664406, "learning_rate": 2.991126805423e-06, "loss": 0.0179, "step": 28585 }, { "epoch": 3.389778252104826, "grad_norm": 0.6139577074634958, "learning_rate": 2.989988378360989e-06, "loss": 0.0262, "step": 28586 }, { "epoch": 3.3898968338669513, "grad_norm": 0.395223094504496, "learning_rate": 2.9888501542043036e-06, "loss": 0.0165, "step": 28587 }, { "epoch": 3.390015415629076, "grad_norm": 0.38388007384236544, "learning_rate": 2.9877121329634243e-06, "loss": 0.0207, "step": 28588 }, { "epoch": 3.3901339973912012, "grad_norm": 0.5811314458945505, "learning_rate": 2.986574314648846e-06, "loss": 0.0246, "step": 28589 }, { "epoch": 3.3902525791533265, "grad_norm": 0.458380743905067, "learning_rate": 2.9854366992710604e-06, "loss": 0.0233, "step": 28590 }, { "epoch": 3.390371160915451, "grad_norm": 0.6895834324025868, "learning_rate": 2.98429928684055e-06, "loss": 0.0323, "step": 28591 }, { "epoch": 3.390489742677576, "grad_norm": 0.38056761231045466, "learning_rate": 2.983162077367807e-06, "loss": 0.0241, "step": 28592 }, { "epoch": 3.390608324439701, "grad_norm": 0.7494543189333758, "learning_rate": 2.9820250708633062e-06, "loss": 0.0412, "step": 28593 }, { "epoch": 3.3907269062018264, "grad_norm": 0.5935450247205594, "learning_rate": 2.9808882673375338e-06, "loss": 0.0224, "step": 28594 }, { "epoch": 3.390845487963951, "grad_norm": 0.659041926415701, "learning_rate": 2.979751666800973e-06, "loss": 0.0328, "step": 28595 }, { "epoch": 3.390964069726076, "grad_norm": 0.9736729824588117, "learning_rate": 2.978615269264101e-06, "loss": 0.0485, "step": 28596 }, { "epoch": 3.391082651488201, "grad_norm": 0.7846735911940006, "learning_rate": 2.9774790747373854e-06, "loss": 0.0343, "step": 28597 }, { "epoch": 3.3912012332503263, "grad_norm": 0.35992576057555065, "learning_rate": 2.976343083231309e-06, "loss": 0.0159, "step": 28598 }, { "epoch": 3.391319815012451, "grad_norm": 0.5397481909677259, "learning_rate": 2.9752072947563436e-06, "loss": 0.0248, "step": 28599 }, { "epoch": 3.3914383967745763, "grad_norm": 0.3529270221009212, "learning_rate": 2.9740717093229565e-06, "loss": 0.0163, "step": 28600 }, { "epoch": 3.391556978536701, "grad_norm": 0.8788380016502906, "learning_rate": 2.9729363269416167e-06, "loss": 0.0346, "step": 28601 }, { "epoch": 3.391675560298826, "grad_norm": 0.502652697296164, "learning_rate": 2.9718011476227963e-06, "loss": 0.0257, "step": 28602 }, { "epoch": 3.391794142060951, "grad_norm": 0.4638454187805047, "learning_rate": 2.9706661713769597e-06, "loss": 0.019, "step": 28603 }, { "epoch": 3.391912723823076, "grad_norm": 0.48740349038982894, "learning_rate": 2.9695313982145618e-06, "loss": 0.0194, "step": 28604 }, { "epoch": 3.392031305585201, "grad_norm": 0.6524430506421464, "learning_rate": 2.9683968281460668e-06, "loss": 0.0258, "step": 28605 }, { "epoch": 3.392149887347326, "grad_norm": 0.43633497038033103, "learning_rate": 2.9672624611819385e-06, "loss": 0.0183, "step": 28606 }, { "epoch": 3.392268469109451, "grad_norm": 0.6807255380889069, "learning_rate": 2.966128297332638e-06, "loss": 0.0239, "step": 28607 }, { "epoch": 3.392387050871576, "grad_norm": 0.40499563380239534, "learning_rate": 2.9649943366086065e-06, "loss": 0.0228, "step": 28608 }, { "epoch": 3.392505632633701, "grad_norm": 0.6769493052106506, "learning_rate": 2.9638605790203084e-06, "loss": 0.0276, "step": 28609 }, { "epoch": 3.392624214395826, "grad_norm": 0.4864641800728007, "learning_rate": 2.9627270245781934e-06, "loss": 0.0235, "step": 28610 }, { "epoch": 3.392742796157951, "grad_norm": 0.3157594933234778, "learning_rate": 2.9615936732927117e-06, "loss": 0.0167, "step": 28611 }, { "epoch": 3.392861377920076, "grad_norm": 0.7681573134272002, "learning_rate": 2.9604605251743136e-06, "loss": 0.0366, "step": 28612 }, { "epoch": 3.392979959682201, "grad_norm": 0.3143475894178532, "learning_rate": 2.959327580233434e-06, "loss": 0.0114, "step": 28613 }, { "epoch": 3.393098541444326, "grad_norm": 0.38110571613980815, "learning_rate": 2.958194838480538e-06, "loss": 0.019, "step": 28614 }, { "epoch": 3.3932171232064507, "grad_norm": 0.9002936133825741, "learning_rate": 2.95706229992605e-06, "loss": 0.0485, "step": 28615 }, { "epoch": 3.393335704968576, "grad_norm": 0.518610290603584, "learning_rate": 2.9559299645804143e-06, "loss": 0.034, "step": 28616 }, { "epoch": 3.3934542867307007, "grad_norm": 0.7262468305035318, "learning_rate": 2.954797832454076e-06, "loss": 0.0275, "step": 28617 }, { "epoch": 3.393572868492826, "grad_norm": 1.0683394529543673, "learning_rate": 2.9536659035574704e-06, "loss": 0.0455, "step": 28618 }, { "epoch": 3.3936914502549507, "grad_norm": 0.44493616487249404, "learning_rate": 2.9525341779010257e-06, "loss": 0.0218, "step": 28619 }, { "epoch": 3.393810032017076, "grad_norm": 0.6416362502701769, "learning_rate": 2.9514026554951808e-06, "loss": 0.0239, "step": 28620 }, { "epoch": 3.3939286137792006, "grad_norm": 0.6421022340867363, "learning_rate": 2.950271336350366e-06, "loss": 0.028, "step": 28621 }, { "epoch": 3.394047195541326, "grad_norm": 0.5458582211641794, "learning_rate": 2.9491402204770062e-06, "loss": 0.0196, "step": 28622 }, { "epoch": 3.3941657773034506, "grad_norm": 0.5681306567718764, "learning_rate": 2.9480093078855437e-06, "loss": 0.0232, "step": 28623 }, { "epoch": 3.394284359065576, "grad_norm": 0.39089727283308057, "learning_rate": 2.946878598586378e-06, "loss": 0.0183, "step": 28624 }, { "epoch": 3.3944029408277006, "grad_norm": 0.481361705634616, "learning_rate": 2.945748092589962e-06, "loss": 0.0244, "step": 28625 }, { "epoch": 3.3945215225898258, "grad_norm": 0.6297202883627535, "learning_rate": 2.9446177899066977e-06, "loss": 0.0299, "step": 28626 }, { "epoch": 3.3946401043519505, "grad_norm": 0.577305025225877, "learning_rate": 2.9434876905470116e-06, "loss": 0.0293, "step": 28627 }, { "epoch": 3.3947586861140757, "grad_norm": 0.5123969946188941, "learning_rate": 2.942357794521319e-06, "loss": 0.0284, "step": 28628 }, { "epoch": 3.3948772678762005, "grad_norm": 0.7803996576933145, "learning_rate": 2.941228101840049e-06, "loss": 0.0411, "step": 28629 }, { "epoch": 3.3949958496383257, "grad_norm": 0.3482383536339463, "learning_rate": 2.9400986125135947e-06, "loss": 0.0183, "step": 28630 }, { "epoch": 3.3951144314004504, "grad_norm": 0.45614559561568346, "learning_rate": 2.938969326552385e-06, "loss": 0.0216, "step": 28631 }, { "epoch": 3.3952330131625756, "grad_norm": 0.5521323949368797, "learning_rate": 2.9378402439668217e-06, "loss": 0.0233, "step": 28632 }, { "epoch": 3.3953515949247004, "grad_norm": 0.36526326137449017, "learning_rate": 2.9367113647673165e-06, "loss": 0.0207, "step": 28633 }, { "epoch": 3.3954701766868256, "grad_norm": 0.5737516588126739, "learning_rate": 2.9355826889642833e-06, "loss": 0.024, "step": 28634 }, { "epoch": 3.3955887584489504, "grad_norm": 0.3087021232916764, "learning_rate": 2.934454216568111e-06, "loss": 0.0143, "step": 28635 }, { "epoch": 3.3957073402110756, "grad_norm": 0.3622767919799822, "learning_rate": 2.9333259475892217e-06, "loss": 0.0138, "step": 28636 }, { "epoch": 3.3958259219732003, "grad_norm": 0.5712224121150481, "learning_rate": 2.9321978820380015e-06, "loss": 0.0228, "step": 28637 }, { "epoch": 3.3959445037353255, "grad_norm": 0.7700770611386845, "learning_rate": 2.9310700199248558e-06, "loss": 0.0458, "step": 28638 }, { "epoch": 3.3960630854974507, "grad_norm": 0.3992156089903412, "learning_rate": 2.929942361260182e-06, "loss": 0.0199, "step": 28639 }, { "epoch": 3.3961816672595755, "grad_norm": 0.5946220910984557, "learning_rate": 2.928814906054375e-06, "loss": 0.032, "step": 28640 }, { "epoch": 3.3963002490217002, "grad_norm": 0.5511204183933522, "learning_rate": 2.9276876543178345e-06, "loss": 0.0209, "step": 28641 }, { "epoch": 3.3964188307838254, "grad_norm": 0.5646106224472268, "learning_rate": 2.9265606060609378e-06, "loss": 0.0375, "step": 28642 }, { "epoch": 3.3965374125459507, "grad_norm": 0.603810356242826, "learning_rate": 2.9254337612940915e-06, "loss": 0.0385, "step": 28643 }, { "epoch": 3.3966559943080754, "grad_norm": 0.48866882788206484, "learning_rate": 2.924307120027675e-06, "loss": 0.0232, "step": 28644 }, { "epoch": 3.3967745760702, "grad_norm": 0.6315066786602354, "learning_rate": 2.9231806822720782e-06, "loss": 0.0407, "step": 28645 }, { "epoch": 3.3968931578323254, "grad_norm": 0.2957208871750297, "learning_rate": 2.9220544480376725e-06, "loss": 0.011, "step": 28646 }, { "epoch": 3.3970117395944506, "grad_norm": 0.6021405693110974, "learning_rate": 2.9209284173348646e-06, "loss": 0.0226, "step": 28647 }, { "epoch": 3.3971303213565753, "grad_norm": 0.7734052768641267, "learning_rate": 2.9198025901740143e-06, "loss": 0.0305, "step": 28648 }, { "epoch": 3.3972489031187005, "grad_norm": 0.3960983640295445, "learning_rate": 2.918676966565506e-06, "loss": 0.0155, "step": 28649 }, { "epoch": 3.3973674848808253, "grad_norm": 0.4659437203646461, "learning_rate": 2.91755154651972e-06, "loss": 0.0285, "step": 28650 }, { "epoch": 3.3974860666429505, "grad_norm": 0.46839622667816094, "learning_rate": 2.9164263300470312e-06, "loss": 0.0207, "step": 28651 }, { "epoch": 3.3976046484050753, "grad_norm": 0.5332526289906219, "learning_rate": 2.915301317157812e-06, "loss": 0.0219, "step": 28652 }, { "epoch": 3.3977232301672005, "grad_norm": 0.7311272150138705, "learning_rate": 2.9141765078624316e-06, "loss": 0.0252, "step": 28653 }, { "epoch": 3.397841811929325, "grad_norm": 0.5621958718433572, "learning_rate": 2.9130519021712573e-06, "loss": 0.0233, "step": 28654 }, { "epoch": 3.3979603936914504, "grad_norm": 0.5005612486192839, "learning_rate": 2.9119275000946604e-06, "loss": 0.028, "step": 28655 }, { "epoch": 3.398078975453575, "grad_norm": 0.5006970573126122, "learning_rate": 2.9108033016430112e-06, "loss": 0.022, "step": 28656 }, { "epoch": 3.3981975572157004, "grad_norm": 0.39696698543708714, "learning_rate": 2.909679306826657e-06, "loss": 0.0203, "step": 28657 }, { "epoch": 3.398316138977825, "grad_norm": 0.47673646129947733, "learning_rate": 2.9085555156559835e-06, "loss": 0.0218, "step": 28658 }, { "epoch": 3.3984347207399503, "grad_norm": 0.6086722832765928, "learning_rate": 2.9074319281413297e-06, "loss": 0.032, "step": 28659 }, { "epoch": 3.398553302502075, "grad_norm": 0.6567427263626158, "learning_rate": 2.9063085442930625e-06, "loss": 0.032, "step": 28660 }, { "epoch": 3.3986718842642003, "grad_norm": 0.6744118020882897, "learning_rate": 2.905185364121538e-06, "loss": 0.0268, "step": 28661 }, { "epoch": 3.398790466026325, "grad_norm": 0.4697440022651021, "learning_rate": 2.9040623876371106e-06, "loss": 0.0211, "step": 28662 }, { "epoch": 3.3989090477884503, "grad_norm": 0.6018959151604935, "learning_rate": 2.9029396148501366e-06, "loss": 0.0395, "step": 28663 }, { "epoch": 3.399027629550575, "grad_norm": 1.2210200696799538, "learning_rate": 2.9018170457709577e-06, "loss": 0.046, "step": 28664 }, { "epoch": 3.3991462113127002, "grad_norm": 0.5280978147158887, "learning_rate": 2.9006946804099265e-06, "loss": 0.0236, "step": 28665 }, { "epoch": 3.399264793074825, "grad_norm": 0.5848045490975013, "learning_rate": 2.899572518777391e-06, "loss": 0.0204, "step": 28666 }, { "epoch": 3.39938337483695, "grad_norm": 0.48309621305410255, "learning_rate": 2.8984505608836977e-06, "loss": 0.0402, "step": 28667 }, { "epoch": 3.399501956599075, "grad_norm": 0.6583308532617692, "learning_rate": 2.8973288067391856e-06, "loss": 0.0383, "step": 28668 }, { "epoch": 3.3996205383612, "grad_norm": 0.5734113580963313, "learning_rate": 2.896207256354197e-06, "loss": 0.0298, "step": 28669 }, { "epoch": 3.399739120123325, "grad_norm": 0.47443384616648276, "learning_rate": 2.895085909739073e-06, "loss": 0.0233, "step": 28670 }, { "epoch": 3.39985770188545, "grad_norm": 0.36451156341291313, "learning_rate": 2.8939647669041476e-06, "loss": 0.0224, "step": 28671 }, { "epoch": 3.399976283647575, "grad_norm": 0.5124414345982546, "learning_rate": 2.892843827859759e-06, "loss": 0.0357, "step": 28672 }, { "epoch": 3.4000948654097, "grad_norm": 0.5683773318496417, "learning_rate": 2.891723092616244e-06, "loss": 0.0272, "step": 28673 }, { "epoch": 3.400213447171825, "grad_norm": 0.45506808913124047, "learning_rate": 2.8906025611839334e-06, "loss": 0.0187, "step": 28674 }, { "epoch": 3.40033202893395, "grad_norm": 0.3133849655005813, "learning_rate": 2.8894822335731515e-06, "loss": 0.0146, "step": 28675 }, { "epoch": 3.400450610696075, "grad_norm": 0.4830947410719658, "learning_rate": 2.888362109794229e-06, "loss": 0.0257, "step": 28676 }, { "epoch": 3.4005691924582, "grad_norm": 0.5825171109347153, "learning_rate": 2.887242189857492e-06, "loss": 0.0331, "step": 28677 }, { "epoch": 3.4006877742203248, "grad_norm": 0.6193478947462927, "learning_rate": 2.8861224737732702e-06, "loss": 0.0232, "step": 28678 }, { "epoch": 3.40080635598245, "grad_norm": 0.7518627334069364, "learning_rate": 2.8850029615518775e-06, "loss": 0.0236, "step": 28679 }, { "epoch": 3.4009249377445747, "grad_norm": 1.0199032000572348, "learning_rate": 2.883883653203637e-06, "loss": 0.0356, "step": 28680 }, { "epoch": 3.4010435195067, "grad_norm": 0.6217015311258766, "learning_rate": 2.88276454873887e-06, "loss": 0.0274, "step": 28681 }, { "epoch": 3.4011621012688247, "grad_norm": 0.4994664170498962, "learning_rate": 2.881645648167891e-06, "loss": 0.0322, "step": 28682 }, { "epoch": 3.40128068303095, "grad_norm": 0.4968503222000168, "learning_rate": 2.8805269515010223e-06, "loss": 0.0351, "step": 28683 }, { "epoch": 3.4013992647930746, "grad_norm": 0.8536120207142458, "learning_rate": 2.879408458748559e-06, "loss": 0.0367, "step": 28684 }, { "epoch": 3.4015178465552, "grad_norm": 0.415301804105204, "learning_rate": 2.878290169920836e-06, "loss": 0.0179, "step": 28685 }, { "epoch": 3.4016364283173246, "grad_norm": 0.591959388509042, "learning_rate": 2.877172085028143e-06, "loss": 0.0292, "step": 28686 }, { "epoch": 3.40175501007945, "grad_norm": 0.5125800281400903, "learning_rate": 2.876054204080794e-06, "loss": 0.0263, "step": 28687 }, { "epoch": 3.401873591841575, "grad_norm": 0.4914447814770814, "learning_rate": 2.874936527089095e-06, "loss": 0.025, "step": 28688 }, { "epoch": 3.4019921736036998, "grad_norm": 0.8579544891791435, "learning_rate": 2.8738190540633507e-06, "loss": 0.0392, "step": 28689 }, { "epoch": 3.4021107553658245, "grad_norm": 0.3896298029138979, "learning_rate": 2.8727017850138677e-06, "loss": 0.0152, "step": 28690 }, { "epoch": 3.4022293371279497, "grad_norm": 0.7430567314400559, "learning_rate": 2.8715847199509286e-06, "loss": 0.0411, "step": 28691 }, { "epoch": 3.402347918890075, "grad_norm": 0.3837008082579921, "learning_rate": 2.8704678588848538e-06, "loss": 0.0138, "step": 28692 }, { "epoch": 3.4024665006521997, "grad_norm": 0.6069874958664384, "learning_rate": 2.8693512018259235e-06, "loss": 0.0327, "step": 28693 }, { "epoch": 3.4025850824143244, "grad_norm": 0.4837945792605973, "learning_rate": 2.8682347487844404e-06, "loss": 0.0166, "step": 28694 }, { "epoch": 3.4027036641764496, "grad_norm": 0.41542222613773194, "learning_rate": 2.867118499770682e-06, "loss": 0.0158, "step": 28695 }, { "epoch": 3.402822245938575, "grad_norm": 0.5687530140782903, "learning_rate": 2.8660024547949637e-06, "loss": 0.0204, "step": 28696 }, { "epoch": 3.4029408277006996, "grad_norm": 0.6161707337708398, "learning_rate": 2.8648866138675514e-06, "loss": 0.0258, "step": 28697 }, { "epoch": 3.4030594094628244, "grad_norm": 0.47970539394193024, "learning_rate": 2.863770976998742e-06, "loss": 0.0267, "step": 28698 }, { "epoch": 3.4031779912249496, "grad_norm": 0.5275151579705294, "learning_rate": 2.8626555441988196e-06, "loss": 0.0255, "step": 28699 }, { "epoch": 3.4032965729870748, "grad_norm": 0.4362116865883787, "learning_rate": 2.8615403154780674e-06, "loss": 0.02, "step": 28700 }, { "epoch": 3.4034151547491995, "grad_norm": 0.36044038665816625, "learning_rate": 2.860425290846769e-06, "loss": 0.0204, "step": 28701 }, { "epoch": 3.4035337365113247, "grad_norm": 0.40496441429394114, "learning_rate": 2.8593104703151907e-06, "loss": 0.0197, "step": 28702 }, { "epoch": 3.4036523182734495, "grad_norm": 0.5220459545180913, "learning_rate": 2.8581958538936275e-06, "loss": 0.0268, "step": 28703 }, { "epoch": 3.4037709000355747, "grad_norm": 0.6526097441864515, "learning_rate": 2.8570814415923437e-06, "loss": 0.0273, "step": 28704 }, { "epoch": 3.4038894817976995, "grad_norm": 0.5790065441913853, "learning_rate": 2.855967233421619e-06, "loss": 0.0256, "step": 28705 }, { "epoch": 3.4040080635598247, "grad_norm": 0.5436311261108875, "learning_rate": 2.854853229391713e-06, "loss": 0.025, "step": 28706 }, { "epoch": 3.4041266453219494, "grad_norm": 0.4343107809978202, "learning_rate": 2.8537394295129137e-06, "loss": 0.0205, "step": 28707 }, { "epoch": 3.4042452270840746, "grad_norm": 0.5620867583354788, "learning_rate": 2.852625833795475e-06, "loss": 0.0274, "step": 28708 }, { "epoch": 3.4043638088461994, "grad_norm": 0.41442277673357447, "learning_rate": 2.8515124422496653e-06, "loss": 0.0179, "step": 28709 }, { "epoch": 3.4044823906083246, "grad_norm": 0.3443210585825308, "learning_rate": 2.8503992548857494e-06, "loss": 0.0191, "step": 28710 }, { "epoch": 3.4046009723704493, "grad_norm": 0.8257153205727438, "learning_rate": 2.849286271713994e-06, "loss": 0.045, "step": 28711 }, { "epoch": 3.4047195541325745, "grad_norm": 0.5283162930432177, "learning_rate": 2.84817349274466e-06, "loss": 0.0232, "step": 28712 }, { "epoch": 3.4048381358946993, "grad_norm": 0.523276236365917, "learning_rate": 2.8470609179879926e-06, "loss": 0.0289, "step": 28713 }, { "epoch": 3.4049567176568245, "grad_norm": 0.6940582442898007, "learning_rate": 2.8459485474542665e-06, "loss": 0.0368, "step": 28714 }, { "epoch": 3.4050752994189493, "grad_norm": 0.48787674195045616, "learning_rate": 2.8448363811537234e-06, "loss": 0.0182, "step": 28715 }, { "epoch": 3.4051938811810745, "grad_norm": 0.6928582701375778, "learning_rate": 2.8437244190966246e-06, "loss": 0.036, "step": 28716 }, { "epoch": 3.4053124629431992, "grad_norm": 0.45113608373093816, "learning_rate": 2.842612661293209e-06, "loss": 0.0196, "step": 28717 }, { "epoch": 3.4054310447053244, "grad_norm": 0.372123163089793, "learning_rate": 2.8415011077537413e-06, "loss": 0.0149, "step": 28718 }, { "epoch": 3.405549626467449, "grad_norm": 0.3774003618418143, "learning_rate": 2.840389758488457e-06, "loss": 0.0176, "step": 28719 }, { "epoch": 3.4056682082295744, "grad_norm": 0.5791807664568188, "learning_rate": 2.839278613507604e-06, "loss": 0.0314, "step": 28720 }, { "epoch": 3.405786789991699, "grad_norm": 0.5356534075734177, "learning_rate": 2.838167672821429e-06, "loss": 0.0225, "step": 28721 }, { "epoch": 3.4059053717538244, "grad_norm": 0.6012224087747974, "learning_rate": 2.8370569364401687e-06, "loss": 0.0288, "step": 28722 }, { "epoch": 3.406023953515949, "grad_norm": 0.4410836305893754, "learning_rate": 2.8359464043740735e-06, "loss": 0.0205, "step": 28723 }, { "epoch": 3.4061425352780743, "grad_norm": 0.5974094891854677, "learning_rate": 2.8348360766333654e-06, "loss": 0.0316, "step": 28724 }, { "epoch": 3.406261117040199, "grad_norm": 0.6825578503203708, "learning_rate": 2.833725953228289e-06, "loss": 0.0261, "step": 28725 }, { "epoch": 3.4063796988023243, "grad_norm": 0.5538535382753909, "learning_rate": 2.8326160341690777e-06, "loss": 0.0317, "step": 28726 }, { "epoch": 3.406498280564449, "grad_norm": 0.4563194512946678, "learning_rate": 2.831506319465968e-06, "loss": 0.0217, "step": 28727 }, { "epoch": 3.4066168623265742, "grad_norm": 0.3267326012780439, "learning_rate": 2.8303968091291766e-06, "loss": 0.0164, "step": 28728 }, { "epoch": 3.406735444088699, "grad_norm": 0.5110498996741871, "learning_rate": 2.829287503168948e-06, "loss": 0.0316, "step": 28729 }, { "epoch": 3.406854025850824, "grad_norm": 0.6683379512445315, "learning_rate": 2.8281784015954965e-06, "loss": 0.0413, "step": 28730 }, { "epoch": 3.406972607612949, "grad_norm": 0.33033705298567917, "learning_rate": 2.8270695044190525e-06, "loss": 0.0165, "step": 28731 }, { "epoch": 3.407091189375074, "grad_norm": 0.5347918989066297, "learning_rate": 2.825960811649836e-06, "loss": 0.0334, "step": 28732 }, { "epoch": 3.407209771137199, "grad_norm": 0.5661835465693941, "learning_rate": 2.824852323298069e-06, "loss": 0.0297, "step": 28733 }, { "epoch": 3.407328352899324, "grad_norm": 1.5902489594171931, "learning_rate": 2.823744039373977e-06, "loss": 0.0532, "step": 28734 }, { "epoch": 3.407446934661449, "grad_norm": 0.4962946204951034, "learning_rate": 2.822635959887765e-06, "loss": 0.0283, "step": 28735 }, { "epoch": 3.407565516423574, "grad_norm": 0.4808005085953573, "learning_rate": 2.8215280848496535e-06, "loss": 0.015, "step": 28736 }, { "epoch": 3.4076840981856993, "grad_norm": 0.5144916528636831, "learning_rate": 2.8204204142698586e-06, "loss": 0.0273, "step": 28737 }, { "epoch": 3.407802679947824, "grad_norm": 0.5716711446858209, "learning_rate": 2.819312948158587e-06, "loss": 0.0302, "step": 28738 }, { "epoch": 3.407921261709949, "grad_norm": 0.38001127083280534, "learning_rate": 2.818205686526054e-06, "loss": 0.0216, "step": 28739 }, { "epoch": 3.408039843472074, "grad_norm": 0.43633095936637223, "learning_rate": 2.8170986293824533e-06, "loss": 0.0231, "step": 28740 }, { "epoch": 3.408158425234199, "grad_norm": 0.49800365188904394, "learning_rate": 2.815991776738014e-06, "loss": 0.0245, "step": 28741 }, { "epoch": 3.408277006996324, "grad_norm": 0.5387362156733787, "learning_rate": 2.814885128602918e-06, "loss": 0.029, "step": 28742 }, { "epoch": 3.4083955887584487, "grad_norm": 0.8424977291524355, "learning_rate": 2.813778684987378e-06, "loss": 0.0473, "step": 28743 }, { "epoch": 3.408514170520574, "grad_norm": 0.47011805050629885, "learning_rate": 2.812672445901593e-06, "loss": 0.0267, "step": 28744 }, { "epoch": 3.408632752282699, "grad_norm": 0.3504501929140975, "learning_rate": 2.811566411355762e-06, "loss": 0.0192, "step": 28745 }, { "epoch": 3.408751334044824, "grad_norm": 0.559375505669071, "learning_rate": 2.8104605813600776e-06, "loss": 0.027, "step": 28746 }, { "epoch": 3.4088699158069486, "grad_norm": 0.415207817184694, "learning_rate": 2.809354955924734e-06, "loss": 0.0182, "step": 28747 }, { "epoch": 3.408988497569074, "grad_norm": 0.6249242115076572, "learning_rate": 2.8082495350599258e-06, "loss": 0.0356, "step": 28748 }, { "epoch": 3.409107079331199, "grad_norm": 0.5403208284915397, "learning_rate": 2.807144318775845e-06, "loss": 0.0244, "step": 28749 }, { "epoch": 3.409225661093324, "grad_norm": 0.2619900264036093, "learning_rate": 2.8060393070826834e-06, "loss": 0.0125, "step": 28750 }, { "epoch": 3.409344242855449, "grad_norm": 0.49729693096449623, "learning_rate": 2.8049344999906134e-06, "loss": 0.025, "step": 28751 }, { "epoch": 3.4094628246175738, "grad_norm": 0.45346782010289904, "learning_rate": 2.803829897509838e-06, "loss": 0.0282, "step": 28752 }, { "epoch": 3.409581406379699, "grad_norm": 0.38342131177603156, "learning_rate": 2.80272549965053e-06, "loss": 0.0213, "step": 28753 }, { "epoch": 3.4096999881418237, "grad_norm": 0.36535184539612237, "learning_rate": 2.8016213064228774e-06, "loss": 0.0153, "step": 28754 }, { "epoch": 3.409818569903949, "grad_norm": 0.5028196633042868, "learning_rate": 2.8005173178370423e-06, "loss": 0.0226, "step": 28755 }, { "epoch": 3.4099371516660737, "grad_norm": 0.5604412292891575, "learning_rate": 2.7994135339032278e-06, "loss": 0.0369, "step": 28756 }, { "epoch": 3.410055733428199, "grad_norm": 0.4373224610143818, "learning_rate": 2.7983099546315895e-06, "loss": 0.0189, "step": 28757 }, { "epoch": 3.4101743151903237, "grad_norm": 0.7633271914761219, "learning_rate": 2.7972065800323078e-06, "loss": 0.0369, "step": 28758 }, { "epoch": 3.410292896952449, "grad_norm": 0.46645790246154306, "learning_rate": 2.7961034101155553e-06, "loss": 0.0281, "step": 28759 }, { "epoch": 3.4104114787145736, "grad_norm": 0.46017041718832447, "learning_rate": 2.795000444891502e-06, "loss": 0.0215, "step": 28760 }, { "epoch": 3.410530060476699, "grad_norm": 0.370389651058783, "learning_rate": 2.7938976843703195e-06, "loss": 0.0156, "step": 28761 }, { "epoch": 3.4106486422388236, "grad_norm": 0.48596459163072586, "learning_rate": 2.792795128562159e-06, "loss": 0.0242, "step": 28762 }, { "epoch": 3.410767224000949, "grad_norm": 0.5170399844450867, "learning_rate": 2.7916927774772034e-06, "loss": 0.0264, "step": 28763 }, { "epoch": 3.4108858057630735, "grad_norm": 0.438527228637401, "learning_rate": 2.790590631125603e-06, "loss": 0.0131, "step": 28764 }, { "epoch": 3.4110043875251987, "grad_norm": 0.7072710669334924, "learning_rate": 2.7894886895175275e-06, "loss": 0.0281, "step": 28765 }, { "epoch": 3.4111229692873235, "grad_norm": 0.6362325291244383, "learning_rate": 2.7883869526631217e-06, "loss": 0.033, "step": 28766 }, { "epoch": 3.4112415510494487, "grad_norm": 0.6868435687160185, "learning_rate": 2.787285420572558e-06, "loss": 0.0292, "step": 28767 }, { "epoch": 3.4113601328115735, "grad_norm": 0.6251614381922825, "learning_rate": 2.786184093255981e-06, "loss": 0.0214, "step": 28768 }, { "epoch": 3.4114787145736987, "grad_norm": 0.303215345526979, "learning_rate": 2.7850829707235432e-06, "loss": 0.0148, "step": 28769 }, { "epoch": 3.4115972963358234, "grad_norm": 0.33098764797048735, "learning_rate": 2.783982052985401e-06, "loss": 0.0143, "step": 28770 }, { "epoch": 3.4117158780979486, "grad_norm": 0.3887891229096125, "learning_rate": 2.782881340051699e-06, "loss": 0.0185, "step": 28771 }, { "epoch": 3.4118344598600734, "grad_norm": 0.44030216548617135, "learning_rate": 2.7817808319325954e-06, "loss": 0.0178, "step": 28772 }, { "epoch": 3.4119530416221986, "grad_norm": 0.6223420398593249, "learning_rate": 2.780680528638213e-06, "loss": 0.03, "step": 28773 }, { "epoch": 3.4120716233843233, "grad_norm": 0.48617301528770646, "learning_rate": 2.7795804301787214e-06, "loss": 0.0285, "step": 28774 }, { "epoch": 3.4121902051464486, "grad_norm": 0.6103593823967456, "learning_rate": 2.778480536564243e-06, "loss": 0.0387, "step": 28775 }, { "epoch": 3.4123087869085733, "grad_norm": 0.40114054083933803, "learning_rate": 2.7773808478049307e-06, "loss": 0.0222, "step": 28776 }, { "epoch": 3.4124273686706985, "grad_norm": 0.4168675835210919, "learning_rate": 2.776281363910904e-06, "loss": 0.0232, "step": 28777 }, { "epoch": 3.4125459504328233, "grad_norm": 0.7261389552771454, "learning_rate": 2.775182084892322e-06, "loss": 0.0304, "step": 28778 }, { "epoch": 3.4126645321949485, "grad_norm": 0.3847977613684596, "learning_rate": 2.7740830107593013e-06, "loss": 0.0128, "step": 28779 }, { "epoch": 3.4127831139570732, "grad_norm": 0.6057826202439162, "learning_rate": 2.772984141521981e-06, "loss": 0.0277, "step": 28780 }, { "epoch": 3.4129016957191984, "grad_norm": 0.3210043831802938, "learning_rate": 2.7718854771904916e-06, "loss": 0.0188, "step": 28781 }, { "epoch": 3.413020277481323, "grad_norm": 0.45900613394174505, "learning_rate": 2.770787017774959e-06, "loss": 0.0198, "step": 28782 }, { "epoch": 3.4131388592434484, "grad_norm": 0.6276219367503522, "learning_rate": 2.7696887632855136e-06, "loss": 0.024, "step": 28783 }, { "epoch": 3.413257441005573, "grad_norm": 0.3203936152781949, "learning_rate": 2.768590713732269e-06, "loss": 0.0183, "step": 28784 }, { "epoch": 3.4133760227676984, "grad_norm": 0.41001388956549906, "learning_rate": 2.7674928691253656e-06, "loss": 0.0259, "step": 28785 }, { "epoch": 3.4134946045298236, "grad_norm": 0.31664711816023183, "learning_rate": 2.766395229474911e-06, "loss": 0.0146, "step": 28786 }, { "epoch": 3.4136131862919483, "grad_norm": 0.4059405856075139, "learning_rate": 2.7652977947910274e-06, "loss": 0.0165, "step": 28787 }, { "epoch": 3.413731768054073, "grad_norm": 0.5782918908508955, "learning_rate": 2.7642005650838325e-06, "loss": 0.0199, "step": 28788 }, { "epoch": 3.4138503498161983, "grad_norm": 0.4867881540640706, "learning_rate": 2.7631035403634402e-06, "loss": 0.0171, "step": 28789 }, { "epoch": 3.4139689315783235, "grad_norm": 0.5586946682758357, "learning_rate": 2.7620067206399675e-06, "loss": 0.0198, "step": 28790 }, { "epoch": 3.4140875133404482, "grad_norm": 0.5569610844118944, "learning_rate": 2.76091010592352e-06, "loss": 0.0235, "step": 28791 }, { "epoch": 3.414206095102573, "grad_norm": 0.5031771499086548, "learning_rate": 2.7598136962242117e-06, "loss": 0.0305, "step": 28792 }, { "epoch": 3.414324676864698, "grad_norm": 0.642888908574521, "learning_rate": 2.7587174915521456e-06, "loss": 0.0422, "step": 28793 }, { "epoch": 3.4144432586268234, "grad_norm": 1.3891231444338141, "learning_rate": 2.7576214919174364e-06, "loss": 0.0731, "step": 28794 }, { "epoch": 3.414561840388948, "grad_norm": 0.4735963545492486, "learning_rate": 2.7565256973301757e-06, "loss": 0.0122, "step": 28795 }, { "epoch": 3.414680422151073, "grad_norm": 0.47936457788849773, "learning_rate": 2.755430107800472e-06, "loss": 0.0197, "step": 28796 }, { "epoch": 3.414799003913198, "grad_norm": 0.4019874670183177, "learning_rate": 2.7543347233384256e-06, "loss": 0.022, "step": 28797 }, { "epoch": 3.4149175856753233, "grad_norm": 0.48441704674533564, "learning_rate": 2.7532395439541307e-06, "loss": 0.0233, "step": 28798 }, { "epoch": 3.415036167437448, "grad_norm": 0.9570420677325593, "learning_rate": 2.7521445696576886e-06, "loss": 0.0346, "step": 28799 }, { "epoch": 3.4151547491995733, "grad_norm": 0.5721184465660097, "learning_rate": 2.7510498004591933e-06, "loss": 0.0307, "step": 28800 }, { "epoch": 3.415273330961698, "grad_norm": 0.6766586062906783, "learning_rate": 2.749955236368737e-06, "loss": 0.0374, "step": 28801 }, { "epoch": 3.4153919127238233, "grad_norm": 0.48773173818155907, "learning_rate": 2.748860877396406e-06, "loss": 0.0317, "step": 28802 }, { "epoch": 3.415510494485948, "grad_norm": 0.5844566180454868, "learning_rate": 2.747766723552289e-06, "loss": 0.0254, "step": 28803 }, { "epoch": 3.415629076248073, "grad_norm": 0.6527410842950828, "learning_rate": 2.746672774846476e-06, "loss": 0.031, "step": 28804 }, { "epoch": 3.415747658010198, "grad_norm": 0.35177256469215223, "learning_rate": 2.7455790312890583e-06, "loss": 0.0149, "step": 28805 }, { "epoch": 3.415866239772323, "grad_norm": 0.4173885102443358, "learning_rate": 2.744485492890106e-06, "loss": 0.0161, "step": 28806 }, { "epoch": 3.415984821534448, "grad_norm": 0.6632841294726318, "learning_rate": 2.743392159659705e-06, "loss": 0.039, "step": 28807 }, { "epoch": 3.416103403296573, "grad_norm": 0.4922858942413555, "learning_rate": 2.7422990316079337e-06, "loss": 0.0333, "step": 28808 }, { "epoch": 3.416221985058698, "grad_norm": 0.5973677978098286, "learning_rate": 2.741206108744876e-06, "loss": 0.0202, "step": 28809 }, { "epoch": 3.416340566820823, "grad_norm": 0.5796100248628445, "learning_rate": 2.740113391080604e-06, "loss": 0.0274, "step": 28810 }, { "epoch": 3.416459148582948, "grad_norm": 0.40189536892413713, "learning_rate": 2.7390208786251786e-06, "loss": 0.0227, "step": 28811 }, { "epoch": 3.416577730345073, "grad_norm": 0.4130522267539853, "learning_rate": 2.7379285713886954e-06, "loss": 0.0269, "step": 28812 }, { "epoch": 3.416696312107198, "grad_norm": 0.6587647972687322, "learning_rate": 2.7368364693812045e-06, "loss": 0.0343, "step": 28813 }, { "epoch": 3.416814893869323, "grad_norm": 0.7035266555521539, "learning_rate": 2.7357445726127807e-06, "loss": 0.0214, "step": 28814 }, { "epoch": 3.416933475631448, "grad_norm": 0.5738068767086077, "learning_rate": 2.734652881093491e-06, "loss": 0.0332, "step": 28815 }, { "epoch": 3.417052057393573, "grad_norm": 0.6777586906310089, "learning_rate": 2.7335613948334003e-06, "loss": 0.0345, "step": 28816 }, { "epoch": 3.4171706391556977, "grad_norm": 0.475651149187935, "learning_rate": 2.732470113842567e-06, "loss": 0.0226, "step": 28817 }, { "epoch": 3.417289220917823, "grad_norm": 0.955411133487436, "learning_rate": 2.731379038131052e-06, "loss": 0.0327, "step": 28818 }, { "epoch": 3.4174078026799477, "grad_norm": 0.8031860796836426, "learning_rate": 2.7302881677089135e-06, "loss": 0.0214, "step": 28819 }, { "epoch": 3.417526384442073, "grad_norm": 0.6387222375595267, "learning_rate": 2.7291975025862087e-06, "loss": 0.0345, "step": 28820 }, { "epoch": 3.4176449662041977, "grad_norm": 0.6853281308370222, "learning_rate": 2.728107042772998e-06, "loss": 0.0336, "step": 28821 }, { "epoch": 3.417763547966323, "grad_norm": 0.4956106200020485, "learning_rate": 2.727016788279321e-06, "loss": 0.0325, "step": 28822 }, { "epoch": 3.4178821297284476, "grad_norm": 0.5748393680819514, "learning_rate": 2.725926739115245e-06, "loss": 0.0285, "step": 28823 }, { "epoch": 3.418000711490573, "grad_norm": 0.7320934680525206, "learning_rate": 2.7248368952908053e-06, "loss": 0.0315, "step": 28824 }, { "epoch": 3.4181192932526976, "grad_norm": 0.6924784333375711, "learning_rate": 2.723747256816059e-06, "loss": 0.0286, "step": 28825 }, { "epoch": 3.418237875014823, "grad_norm": 0.6334722623028909, "learning_rate": 2.722657823701036e-06, "loss": 0.0243, "step": 28826 }, { "epoch": 3.4183564567769475, "grad_norm": 0.2995274659004343, "learning_rate": 2.721568595955798e-06, "loss": 0.0185, "step": 28827 }, { "epoch": 3.4184750385390728, "grad_norm": 0.26480377347418066, "learning_rate": 2.720479573590373e-06, "loss": 0.0137, "step": 28828 }, { "epoch": 3.4185936203011975, "grad_norm": 0.388129896090892, "learning_rate": 2.7193907566148063e-06, "loss": 0.0185, "step": 28829 }, { "epoch": 3.4187122020633227, "grad_norm": 0.6594484573794905, "learning_rate": 2.7183021450391367e-06, "loss": 0.0283, "step": 28830 }, { "epoch": 3.4188307838254475, "grad_norm": 0.3342673508643401, "learning_rate": 2.717213738873395e-06, "loss": 0.02, "step": 28831 }, { "epoch": 3.4189493655875727, "grad_norm": 0.6228229894317422, "learning_rate": 2.716125538127623e-06, "loss": 0.035, "step": 28832 }, { "epoch": 3.4190679473496974, "grad_norm": 0.3331075369698268, "learning_rate": 2.715037542811838e-06, "loss": 0.0205, "step": 28833 }, { "epoch": 3.4191865291118226, "grad_norm": 0.5810199597440494, "learning_rate": 2.7139497529360875e-06, "loss": 0.0243, "step": 28834 }, { "epoch": 3.4193051108739474, "grad_norm": 0.4490727551782478, "learning_rate": 2.7128621685103885e-06, "loss": 0.0239, "step": 28835 }, { "epoch": 3.4194236926360726, "grad_norm": 0.8506575087163718, "learning_rate": 2.7117747895447743e-06, "loss": 0.0408, "step": 28836 }, { "epoch": 3.4195422743981974, "grad_norm": 0.4392783081055932, "learning_rate": 2.710687616049254e-06, "loss": 0.0262, "step": 28837 }, { "epoch": 3.4196608561603226, "grad_norm": 0.43320267239712923, "learning_rate": 2.709600648033872e-06, "loss": 0.0188, "step": 28838 }, { "epoch": 3.4197794379224478, "grad_norm": 0.4726697389903445, "learning_rate": 2.7085138855086317e-06, "loss": 0.0234, "step": 28839 }, { "epoch": 3.4198980196845725, "grad_norm": 0.44346447643242715, "learning_rate": 2.707427328483558e-06, "loss": 0.0244, "step": 28840 }, { "epoch": 3.4200166014466973, "grad_norm": 0.7564008539168818, "learning_rate": 2.706340976968669e-06, "loss": 0.0333, "step": 28841 }, { "epoch": 3.4201351832088225, "grad_norm": 0.7314420056726109, "learning_rate": 2.7052548309739774e-06, "loss": 0.0371, "step": 28842 }, { "epoch": 3.4202537649709477, "grad_norm": 0.4941244638336679, "learning_rate": 2.704168890509501e-06, "loss": 0.0288, "step": 28843 }, { "epoch": 3.4203723467330724, "grad_norm": 0.7031509376201447, "learning_rate": 2.703083155585237e-06, "loss": 0.0252, "step": 28844 }, { "epoch": 3.420490928495197, "grad_norm": 0.6535322389990473, "learning_rate": 2.7019976262112113e-06, "loss": 0.0319, "step": 28845 }, { "epoch": 3.4206095102573224, "grad_norm": 0.7419198214202127, "learning_rate": 2.700912302397421e-06, "loss": 0.0298, "step": 28846 }, { "epoch": 3.4207280920194476, "grad_norm": 0.5418175536770633, "learning_rate": 2.699827184153875e-06, "loss": 0.0329, "step": 28847 }, { "epoch": 3.4208466737815724, "grad_norm": 0.4513432167652821, "learning_rate": 2.698742271490576e-06, "loss": 0.0243, "step": 28848 }, { "epoch": 3.4209652555436976, "grad_norm": 0.4189299049178581, "learning_rate": 2.6976575644175244e-06, "loss": 0.017, "step": 28849 }, { "epoch": 3.4210838373058223, "grad_norm": 0.4908025394991333, "learning_rate": 2.696573062944727e-06, "loss": 0.0234, "step": 28850 }, { "epoch": 3.4212024190679475, "grad_norm": 0.37056933843292483, "learning_rate": 2.6954887670821714e-06, "loss": 0.0194, "step": 28851 }, { "epoch": 3.4213210008300723, "grad_norm": 0.5509020490206578, "learning_rate": 2.694404676839857e-06, "loss": 0.0249, "step": 28852 }, { "epoch": 3.4214395825921975, "grad_norm": 0.8248525963852674, "learning_rate": 2.693320792227777e-06, "loss": 0.0371, "step": 28853 }, { "epoch": 3.4215581643543223, "grad_norm": 0.47423697815192395, "learning_rate": 2.6922371132559328e-06, "loss": 0.0274, "step": 28854 }, { "epoch": 3.4216767461164475, "grad_norm": 0.5003525357083477, "learning_rate": 2.691153639934299e-06, "loss": 0.0226, "step": 28855 }, { "epoch": 3.421795327878572, "grad_norm": 0.7930953747637942, "learning_rate": 2.6900703722728738e-06, "loss": 0.0436, "step": 28856 }, { "epoch": 3.4219139096406974, "grad_norm": 0.8214240661945638, "learning_rate": 2.6889873102816405e-06, "loss": 0.0458, "step": 28857 }, { "epoch": 3.422032491402822, "grad_norm": 0.9454561433902492, "learning_rate": 2.6879044539705855e-06, "loss": 0.0414, "step": 28858 }, { "epoch": 3.4221510731649474, "grad_norm": 0.6580089227418033, "learning_rate": 2.68682180334969e-06, "loss": 0.0298, "step": 28859 }, { "epoch": 3.422269654927072, "grad_norm": 0.48506787182481154, "learning_rate": 2.6857393584289347e-06, "loss": 0.023, "step": 28860 }, { "epoch": 3.4223882366891973, "grad_norm": 0.5142529954386083, "learning_rate": 2.684657119218306e-06, "loss": 0.021, "step": 28861 }, { "epoch": 3.422506818451322, "grad_norm": 0.4062819578139395, "learning_rate": 2.683575085727766e-06, "loss": 0.0232, "step": 28862 }, { "epoch": 3.4226254002134473, "grad_norm": 0.9179842371274384, "learning_rate": 2.6824932579673e-06, "loss": 0.0581, "step": 28863 }, { "epoch": 3.422743981975572, "grad_norm": 0.41338171245838584, "learning_rate": 2.681411635946876e-06, "loss": 0.0224, "step": 28864 }, { "epoch": 3.4228625637376973, "grad_norm": 0.597720482757603, "learning_rate": 2.6803302196764744e-06, "loss": 0.0234, "step": 28865 }, { "epoch": 3.422981145499822, "grad_norm": 0.7673419683828584, "learning_rate": 2.6792490091660544e-06, "loss": 0.0382, "step": 28866 }, { "epoch": 3.4230997272619472, "grad_norm": 0.44033864973767983, "learning_rate": 2.6781680044255854e-06, "loss": 0.0156, "step": 28867 }, { "epoch": 3.423218309024072, "grad_norm": 0.34685110524468127, "learning_rate": 2.677087205465034e-06, "loss": 0.0139, "step": 28868 }, { "epoch": 3.423336890786197, "grad_norm": 0.5520318585130529, "learning_rate": 2.6760066122943657e-06, "loss": 0.025, "step": 28869 }, { "epoch": 3.423455472548322, "grad_norm": 0.7190642751507834, "learning_rate": 2.6749262249235406e-06, "loss": 0.0277, "step": 28870 }, { "epoch": 3.423574054310447, "grad_norm": 0.4496697873807696, "learning_rate": 2.6738460433625186e-06, "loss": 0.0225, "step": 28871 }, { "epoch": 3.423692636072572, "grad_norm": 0.29199031169433565, "learning_rate": 2.672766067621263e-06, "loss": 0.0134, "step": 28872 }, { "epoch": 3.423811217834697, "grad_norm": 0.6858097021769025, "learning_rate": 2.671686297709719e-06, "loss": 0.0464, "step": 28873 }, { "epoch": 3.423929799596822, "grad_norm": 0.4347315545448251, "learning_rate": 2.670606733637848e-06, "loss": 0.0277, "step": 28874 }, { "epoch": 3.424048381358947, "grad_norm": 0.40337716458964296, "learning_rate": 2.669527375415601e-06, "loss": 0.0229, "step": 28875 }, { "epoch": 3.424166963121072, "grad_norm": 0.37926823007009436, "learning_rate": 2.668448223052933e-06, "loss": 0.0141, "step": 28876 }, { "epoch": 3.424285544883197, "grad_norm": 0.5080023095660138, "learning_rate": 2.667369276559781e-06, "loss": 0.0183, "step": 28877 }, { "epoch": 3.424404126645322, "grad_norm": 0.742442396846309, "learning_rate": 2.6662905359460975e-06, "loss": 0.035, "step": 28878 }, { "epoch": 3.424522708407447, "grad_norm": 0.5925498525310628, "learning_rate": 2.6652120012218313e-06, "loss": 0.0313, "step": 28879 }, { "epoch": 3.4246412901695718, "grad_norm": 0.5615337595323237, "learning_rate": 2.6641336723969207e-06, "loss": 0.0343, "step": 28880 }, { "epoch": 3.424759871931697, "grad_norm": 0.5102640743614011, "learning_rate": 2.6630555494813108e-06, "loss": 0.0199, "step": 28881 }, { "epoch": 3.4248784536938217, "grad_norm": 0.7299367425284072, "learning_rate": 2.661977632484927e-06, "loss": 0.0379, "step": 28882 }, { "epoch": 3.424997035455947, "grad_norm": 0.3311944831043888, "learning_rate": 2.660899921417728e-06, "loss": 0.0154, "step": 28883 }, { "epoch": 3.4251156172180717, "grad_norm": 0.5171135340395947, "learning_rate": 2.659822416289634e-06, "loss": 0.0215, "step": 28884 }, { "epoch": 3.425234198980197, "grad_norm": 0.4728392674639292, "learning_rate": 2.658745117110581e-06, "loss": 0.0211, "step": 28885 }, { "epoch": 3.4253527807423216, "grad_norm": 0.4955424733786062, "learning_rate": 2.6576680238905e-06, "loss": 0.0234, "step": 28886 }, { "epoch": 3.425471362504447, "grad_norm": 0.4413447835970298, "learning_rate": 2.6565911366393304e-06, "loss": 0.0193, "step": 28887 }, { "epoch": 3.425589944266572, "grad_norm": 0.4337814196334114, "learning_rate": 2.6555144553669836e-06, "loss": 0.0155, "step": 28888 }, { "epoch": 3.425708526028697, "grad_norm": 0.6703212141361025, "learning_rate": 2.654437980083391e-06, "loss": 0.0226, "step": 28889 }, { "epoch": 3.4258271077908216, "grad_norm": 0.5787462506618869, "learning_rate": 2.653361710798483e-06, "loss": 0.0358, "step": 28890 }, { "epoch": 3.4259456895529468, "grad_norm": 1.025571214280264, "learning_rate": 2.6522856475221735e-06, "loss": 0.0589, "step": 28891 }, { "epoch": 3.426064271315072, "grad_norm": 0.4804945714205404, "learning_rate": 2.651209790264392e-06, "loss": 0.0301, "step": 28892 }, { "epoch": 3.4261828530771967, "grad_norm": 0.4250932148559547, "learning_rate": 2.6501341390350376e-06, "loss": 0.0197, "step": 28893 }, { "epoch": 3.4263014348393215, "grad_norm": 0.7705823944007307, "learning_rate": 2.6490586938440532e-06, "loss": 0.0334, "step": 28894 }, { "epoch": 3.4264200166014467, "grad_norm": 0.5325498944444598, "learning_rate": 2.6479834547013304e-06, "loss": 0.025, "step": 28895 }, { "epoch": 3.426538598363572, "grad_norm": 0.7604292267691592, "learning_rate": 2.646908421616792e-06, "loss": 0.0313, "step": 28896 }, { "epoch": 3.4266571801256966, "grad_norm": 0.4250494865032407, "learning_rate": 2.645833594600347e-06, "loss": 0.0213, "step": 28897 }, { "epoch": 3.426775761887822, "grad_norm": 0.29908906638436566, "learning_rate": 2.644758973661904e-06, "loss": 0.0102, "step": 28898 }, { "epoch": 3.4268943436499466, "grad_norm": 0.5135969459152446, "learning_rate": 2.643684558811374e-06, "loss": 0.0259, "step": 28899 }, { "epoch": 3.427012925412072, "grad_norm": 0.5625710724607172, "learning_rate": 2.64261035005865e-06, "loss": 0.0265, "step": 28900 }, { "epoch": 3.4271315071741966, "grad_norm": 0.687073786725866, "learning_rate": 2.641536347413648e-06, "loss": 0.0249, "step": 28901 }, { "epoch": 3.4272500889363218, "grad_norm": 0.3898847547194416, "learning_rate": 2.6404625508862606e-06, "loss": 0.0185, "step": 28902 }, { "epoch": 3.4273686706984465, "grad_norm": 0.6241801703404349, "learning_rate": 2.6393889604863916e-06, "loss": 0.0266, "step": 28903 }, { "epoch": 3.4274872524605717, "grad_norm": 0.4009879580917514, "learning_rate": 2.6383155762239292e-06, "loss": 0.0152, "step": 28904 }, { "epoch": 3.4276058342226965, "grad_norm": 0.7450831694646657, "learning_rate": 2.637242398108783e-06, "loss": 0.0351, "step": 28905 }, { "epoch": 3.4277244159848217, "grad_norm": 0.49592318093198023, "learning_rate": 2.6361694261508364e-06, "loss": 0.026, "step": 28906 }, { "epoch": 3.4278429977469465, "grad_norm": 0.3851189558440772, "learning_rate": 2.6350966603599815e-06, "loss": 0.0176, "step": 28907 }, { "epoch": 3.4279615795090717, "grad_norm": 0.6327495597531456, "learning_rate": 2.63402410074611e-06, "loss": 0.0483, "step": 28908 }, { "epoch": 3.4280801612711964, "grad_norm": 0.7880176039766688, "learning_rate": 2.6329517473191095e-06, "loss": 0.023, "step": 28909 }, { "epoch": 3.4281987430333216, "grad_norm": 0.3340950192267071, "learning_rate": 2.6318796000888714e-06, "loss": 0.0157, "step": 28910 }, { "epoch": 3.4283173247954464, "grad_norm": 0.5562835093461165, "learning_rate": 2.630807659065268e-06, "loss": 0.0265, "step": 28911 }, { "epoch": 3.4284359065575716, "grad_norm": 0.5930773154972588, "learning_rate": 2.6297359242581865e-06, "loss": 0.0428, "step": 28912 }, { "epoch": 3.4285544883196963, "grad_norm": 0.5845144673885732, "learning_rate": 2.6286643956775074e-06, "loss": 0.0286, "step": 28913 }, { "epoch": 3.4286730700818215, "grad_norm": 0.5514470789576525, "learning_rate": 2.627593073333115e-06, "loss": 0.0242, "step": 28914 }, { "epoch": 3.4287916518439463, "grad_norm": 0.7823458576941363, "learning_rate": 2.6265219572348705e-06, "loss": 0.036, "step": 28915 }, { "epoch": 3.4289102336060715, "grad_norm": 0.5927304254934044, "learning_rate": 2.625451047392666e-06, "loss": 0.0334, "step": 28916 }, { "epoch": 3.4290288153681963, "grad_norm": 0.39314086491178474, "learning_rate": 2.62438034381636e-06, "loss": 0.0185, "step": 28917 }, { "epoch": 3.4291473971303215, "grad_norm": 0.6107250780293171, "learning_rate": 2.6233098465158285e-06, "loss": 0.0356, "step": 28918 }, { "epoch": 3.429265978892446, "grad_norm": 0.4038961031416665, "learning_rate": 2.622239555500941e-06, "loss": 0.0271, "step": 28919 }, { "epoch": 3.4293845606545714, "grad_norm": 0.7267137208854517, "learning_rate": 2.621169470781565e-06, "loss": 0.0355, "step": 28920 }, { "epoch": 3.429503142416696, "grad_norm": 0.5155891985729034, "learning_rate": 2.6200995923675644e-06, "loss": 0.0281, "step": 28921 }, { "epoch": 3.4296217241788214, "grad_norm": 0.6827069050821734, "learning_rate": 2.6190299202688008e-06, "loss": 0.025, "step": 28922 }, { "epoch": 3.429740305940946, "grad_norm": 0.3883765910448688, "learning_rate": 2.617960454495133e-06, "loss": 0.0183, "step": 28923 }, { "epoch": 3.4298588877030713, "grad_norm": 0.5266412958731268, "learning_rate": 2.6168911950564258e-06, "loss": 0.029, "step": 28924 }, { "epoch": 3.429977469465196, "grad_norm": 0.3948659125168395, "learning_rate": 2.6158221419625345e-06, "loss": 0.0165, "step": 28925 }, { "epoch": 3.4300960512273213, "grad_norm": 0.5607422498234144, "learning_rate": 2.6147532952233126e-06, "loss": 0.0225, "step": 28926 }, { "epoch": 3.430214632989446, "grad_norm": 0.37558934232491353, "learning_rate": 2.6136846548486136e-06, "loss": 0.0147, "step": 28927 }, { "epoch": 3.4303332147515713, "grad_norm": 0.7372984896008119, "learning_rate": 2.6126162208482905e-06, "loss": 0.0377, "step": 28928 }, { "epoch": 3.430451796513696, "grad_norm": 0.5178211231700118, "learning_rate": 2.611547993232191e-06, "loss": 0.0198, "step": 28929 }, { "epoch": 3.4305703782758212, "grad_norm": 0.7831609126111655, "learning_rate": 2.6104799720101657e-06, "loss": 0.0381, "step": 28930 }, { "epoch": 3.430688960037946, "grad_norm": 0.6962743700884316, "learning_rate": 2.6094121571920595e-06, "loss": 0.0409, "step": 28931 }, { "epoch": 3.430807541800071, "grad_norm": 0.3247761772565658, "learning_rate": 2.6083445487877223e-06, "loss": 0.0132, "step": 28932 }, { "epoch": 3.430926123562196, "grad_norm": 0.46970845515191734, "learning_rate": 2.607277146806983e-06, "loss": 0.0242, "step": 28933 }, { "epoch": 3.431044705324321, "grad_norm": 0.32169182314522277, "learning_rate": 2.6062099512596888e-06, "loss": 0.016, "step": 28934 }, { "epoch": 3.431163287086446, "grad_norm": 0.9036392163153298, "learning_rate": 2.6051429621556767e-06, "loss": 0.0569, "step": 28935 }, { "epoch": 3.431281868848571, "grad_norm": 0.4497263448425969, "learning_rate": 2.6040761795047913e-06, "loss": 0.0225, "step": 28936 }, { "epoch": 3.4314004506106963, "grad_norm": 0.47822586462331157, "learning_rate": 2.6030096033168557e-06, "loss": 0.0205, "step": 28937 }, { "epoch": 3.431519032372821, "grad_norm": 0.432022138190527, "learning_rate": 2.601943233601703e-06, "loss": 0.0174, "step": 28938 }, { "epoch": 3.431637614134946, "grad_norm": 0.41307157745751927, "learning_rate": 2.6008770703691704e-06, "loss": 0.0184, "step": 28939 }, { "epoch": 3.431756195897071, "grad_norm": 0.913226843079112, "learning_rate": 2.5998111136290804e-06, "loss": 0.0337, "step": 28940 }, { "epoch": 3.4318747776591962, "grad_norm": 0.423660507800162, "learning_rate": 2.598745363391272e-06, "loss": 0.0276, "step": 28941 }, { "epoch": 3.431993359421321, "grad_norm": 0.4220618749950033, "learning_rate": 2.5976798196655492e-06, "loss": 0.0205, "step": 28942 }, { "epoch": 3.4321119411834458, "grad_norm": 0.4196669825802467, "learning_rate": 2.5966144824617567e-06, "loss": 0.0203, "step": 28943 }, { "epoch": 3.432230522945571, "grad_norm": 0.6491129103646393, "learning_rate": 2.5955493517897e-06, "loss": 0.0266, "step": 28944 }, { "epoch": 3.432349104707696, "grad_norm": 0.4014789294156019, "learning_rate": 2.594484427659205e-06, "loss": 0.0337, "step": 28945 }, { "epoch": 3.432467686469821, "grad_norm": 0.9548158241423143, "learning_rate": 2.593419710080089e-06, "loss": 0.0485, "step": 28946 }, { "epoch": 3.4325862682319457, "grad_norm": 0.5629155046198691, "learning_rate": 2.592355199062166e-06, "loss": 0.022, "step": 28947 }, { "epoch": 3.432704849994071, "grad_norm": 0.6146649886225877, "learning_rate": 2.5912908946152533e-06, "loss": 0.0413, "step": 28948 }, { "epoch": 3.432823431756196, "grad_norm": 0.3257017934828633, "learning_rate": 2.590226796749151e-06, "loss": 0.0159, "step": 28949 }, { "epoch": 3.432942013518321, "grad_norm": 0.640979815565223, "learning_rate": 2.5891629054736855e-06, "loss": 0.0217, "step": 28950 }, { "epoch": 3.433060595280446, "grad_norm": 0.2876084486498489, "learning_rate": 2.5880992207986538e-06, "loss": 0.0178, "step": 28951 }, { "epoch": 3.433179177042571, "grad_norm": 0.5187809642305578, "learning_rate": 2.5870357427338675e-06, "loss": 0.0193, "step": 28952 }, { "epoch": 3.433297758804696, "grad_norm": 0.3942401382648835, "learning_rate": 2.5859724712891166e-06, "loss": 0.0157, "step": 28953 }, { "epoch": 3.4334163405668208, "grad_norm": 0.2896247288969914, "learning_rate": 2.584909406474223e-06, "loss": 0.0161, "step": 28954 }, { "epoch": 3.433534922328946, "grad_norm": 0.8198249469216433, "learning_rate": 2.583846548298974e-06, "loss": 0.0378, "step": 28955 }, { "epoch": 3.4336535040910707, "grad_norm": 0.8810984749453231, "learning_rate": 2.5827838967731692e-06, "loss": 0.0496, "step": 28956 }, { "epoch": 3.433772085853196, "grad_norm": 0.47883524445638304, "learning_rate": 2.58172145190661e-06, "loss": 0.0324, "step": 28957 }, { "epoch": 3.4338906676153207, "grad_norm": 0.4944371178585555, "learning_rate": 2.580659213709086e-06, "loss": 0.025, "step": 28958 }, { "epoch": 3.434009249377446, "grad_norm": 0.5544512689634641, "learning_rate": 2.5795971821903965e-06, "loss": 0.0271, "step": 28959 }, { "epoch": 3.4341278311395707, "grad_norm": 0.829947137001259, "learning_rate": 2.578535357360318e-06, "loss": 0.0397, "step": 28960 }, { "epoch": 3.434246412901696, "grad_norm": 0.5632681184734567, "learning_rate": 2.5774737392286587e-06, "loss": 0.0189, "step": 28961 }, { "epoch": 3.4343649946638206, "grad_norm": 0.3221177357320511, "learning_rate": 2.5764123278051886e-06, "loss": 0.0213, "step": 28962 }, { "epoch": 3.434483576425946, "grad_norm": 0.4562813118958436, "learning_rate": 2.575351123099706e-06, "loss": 0.0274, "step": 28963 }, { "epoch": 3.4346021581880706, "grad_norm": 0.6726240799280562, "learning_rate": 2.5742901251219754e-06, "loss": 0.0298, "step": 28964 }, { "epoch": 3.434720739950196, "grad_norm": 0.49034198093782105, "learning_rate": 2.573229333881802e-06, "loss": 0.0214, "step": 28965 }, { "epoch": 3.4348393217123205, "grad_norm": 0.5797672338570193, "learning_rate": 2.572168749388945e-06, "loss": 0.0331, "step": 28966 }, { "epoch": 3.4349579034744457, "grad_norm": 0.622416674927675, "learning_rate": 2.5711083716531914e-06, "loss": 0.0262, "step": 28967 }, { "epoch": 3.4350764852365705, "grad_norm": 0.4267494687830739, "learning_rate": 2.5700482006843136e-06, "loss": 0.0207, "step": 28968 }, { "epoch": 3.4351950669986957, "grad_norm": 0.6534593215897241, "learning_rate": 2.568988236492087e-06, "loss": 0.0226, "step": 28969 }, { "epoch": 3.4353136487608205, "grad_norm": 0.42079344753664144, "learning_rate": 2.5679284790862844e-06, "loss": 0.0223, "step": 28970 }, { "epoch": 3.4354322305229457, "grad_norm": 0.5457963783426637, "learning_rate": 2.5668689284766677e-06, "loss": 0.025, "step": 28971 }, { "epoch": 3.4355508122850704, "grad_norm": 0.6297075658859654, "learning_rate": 2.5658095846730152e-06, "loss": 0.0278, "step": 28972 }, { "epoch": 3.4356693940471956, "grad_norm": 0.5717664802314275, "learning_rate": 2.5647504476850852e-06, "loss": 0.0318, "step": 28973 }, { "epoch": 3.4357879758093204, "grad_norm": 0.21352136780975575, "learning_rate": 2.5636915175226483e-06, "loss": 0.0089, "step": 28974 }, { "epoch": 3.4359065575714456, "grad_norm": 0.4441615052739708, "learning_rate": 2.562632794195455e-06, "loss": 0.0208, "step": 28975 }, { "epoch": 3.4360251393335703, "grad_norm": 0.45180190868681785, "learning_rate": 2.56157427771328e-06, "loss": 0.0258, "step": 28976 }, { "epoch": 3.4361437210956955, "grad_norm": 0.4566492504915087, "learning_rate": 2.560515968085872e-06, "loss": 0.0151, "step": 28977 }, { "epoch": 3.4362623028578203, "grad_norm": 0.41155877860903833, "learning_rate": 2.5594578653229865e-06, "loss": 0.0235, "step": 28978 }, { "epoch": 3.4363808846199455, "grad_norm": 0.36181227405187905, "learning_rate": 2.5583999694343828e-06, "loss": 0.0211, "step": 28979 }, { "epoch": 3.4364994663820703, "grad_norm": 0.4241225499151032, "learning_rate": 2.5573422804298113e-06, "loss": 0.0245, "step": 28980 }, { "epoch": 3.4366180481441955, "grad_norm": 0.6146244230330579, "learning_rate": 2.556284798319028e-06, "loss": 0.0198, "step": 28981 }, { "epoch": 3.4367366299063202, "grad_norm": 0.8238420779172254, "learning_rate": 2.5552275231117725e-06, "loss": 0.0272, "step": 28982 }, { "epoch": 3.4368552116684454, "grad_norm": 0.35260990085841615, "learning_rate": 2.554170454817795e-06, "loss": 0.0167, "step": 28983 }, { "epoch": 3.43697379343057, "grad_norm": 0.9015266317470307, "learning_rate": 2.55311359344684e-06, "loss": 0.0345, "step": 28984 }, { "epoch": 3.4370923751926954, "grad_norm": 0.5529090054201413, "learning_rate": 2.552056939008657e-06, "loss": 0.0259, "step": 28985 }, { "epoch": 3.4372109569548206, "grad_norm": 0.3632721380145686, "learning_rate": 2.55100049151297e-06, "loss": 0.0159, "step": 28986 }, { "epoch": 3.4373295387169454, "grad_norm": 0.722966431134457, "learning_rate": 2.5499442509695437e-06, "loss": 0.0398, "step": 28987 }, { "epoch": 3.43744812047907, "grad_norm": 0.34384596789340444, "learning_rate": 2.548888217388093e-06, "loss": 0.0172, "step": 28988 }, { "epoch": 3.4375667022411953, "grad_norm": 0.7496288126630648, "learning_rate": 2.5478323907783622e-06, "loss": 0.0338, "step": 28989 }, { "epoch": 3.4376852840033205, "grad_norm": 0.5028999285107342, "learning_rate": 2.5467767711500833e-06, "loss": 0.0223, "step": 28990 }, { "epoch": 3.4378038657654453, "grad_norm": 0.584883026883105, "learning_rate": 2.5457213585129926e-06, "loss": 0.0331, "step": 28991 }, { "epoch": 3.43792244752757, "grad_norm": 0.5787965236611579, "learning_rate": 2.544666152876815e-06, "loss": 0.0284, "step": 28992 }, { "epoch": 3.4380410292896952, "grad_norm": 0.47388354370957825, "learning_rate": 2.543611154251277e-06, "loss": 0.0253, "step": 28993 }, { "epoch": 3.4381596110518204, "grad_norm": 0.27379800309881847, "learning_rate": 2.5425563626461065e-06, "loss": 0.0144, "step": 28994 }, { "epoch": 3.438278192813945, "grad_norm": 0.3173338032092776, "learning_rate": 2.541501778071029e-06, "loss": 0.0118, "step": 28995 }, { "epoch": 3.43839677457607, "grad_norm": 0.6322932760022529, "learning_rate": 2.5404474005357613e-06, "loss": 0.0238, "step": 28996 }, { "epoch": 3.438515356338195, "grad_norm": 0.622440449361367, "learning_rate": 2.539393230050033e-06, "loss": 0.0341, "step": 28997 }, { "epoch": 3.4386339381003204, "grad_norm": 0.4264394755013822, "learning_rate": 2.538339266623546e-06, "loss": 0.0259, "step": 28998 }, { "epoch": 3.438752519862445, "grad_norm": 0.4594086091076395, "learning_rate": 2.5372855102660382e-06, "loss": 0.0159, "step": 28999 }, { "epoch": 3.4388711016245703, "grad_norm": 0.6194532428533074, "learning_rate": 2.5362319609872036e-06, "loss": 0.0367, "step": 29000 }, { "epoch": 3.438989683386695, "grad_norm": 0.8420450803968799, "learning_rate": 2.5351786187967684e-06, "loss": 0.0529, "step": 29001 }, { "epoch": 3.4391082651488203, "grad_norm": 0.703784025104894, "learning_rate": 2.534125483704433e-06, "loss": 0.0316, "step": 29002 }, { "epoch": 3.439226846910945, "grad_norm": 0.6601279429254863, "learning_rate": 2.5330725557199204e-06, "loss": 0.0398, "step": 29003 }, { "epoch": 3.4393454286730702, "grad_norm": 0.5726357490352061, "learning_rate": 2.532019834852922e-06, "loss": 0.0349, "step": 29004 }, { "epoch": 3.439464010435195, "grad_norm": 0.5406508742294013, "learning_rate": 2.530967321113148e-06, "loss": 0.0191, "step": 29005 }, { "epoch": 3.43958259219732, "grad_norm": 0.44384692485672, "learning_rate": 2.5299150145103007e-06, "loss": 0.0199, "step": 29006 }, { "epoch": 3.439701173959445, "grad_norm": 0.41679732616769966, "learning_rate": 2.5288629150540838e-06, "loss": 0.0236, "step": 29007 }, { "epoch": 3.43981975572157, "grad_norm": 0.36680708235551435, "learning_rate": 2.527811022754201e-06, "loss": 0.0197, "step": 29008 }, { "epoch": 3.439938337483695, "grad_norm": 0.4367817728170921, "learning_rate": 2.5267593376203325e-06, "loss": 0.0172, "step": 29009 }, { "epoch": 3.44005691924582, "grad_norm": 0.3592701413331721, "learning_rate": 2.5257078596621935e-06, "loss": 0.0167, "step": 29010 }, { "epoch": 3.440175501007945, "grad_norm": 0.7296561004193876, "learning_rate": 2.5246565888894623e-06, "loss": 0.0265, "step": 29011 }, { "epoch": 3.44029408277007, "grad_norm": 0.7082550740449945, "learning_rate": 2.5236055253118423e-06, "loss": 0.0232, "step": 29012 }, { "epoch": 3.440412664532195, "grad_norm": 0.6829663828660302, "learning_rate": 2.522554668939009e-06, "loss": 0.0435, "step": 29013 }, { "epoch": 3.44053124629432, "grad_norm": 0.47394647630635695, "learning_rate": 2.5215040197806656e-06, "loss": 0.0172, "step": 29014 }, { "epoch": 3.440649828056445, "grad_norm": 0.8515580442447738, "learning_rate": 2.5204535778464878e-06, "loss": 0.0498, "step": 29015 }, { "epoch": 3.44076840981857, "grad_norm": 0.7281952740154966, "learning_rate": 2.5194033431461593e-06, "loss": 0.0395, "step": 29016 }, { "epoch": 3.4408869915806948, "grad_norm": 0.6926765658760234, "learning_rate": 2.5183533156893646e-06, "loss": 0.0415, "step": 29017 }, { "epoch": 3.44100557334282, "grad_norm": 0.4468805631146565, "learning_rate": 2.5173034954857845e-06, "loss": 0.0153, "step": 29018 }, { "epoch": 3.4411241551049447, "grad_norm": 0.45258893165217723, "learning_rate": 2.5162538825450997e-06, "loss": 0.0275, "step": 29019 }, { "epoch": 3.44124273686707, "grad_norm": 0.6154791963587078, "learning_rate": 2.515204476876973e-06, "loss": 0.0321, "step": 29020 }, { "epoch": 3.4413613186291947, "grad_norm": 0.4091725277584971, "learning_rate": 2.514155278491101e-06, "loss": 0.0204, "step": 29021 }, { "epoch": 3.44147990039132, "grad_norm": 0.7095330928889038, "learning_rate": 2.513106287397135e-06, "loss": 0.0345, "step": 29022 }, { "epoch": 3.4415984821534447, "grad_norm": 0.5899791236014705, "learning_rate": 2.512057503604759e-06, "loss": 0.0358, "step": 29023 }, { "epoch": 3.44171706391557, "grad_norm": 0.4027118647360011, "learning_rate": 2.5110089271236288e-06, "loss": 0.013, "step": 29024 }, { "epoch": 3.4418356456776946, "grad_norm": 0.38418871693055007, "learning_rate": 2.5099605579634255e-06, "loss": 0.0159, "step": 29025 }, { "epoch": 3.44195422743982, "grad_norm": 0.43132057375401417, "learning_rate": 2.5089123961338057e-06, "loss": 0.0193, "step": 29026 }, { "epoch": 3.4420728092019446, "grad_norm": 0.4279997978923762, "learning_rate": 2.507864441644431e-06, "loss": 0.0221, "step": 29027 }, { "epoch": 3.44219139096407, "grad_norm": 0.5180648788311326, "learning_rate": 2.5068166945049626e-06, "loss": 0.0279, "step": 29028 }, { "epoch": 3.4423099727261945, "grad_norm": 0.48809903774103747, "learning_rate": 2.5057691547250627e-06, "loss": 0.023, "step": 29029 }, { "epoch": 3.4424285544883197, "grad_norm": 0.7137282454900876, "learning_rate": 2.5047218223143933e-06, "loss": 0.0301, "step": 29030 }, { "epoch": 3.4425471362504445, "grad_norm": 0.3282517566488394, "learning_rate": 2.5036746972825904e-06, "loss": 0.0215, "step": 29031 }, { "epoch": 3.4426657180125697, "grad_norm": 0.6311090967392684, "learning_rate": 2.502627779639333e-06, "loss": 0.0375, "step": 29032 }, { "epoch": 3.4427842997746945, "grad_norm": 0.37801694706280176, "learning_rate": 2.5015810693942516e-06, "loss": 0.0221, "step": 29033 }, { "epoch": 3.4429028815368197, "grad_norm": 0.42340830564294646, "learning_rate": 2.5005345665570113e-06, "loss": 0.0176, "step": 29034 }, { "epoch": 3.4430214632989444, "grad_norm": 0.5780364963379726, "learning_rate": 2.4994882711372404e-06, "loss": 0.0287, "step": 29035 }, { "epoch": 3.4431400450610696, "grad_norm": 0.60811192212662, "learning_rate": 2.4984421831446058e-06, "loss": 0.0428, "step": 29036 }, { "epoch": 3.4432586268231944, "grad_norm": 0.8429902216223468, "learning_rate": 2.4973963025887336e-06, "loss": 0.0304, "step": 29037 }, { "epoch": 3.4433772085853196, "grad_norm": 0.4149262359324661, "learning_rate": 2.496350629479277e-06, "loss": 0.0167, "step": 29038 }, { "epoch": 3.443495790347445, "grad_norm": 0.6868572927466966, "learning_rate": 2.49530516382587e-06, "loss": 0.0335, "step": 29039 }, { "epoch": 3.4436143721095696, "grad_norm": 0.4949522328778541, "learning_rate": 2.494259905638152e-06, "loss": 0.0278, "step": 29040 }, { "epoch": 3.4437329538716943, "grad_norm": 0.43521659488519765, "learning_rate": 2.493214854925763e-06, "loss": 0.0229, "step": 29041 }, { "epoch": 3.4438515356338195, "grad_norm": 0.493761703408868, "learning_rate": 2.4921700116983247e-06, "loss": 0.0242, "step": 29042 }, { "epoch": 3.4439701173959447, "grad_norm": 0.7906687535597051, "learning_rate": 2.491125375965489e-06, "loss": 0.0313, "step": 29043 }, { "epoch": 3.4440886991580695, "grad_norm": 0.37421671592941297, "learning_rate": 2.4900809477368697e-06, "loss": 0.0154, "step": 29044 }, { "epoch": 3.4442072809201942, "grad_norm": 0.4604902073836254, "learning_rate": 2.489036727022104e-06, "loss": 0.0274, "step": 29045 }, { "epoch": 3.4443258626823194, "grad_norm": 0.7128855087073487, "learning_rate": 2.4879927138308058e-06, "loss": 0.0349, "step": 29046 }, { "epoch": 3.4444444444444446, "grad_norm": 0.44574328112838224, "learning_rate": 2.4869489081726205e-06, "loss": 0.0208, "step": 29047 }, { "epoch": 3.4445630262065694, "grad_norm": 0.6144187080687736, "learning_rate": 2.4859053100571516e-06, "loss": 0.0309, "step": 29048 }, { "epoch": 3.4446816079686946, "grad_norm": 0.7717896735378873, "learning_rate": 2.4848619194940275e-06, "loss": 0.0234, "step": 29049 }, { "epoch": 3.4448001897308194, "grad_norm": 0.6885027311728122, "learning_rate": 2.483818736492868e-06, "loss": 0.0368, "step": 29050 }, { "epoch": 3.4449187714929446, "grad_norm": 0.46539491262313476, "learning_rate": 2.482775761063286e-06, "loss": 0.0212, "step": 29051 }, { "epoch": 3.4450373532550693, "grad_norm": 0.6612896062642103, "learning_rate": 2.4817329932149054e-06, "loss": 0.0355, "step": 29052 }, { "epoch": 3.4451559350171945, "grad_norm": 0.44109996233261195, "learning_rate": 2.480690432957328e-06, "loss": 0.0158, "step": 29053 }, { "epoch": 3.4452745167793193, "grad_norm": 0.4968721388551394, "learning_rate": 2.4796480803001706e-06, "loss": 0.025, "step": 29054 }, { "epoch": 3.4453930985414445, "grad_norm": 1.147394507233972, "learning_rate": 2.478605935253042e-06, "loss": 0.0526, "step": 29055 }, { "epoch": 3.4455116803035692, "grad_norm": 0.5406626219299528, "learning_rate": 2.4775639978255466e-06, "loss": 0.023, "step": 29056 }, { "epoch": 3.4456302620656944, "grad_norm": 0.6962725141516158, "learning_rate": 2.4765222680272954e-06, "loss": 0.0394, "step": 29057 }, { "epoch": 3.445748843827819, "grad_norm": 0.36824669094538337, "learning_rate": 2.4754807458678862e-06, "loss": 0.021, "step": 29058 }, { "epoch": 3.4458674255899444, "grad_norm": 0.4127295661526935, "learning_rate": 2.4744394313569286e-06, "loss": 0.0217, "step": 29059 }, { "epoch": 3.445986007352069, "grad_norm": 0.6835067752735285, "learning_rate": 2.4733983245040144e-06, "loss": 0.0328, "step": 29060 }, { "epoch": 3.4461045891141944, "grad_norm": 0.3362011922061746, "learning_rate": 2.4723574253187413e-06, "loss": 0.0159, "step": 29061 }, { "epoch": 3.446223170876319, "grad_norm": 0.4708177767983754, "learning_rate": 2.471316733810708e-06, "loss": 0.019, "step": 29062 }, { "epoch": 3.4463417526384443, "grad_norm": 0.6881375737590889, "learning_rate": 2.4702762499895115e-06, "loss": 0.0276, "step": 29063 }, { "epoch": 3.446460334400569, "grad_norm": 0.3133189107218558, "learning_rate": 2.4692359738647363e-06, "loss": 0.0145, "step": 29064 }, { "epoch": 3.4465789161626943, "grad_norm": 0.5284518849314871, "learning_rate": 2.4681959054459775e-06, "loss": 0.017, "step": 29065 }, { "epoch": 3.446697497924819, "grad_norm": 0.44084197578852424, "learning_rate": 2.4671560447428187e-06, "loss": 0.0232, "step": 29066 }, { "epoch": 3.4468160796869443, "grad_norm": 0.6930571780853112, "learning_rate": 2.466116391764853e-06, "loss": 0.0461, "step": 29067 }, { "epoch": 3.446934661449069, "grad_norm": 0.7381030154483369, "learning_rate": 2.465076946521666e-06, "loss": 0.049, "step": 29068 }, { "epoch": 3.447053243211194, "grad_norm": 0.5222286658119337, "learning_rate": 2.4640377090228232e-06, "loss": 0.0211, "step": 29069 }, { "epoch": 3.447171824973319, "grad_norm": 0.3848981858597521, "learning_rate": 2.462998679277931e-06, "loss": 0.0195, "step": 29070 }, { "epoch": 3.447290406735444, "grad_norm": 0.6321971016957006, "learning_rate": 2.461959857296545e-06, "loss": 0.0315, "step": 29071 }, { "epoch": 3.447408988497569, "grad_norm": 0.3801539252808336, "learning_rate": 2.4609212430882546e-06, "loss": 0.0139, "step": 29072 }, { "epoch": 3.447527570259694, "grad_norm": 0.3264625540319487, "learning_rate": 2.4598828366626304e-06, "loss": 0.0165, "step": 29073 }, { "epoch": 3.447646152021819, "grad_norm": 0.48794416207080965, "learning_rate": 2.458844638029248e-06, "loss": 0.0235, "step": 29074 }, { "epoch": 3.447764733783944, "grad_norm": 0.9659093185556107, "learning_rate": 2.4578066471976747e-06, "loss": 0.0616, "step": 29075 }, { "epoch": 3.447883315546069, "grad_norm": 0.6131472729446279, "learning_rate": 2.4567688641774806e-06, "loss": 0.0345, "step": 29076 }, { "epoch": 3.448001897308194, "grad_norm": 0.4245434145883034, "learning_rate": 2.4557312889782304e-06, "loss": 0.0251, "step": 29077 }, { "epoch": 3.448120479070319, "grad_norm": 0.5652360203241789, "learning_rate": 2.454693921609494e-06, "loss": 0.0361, "step": 29078 }, { "epoch": 3.448239060832444, "grad_norm": 0.6171989344295306, "learning_rate": 2.4536567620808387e-06, "loss": 0.0322, "step": 29079 }, { "epoch": 3.448357642594569, "grad_norm": 0.6236299718862258, "learning_rate": 2.45261981040181e-06, "loss": 0.0313, "step": 29080 }, { "epoch": 3.448476224356694, "grad_norm": 1.0501241705669155, "learning_rate": 2.451583066581986e-06, "loss": 0.0417, "step": 29081 }, { "epoch": 3.4485948061188187, "grad_norm": 0.5401271169231141, "learning_rate": 2.4505465306309096e-06, "loss": 0.0313, "step": 29082 }, { "epoch": 3.448713387880944, "grad_norm": 0.5457164411682847, "learning_rate": 2.44951020255815e-06, "loss": 0.0194, "step": 29083 }, { "epoch": 3.4488319696430687, "grad_norm": 0.48322794242525596, "learning_rate": 2.4484740823732423e-06, "loss": 0.0146, "step": 29084 }, { "epoch": 3.448950551405194, "grad_norm": 0.47927678487680353, "learning_rate": 2.4474381700857556e-06, "loss": 0.0205, "step": 29085 }, { "epoch": 3.4490691331673187, "grad_norm": 0.5112067035784186, "learning_rate": 2.446402465705233e-06, "loss": 0.0269, "step": 29086 }, { "epoch": 3.449187714929444, "grad_norm": 0.3549496276497199, "learning_rate": 2.4453669692412196e-06, "loss": 0.0149, "step": 29087 }, { "epoch": 3.449306296691569, "grad_norm": 0.32026219841249004, "learning_rate": 2.444331680703266e-06, "loss": 0.0115, "step": 29088 }, { "epoch": 3.449424878453694, "grad_norm": 0.4906294223303354, "learning_rate": 2.4432966001009137e-06, "loss": 0.0171, "step": 29089 }, { "epoch": 3.4495434602158186, "grad_norm": 0.5084425183433193, "learning_rate": 2.4422617274437094e-06, "loss": 0.0253, "step": 29090 }, { "epoch": 3.449662041977944, "grad_norm": 0.4702801357258427, "learning_rate": 2.441227062741183e-06, "loss": 0.0244, "step": 29091 }, { "epoch": 3.449780623740069, "grad_norm": 0.6261992569868765, "learning_rate": 2.440192606002889e-06, "loss": 0.0297, "step": 29092 }, { "epoch": 3.4498992055021938, "grad_norm": 0.6001678109123858, "learning_rate": 2.439158357238347e-06, "loss": 0.0279, "step": 29093 }, { "epoch": 3.4500177872643185, "grad_norm": 0.6577475440541127, "learning_rate": 2.438124316457108e-06, "loss": 0.0429, "step": 29094 }, { "epoch": 3.4501363690264437, "grad_norm": 0.6099428838597033, "learning_rate": 2.437090483668683e-06, "loss": 0.0327, "step": 29095 }, { "epoch": 3.450254950788569, "grad_norm": 0.7798756714031444, "learning_rate": 2.436056858882624e-06, "loss": 0.0225, "step": 29096 }, { "epoch": 3.4503735325506937, "grad_norm": 0.25367868906629515, "learning_rate": 2.43502344210845e-06, "loss": 0.0148, "step": 29097 }, { "epoch": 3.450492114312819, "grad_norm": 0.7278339321405408, "learning_rate": 2.4339902333556847e-06, "loss": 0.0424, "step": 29098 }, { "epoch": 3.4506106960749436, "grad_norm": 0.5388410171293393, "learning_rate": 2.432957232633859e-06, "loss": 0.0196, "step": 29099 }, { "epoch": 3.450729277837069, "grad_norm": 0.41524535508994814, "learning_rate": 2.431924439952496e-06, "loss": 0.0228, "step": 29100 }, { "epoch": 3.4508478595991936, "grad_norm": 0.448102558638431, "learning_rate": 2.4308918553211158e-06, "loss": 0.0241, "step": 29101 }, { "epoch": 3.450966441361319, "grad_norm": 0.728779967832756, "learning_rate": 2.4298594787492303e-06, "loss": 0.0428, "step": 29102 }, { "epoch": 3.4510850231234436, "grad_norm": 0.40671009155851484, "learning_rate": 2.4288273102463707e-06, "loss": 0.0101, "step": 29103 }, { "epoch": 3.4512036048855688, "grad_norm": 0.5054074567225302, "learning_rate": 2.4277953498220402e-06, "loss": 0.0245, "step": 29104 }, { "epoch": 3.4513221866476935, "grad_norm": 0.5195237817801355, "learning_rate": 2.4267635974857596e-06, "loss": 0.0209, "step": 29105 }, { "epoch": 3.4514407684098187, "grad_norm": 0.5329880639000194, "learning_rate": 2.4257320532470347e-06, "loss": 0.0294, "step": 29106 }, { "epoch": 3.4515593501719435, "grad_norm": 0.5435215251312642, "learning_rate": 2.4247007171153775e-06, "loss": 0.0204, "step": 29107 }, { "epoch": 3.4516779319340687, "grad_norm": 0.4983534860961178, "learning_rate": 2.4236695891003026e-06, "loss": 0.027, "step": 29108 }, { "epoch": 3.4517965136961934, "grad_norm": 0.42680178054981305, "learning_rate": 2.4226386692113024e-06, "loss": 0.015, "step": 29109 }, { "epoch": 3.4519150954583186, "grad_norm": 0.4364123449344783, "learning_rate": 2.421607957457889e-06, "loss": 0.0179, "step": 29110 }, { "epoch": 3.4520336772204434, "grad_norm": 0.535360851232987, "learning_rate": 2.420577453849562e-06, "loss": 0.0215, "step": 29111 }, { "epoch": 3.4521522589825686, "grad_norm": 0.7699222645836294, "learning_rate": 2.4195471583958266e-06, "loss": 0.0296, "step": 29112 }, { "epoch": 3.4522708407446934, "grad_norm": 0.39377041856280265, "learning_rate": 2.4185170711061663e-06, "loss": 0.0179, "step": 29113 }, { "epoch": 3.4523894225068186, "grad_norm": 0.48562298414123767, "learning_rate": 2.4174871919900976e-06, "loss": 0.0256, "step": 29114 }, { "epoch": 3.4525080042689433, "grad_norm": 0.9009202034298417, "learning_rate": 2.4164575210571028e-06, "loss": 0.0514, "step": 29115 }, { "epoch": 3.4526265860310685, "grad_norm": 0.44423996392698284, "learning_rate": 2.415428058316671e-06, "loss": 0.0263, "step": 29116 }, { "epoch": 3.4527451677931933, "grad_norm": 0.41158259356190857, "learning_rate": 2.4143988037783006e-06, "loss": 0.0172, "step": 29117 }, { "epoch": 3.4528637495553185, "grad_norm": 0.5035397554871353, "learning_rate": 2.4133697574514753e-06, "loss": 0.0335, "step": 29118 }, { "epoch": 3.4529823313174433, "grad_norm": 0.4993300982736495, "learning_rate": 2.4123409193456903e-06, "loss": 0.0204, "step": 29119 }, { "epoch": 3.4531009130795685, "grad_norm": 0.6121959058679166, "learning_rate": 2.411312289470416e-06, "loss": 0.0292, "step": 29120 }, { "epoch": 3.453219494841693, "grad_norm": 0.5103585864992013, "learning_rate": 2.410283867835142e-06, "loss": 0.024, "step": 29121 }, { "epoch": 3.4533380766038184, "grad_norm": 0.31194641684442226, "learning_rate": 2.409255654449352e-06, "loss": 0.0101, "step": 29122 }, { "epoch": 3.453456658365943, "grad_norm": 0.5296828074421802, "learning_rate": 2.4082276493225255e-06, "loss": 0.02, "step": 29123 }, { "epoch": 3.4535752401280684, "grad_norm": 0.47057795131740915, "learning_rate": 2.4071998524641316e-06, "loss": 0.0286, "step": 29124 }, { "epoch": 3.453693821890193, "grad_norm": 0.653847418806593, "learning_rate": 2.406172263883649e-06, "loss": 0.0353, "step": 29125 }, { "epoch": 3.4538124036523183, "grad_norm": 0.3908199735091344, "learning_rate": 2.4051448835905543e-06, "loss": 0.0215, "step": 29126 }, { "epoch": 3.453930985414443, "grad_norm": 0.2446434433545278, "learning_rate": 2.4041177115943143e-06, "loss": 0.0116, "step": 29127 }, { "epoch": 3.4540495671765683, "grad_norm": 0.6051101993628845, "learning_rate": 2.403090747904399e-06, "loss": 0.0348, "step": 29128 }, { "epoch": 3.454168148938693, "grad_norm": 1.1011181467229332, "learning_rate": 2.402063992530279e-06, "loss": 0.0365, "step": 29129 }, { "epoch": 3.4542867307008183, "grad_norm": 0.49919465078662584, "learning_rate": 2.4010374454814215e-06, "loss": 0.0283, "step": 29130 }, { "epoch": 3.454405312462943, "grad_norm": 0.9398934601483375, "learning_rate": 2.4000111067672803e-06, "loss": 0.0396, "step": 29131 }, { "epoch": 3.4545238942250682, "grad_norm": 0.502926861637996, "learning_rate": 2.3989849763973253e-06, "loss": 0.0201, "step": 29132 }, { "epoch": 3.454642475987193, "grad_norm": 0.4352577537427602, "learning_rate": 2.3979590543810103e-06, "loss": 0.017, "step": 29133 }, { "epoch": 3.454761057749318, "grad_norm": 0.6917650226454554, "learning_rate": 2.3969333407278026e-06, "loss": 0.0334, "step": 29134 }, { "epoch": 3.454879639511443, "grad_norm": 0.443412991482155, "learning_rate": 2.395907835447148e-06, "loss": 0.022, "step": 29135 }, { "epoch": 3.454998221273568, "grad_norm": 0.5807171552882378, "learning_rate": 2.394882538548504e-06, "loss": 0.0272, "step": 29136 }, { "epoch": 3.4551168030356934, "grad_norm": 0.524486607627375, "learning_rate": 2.393857450041323e-06, "loss": 0.0298, "step": 29137 }, { "epoch": 3.455235384797818, "grad_norm": 0.7361390524249458, "learning_rate": 2.3928325699350525e-06, "loss": 0.042, "step": 29138 }, { "epoch": 3.455353966559943, "grad_norm": 0.3936206145546536, "learning_rate": 2.391807898239151e-06, "loss": 0.0184, "step": 29139 }, { "epoch": 3.455472548322068, "grad_norm": 0.41097104133096174, "learning_rate": 2.3907834349630454e-06, "loss": 0.0196, "step": 29140 }, { "epoch": 3.4555911300841933, "grad_norm": 0.4702539497503319, "learning_rate": 2.3897591801161995e-06, "loss": 0.0212, "step": 29141 }, { "epoch": 3.455709711846318, "grad_norm": 0.5432561748206506, "learning_rate": 2.388735133708045e-06, "loss": 0.0258, "step": 29142 }, { "epoch": 3.455828293608443, "grad_norm": 0.5220575364757349, "learning_rate": 2.387711295748024e-06, "loss": 0.0361, "step": 29143 }, { "epoch": 3.455946875370568, "grad_norm": 0.6786209163520089, "learning_rate": 2.3866876662455768e-06, "loss": 0.037, "step": 29144 }, { "epoch": 3.456065457132693, "grad_norm": 0.3989756706300206, "learning_rate": 2.3856642452101426e-06, "loss": 0.0142, "step": 29145 }, { "epoch": 3.456184038894818, "grad_norm": 0.941302209669808, "learning_rate": 2.384641032651147e-06, "loss": 0.0451, "step": 29146 }, { "epoch": 3.4563026206569427, "grad_norm": 0.4718219337395561, "learning_rate": 2.38361802857803e-06, "loss": 0.0231, "step": 29147 }, { "epoch": 3.456421202419068, "grad_norm": 0.5575502589083844, "learning_rate": 2.3825952330002175e-06, "loss": 0.0268, "step": 29148 }, { "epoch": 3.456539784181193, "grad_norm": 0.6456794590073137, "learning_rate": 2.3815726459271466e-06, "loss": 0.0206, "step": 29149 }, { "epoch": 3.456658365943318, "grad_norm": 0.4729364059552349, "learning_rate": 2.3805502673682393e-06, "loss": 0.0149, "step": 29150 }, { "epoch": 3.456776947705443, "grad_norm": 0.3512795100912271, "learning_rate": 2.3795280973329144e-06, "loss": 0.0176, "step": 29151 }, { "epoch": 3.456895529467568, "grad_norm": 0.3194289502246709, "learning_rate": 2.3785061358306103e-06, "loss": 0.0135, "step": 29152 }, { "epoch": 3.457014111229693, "grad_norm": 0.4841076055402178, "learning_rate": 2.377484382870734e-06, "loss": 0.0269, "step": 29153 }, { "epoch": 3.457132692991818, "grad_norm": 0.744501861447363, "learning_rate": 2.3764628384627092e-06, "loss": 0.0401, "step": 29154 }, { "epoch": 3.457251274753943, "grad_norm": 0.5965989632184261, "learning_rate": 2.375441502615958e-06, "loss": 0.0384, "step": 29155 }, { "epoch": 3.4573698565160678, "grad_norm": 0.5019516278429694, "learning_rate": 2.3744203753398893e-06, "loss": 0.0313, "step": 29156 }, { "epoch": 3.457488438278193, "grad_norm": 0.7482487622862202, "learning_rate": 2.373399456643924e-06, "loss": 0.0261, "step": 29157 }, { "epoch": 3.4576070200403177, "grad_norm": 0.4037401990261856, "learning_rate": 2.37237874653746e-06, "loss": 0.0215, "step": 29158 }, { "epoch": 3.457725601802443, "grad_norm": 0.4071867916431023, "learning_rate": 2.3713582450299256e-06, "loss": 0.0192, "step": 29159 }, { "epoch": 3.4578441835645677, "grad_norm": 0.431317747015767, "learning_rate": 2.3703379521307133e-06, "loss": 0.0246, "step": 29160 }, { "epoch": 3.457962765326693, "grad_norm": 0.4498646230470677, "learning_rate": 2.369317867849241e-06, "loss": 0.0186, "step": 29161 }, { "epoch": 3.4580813470888176, "grad_norm": 0.6381561397648977, "learning_rate": 2.3682979921948957e-06, "loss": 0.0309, "step": 29162 }, { "epoch": 3.458199928850943, "grad_norm": 0.6913315143822379, "learning_rate": 2.3672783251770996e-06, "loss": 0.0376, "step": 29163 }, { "epoch": 3.4583185106130676, "grad_norm": 0.5026546854088444, "learning_rate": 2.3662588668052405e-06, "loss": 0.0219, "step": 29164 }, { "epoch": 3.458437092375193, "grad_norm": 0.5068024540847944, "learning_rate": 2.3652396170887185e-06, "loss": 0.0201, "step": 29165 }, { "epoch": 3.4585556741373176, "grad_norm": 0.6150152004638736, "learning_rate": 2.3642205760369296e-06, "loss": 0.025, "step": 29166 }, { "epoch": 3.4586742558994428, "grad_norm": 0.39428498436565934, "learning_rate": 2.3632017436592713e-06, "loss": 0.0148, "step": 29167 }, { "epoch": 3.4587928376615675, "grad_norm": 0.6371613394360435, "learning_rate": 2.3621831199651363e-06, "loss": 0.0253, "step": 29168 }, { "epoch": 3.4589114194236927, "grad_norm": 0.35446485321873566, "learning_rate": 2.3611647049639085e-06, "loss": 0.0188, "step": 29169 }, { "epoch": 3.4590300011858175, "grad_norm": 0.42105730205117387, "learning_rate": 2.3601464986649805e-06, "loss": 0.0198, "step": 29170 }, { "epoch": 3.4591485829479427, "grad_norm": 0.44397925153500356, "learning_rate": 2.3591285010777398e-06, "loss": 0.0172, "step": 29171 }, { "epoch": 3.4592671647100675, "grad_norm": 0.4601114513597288, "learning_rate": 2.3581107122115726e-06, "loss": 0.0176, "step": 29172 }, { "epoch": 3.4593857464721927, "grad_norm": 0.5760908276558131, "learning_rate": 2.357093132075852e-06, "loss": 0.0236, "step": 29173 }, { "epoch": 3.4595043282343174, "grad_norm": 0.5707483367214836, "learning_rate": 2.356075760679977e-06, "loss": 0.0246, "step": 29174 }, { "epoch": 3.4596229099964426, "grad_norm": 0.7650300239006366, "learning_rate": 2.355058598033308e-06, "loss": 0.0325, "step": 29175 }, { "epoch": 3.4597414917585674, "grad_norm": 0.6268083426615895, "learning_rate": 2.3540416441452303e-06, "loss": 0.0312, "step": 29176 }, { "epoch": 3.4598600735206926, "grad_norm": 0.5324158442267943, "learning_rate": 2.353024899025119e-06, "loss": 0.0214, "step": 29177 }, { "epoch": 3.4599786552828173, "grad_norm": 0.478556260432901, "learning_rate": 2.352008362682348e-06, "loss": 0.0342, "step": 29178 }, { "epoch": 3.4600972370449425, "grad_norm": 0.4941512047149912, "learning_rate": 2.3509920351262897e-06, "loss": 0.0225, "step": 29179 }, { "epoch": 3.4602158188070673, "grad_norm": 0.4360482135295614, "learning_rate": 2.3499759163663064e-06, "loss": 0.0198, "step": 29180 }, { "epoch": 3.4603344005691925, "grad_norm": 0.42785148117722394, "learning_rate": 2.3489600064117714e-06, "loss": 0.021, "step": 29181 }, { "epoch": 3.4604529823313173, "grad_norm": 0.4193752116905748, "learning_rate": 2.3479443052720455e-06, "loss": 0.0224, "step": 29182 }, { "epoch": 3.4605715640934425, "grad_norm": 0.6183234947768533, "learning_rate": 2.3469288129565033e-06, "loss": 0.0265, "step": 29183 }, { "epoch": 3.4606901458555672, "grad_norm": 0.686243132890176, "learning_rate": 2.3459135294744915e-06, "loss": 0.0388, "step": 29184 }, { "epoch": 3.4608087276176924, "grad_norm": 0.38876932355266997, "learning_rate": 2.344898454835376e-06, "loss": 0.0157, "step": 29185 }, { "epoch": 3.4609273093798176, "grad_norm": 0.5486989908427763, "learning_rate": 2.343883589048518e-06, "loss": 0.0282, "step": 29186 }, { "epoch": 3.4610458911419424, "grad_norm": 0.8197035562226992, "learning_rate": 2.3428689321232683e-06, "loss": 0.0394, "step": 29187 }, { "epoch": 3.461164472904067, "grad_norm": 0.38294462543233554, "learning_rate": 2.3418544840689837e-06, "loss": 0.023, "step": 29188 }, { "epoch": 3.4612830546661923, "grad_norm": 0.47244381162723154, "learning_rate": 2.3408402448950178e-06, "loss": 0.0212, "step": 29189 }, { "epoch": 3.4614016364283176, "grad_norm": 0.7327408986259605, "learning_rate": 2.3398262146107184e-06, "loss": 0.0358, "step": 29190 }, { "epoch": 3.4615202181904423, "grad_norm": 0.9052039550191823, "learning_rate": 2.3388123932254314e-06, "loss": 0.0355, "step": 29191 }, { "epoch": 3.461638799952567, "grad_norm": 0.37430350096345405, "learning_rate": 2.337798780748507e-06, "loss": 0.0213, "step": 29192 }, { "epoch": 3.4617573817146923, "grad_norm": 0.6426367452790981, "learning_rate": 2.336785377189285e-06, "loss": 0.0256, "step": 29193 }, { "epoch": 3.4618759634768175, "grad_norm": 0.7611860600812577, "learning_rate": 2.3357721825571167e-06, "loss": 0.0284, "step": 29194 }, { "epoch": 3.4619945452389422, "grad_norm": 0.5374940418043093, "learning_rate": 2.3347591968613303e-06, "loss": 0.0222, "step": 29195 }, { "epoch": 3.462113127001067, "grad_norm": 0.43357739383765964, "learning_rate": 2.3337464201112714e-06, "loss": 0.0183, "step": 29196 }, { "epoch": 3.462231708763192, "grad_norm": 0.45984287638870347, "learning_rate": 2.3327338523162737e-06, "loss": 0.0201, "step": 29197 }, { "epoch": 3.4623502905253174, "grad_norm": 0.5021546935423872, "learning_rate": 2.331721493485672e-06, "loss": 0.0246, "step": 29198 }, { "epoch": 3.462468872287442, "grad_norm": 0.38046099764940894, "learning_rate": 2.3307093436288035e-06, "loss": 0.0148, "step": 29199 }, { "epoch": 3.4625874540495674, "grad_norm": 0.8246527526689806, "learning_rate": 2.329697402754996e-06, "loss": 0.0318, "step": 29200 }, { "epoch": 3.462706035811692, "grad_norm": 0.5096182106261258, "learning_rate": 2.3286856708735817e-06, "loss": 0.0246, "step": 29201 }, { "epoch": 3.4628246175738173, "grad_norm": 0.6155270745791951, "learning_rate": 2.32767414799388e-06, "loss": 0.0403, "step": 29202 }, { "epoch": 3.462943199335942, "grad_norm": 0.7149685061260455, "learning_rate": 2.326662834125218e-06, "loss": 0.025, "step": 29203 }, { "epoch": 3.4630617810980673, "grad_norm": 0.799547838291144, "learning_rate": 2.325651729276923e-06, "loss": 0.0323, "step": 29204 }, { "epoch": 3.463180362860192, "grad_norm": 0.47744047681059104, "learning_rate": 2.324640833458314e-06, "loss": 0.0255, "step": 29205 }, { "epoch": 3.4632989446223172, "grad_norm": 0.38861427293090195, "learning_rate": 2.323630146678715e-06, "loss": 0.0212, "step": 29206 }, { "epoch": 3.463417526384442, "grad_norm": 0.5181630971639927, "learning_rate": 2.322619668947429e-06, "loss": 0.0291, "step": 29207 }, { "epoch": 3.463536108146567, "grad_norm": 0.5033355362689607, "learning_rate": 2.3216094002737887e-06, "loss": 0.0203, "step": 29208 }, { "epoch": 3.463654689908692, "grad_norm": 0.3843616538554143, "learning_rate": 2.3205993406670956e-06, "loss": 0.0129, "step": 29209 }, { "epoch": 3.463773271670817, "grad_norm": 0.49273614052821935, "learning_rate": 2.31958949013667e-06, "loss": 0.0232, "step": 29210 }, { "epoch": 3.463891853432942, "grad_norm": 0.41686628947461485, "learning_rate": 2.318579848691807e-06, "loss": 0.0199, "step": 29211 }, { "epoch": 3.464010435195067, "grad_norm": 0.5355078326313036, "learning_rate": 2.3175704163418327e-06, "loss": 0.0305, "step": 29212 }, { "epoch": 3.464129016957192, "grad_norm": 0.46081804506628865, "learning_rate": 2.3165611930960422e-06, "loss": 0.0187, "step": 29213 }, { "epoch": 3.464247598719317, "grad_norm": 0.4681618977565449, "learning_rate": 2.3155521789637395e-06, "loss": 0.025, "step": 29214 }, { "epoch": 3.464366180481442, "grad_norm": 0.37783781811394546, "learning_rate": 2.314543373954228e-06, "loss": 0.0209, "step": 29215 }, { "epoch": 3.464484762243567, "grad_norm": 0.38131194814540975, "learning_rate": 2.313534778076809e-06, "loss": 0.0158, "step": 29216 }, { "epoch": 3.464603344005692, "grad_norm": 0.3049061224715207, "learning_rate": 2.312526391340783e-06, "loss": 0.0115, "step": 29217 }, { "epoch": 3.464721925767817, "grad_norm": 0.8081328030924009, "learning_rate": 2.311518213755434e-06, "loss": 0.0361, "step": 29218 }, { "epoch": 3.4648405075299418, "grad_norm": 0.7385873176272129, "learning_rate": 2.310510245330072e-06, "loss": 0.0422, "step": 29219 }, { "epoch": 3.464959089292067, "grad_norm": 0.6154225075242171, "learning_rate": 2.3095024860739777e-06, "loss": 0.0249, "step": 29220 }, { "epoch": 3.4650776710541917, "grad_norm": 0.4057790569876691, "learning_rate": 2.3084949359964525e-06, "loss": 0.0198, "step": 29221 }, { "epoch": 3.465196252816317, "grad_norm": 0.6043904936017849, "learning_rate": 2.3074875951067663e-06, "loss": 0.0299, "step": 29222 }, { "epoch": 3.4653148345784417, "grad_norm": 0.4725309851724821, "learning_rate": 2.306480463414226e-06, "loss": 0.0273, "step": 29223 }, { "epoch": 3.465433416340567, "grad_norm": 0.41311783224261067, "learning_rate": 2.305473540928105e-06, "loss": 0.0166, "step": 29224 }, { "epoch": 3.4655519981026917, "grad_norm": 0.39116023405248135, "learning_rate": 2.3044668276576865e-06, "loss": 0.0177, "step": 29225 }, { "epoch": 3.465670579864817, "grad_norm": 0.4396692049511022, "learning_rate": 2.3034603236122526e-06, "loss": 0.0185, "step": 29226 }, { "epoch": 3.4657891616269416, "grad_norm": 0.64726760833289, "learning_rate": 2.302454028801082e-06, "loss": 0.0261, "step": 29227 }, { "epoch": 3.465907743389067, "grad_norm": 0.2683013761735624, "learning_rate": 2.301447943233459e-06, "loss": 0.0135, "step": 29228 }, { "epoch": 3.4660263251511916, "grad_norm": 0.7350067798720586, "learning_rate": 2.30044206691864e-06, "loss": 0.0337, "step": 29229 }, { "epoch": 3.466144906913317, "grad_norm": 0.4117896264739196, "learning_rate": 2.2994363998659175e-06, "loss": 0.0169, "step": 29230 }, { "epoch": 3.4662634886754415, "grad_norm": 0.5889909730550916, "learning_rate": 2.2984309420845505e-06, "loss": 0.0286, "step": 29231 }, { "epoch": 3.4663820704375667, "grad_norm": 0.7806434397525628, "learning_rate": 2.2974256935838155e-06, "loss": 0.0451, "step": 29232 }, { "epoch": 3.4665006521996915, "grad_norm": 0.5877016213551188, "learning_rate": 2.296420654372966e-06, "loss": 0.031, "step": 29233 }, { "epoch": 3.4666192339618167, "grad_norm": 0.5305012375935263, "learning_rate": 2.295415824461289e-06, "loss": 0.0258, "step": 29234 }, { "epoch": 3.466737815723942, "grad_norm": 0.6468108451264133, "learning_rate": 2.2944112038580296e-06, "loss": 0.0311, "step": 29235 }, { "epoch": 3.4668563974860667, "grad_norm": 0.7446141292116759, "learning_rate": 2.2934067925724558e-06, "loss": 0.0338, "step": 29236 }, { "epoch": 3.4669749792481914, "grad_norm": 0.6951810469313582, "learning_rate": 2.2924025906138263e-06, "loss": 0.0438, "step": 29237 }, { "epoch": 3.4670935610103166, "grad_norm": 0.48966888181366836, "learning_rate": 2.2913985979913984e-06, "loss": 0.0245, "step": 29238 }, { "epoch": 3.467212142772442, "grad_norm": 0.858347380816816, "learning_rate": 2.2903948147144335e-06, "loss": 0.0397, "step": 29239 }, { "epoch": 3.4673307245345666, "grad_norm": 0.6910544504983422, "learning_rate": 2.289391240792174e-06, "loss": 0.0406, "step": 29240 }, { "epoch": 3.4674493062966913, "grad_norm": 0.4180187487007885, "learning_rate": 2.2883878762338802e-06, "loss": 0.0188, "step": 29241 }, { "epoch": 3.4675678880588165, "grad_norm": 0.4030310050911613, "learning_rate": 2.287384721048799e-06, "loss": 0.0216, "step": 29242 }, { "epoch": 3.4676864698209418, "grad_norm": 0.29165651728320235, "learning_rate": 2.2863817752461824e-06, "loss": 0.0127, "step": 29243 }, { "epoch": 3.4678050515830665, "grad_norm": 0.6283633334819425, "learning_rate": 2.285379038835264e-06, "loss": 0.0276, "step": 29244 }, { "epoch": 3.4679236333451913, "grad_norm": 0.5748974427353366, "learning_rate": 2.2843765118253063e-06, "loss": 0.0325, "step": 29245 }, { "epoch": 3.4680422151073165, "grad_norm": 0.5430599629009871, "learning_rate": 2.283374194225535e-06, "loss": 0.0276, "step": 29246 }, { "epoch": 3.4681607968694417, "grad_norm": 0.45924909733827496, "learning_rate": 2.2823720860451982e-06, "loss": 0.0212, "step": 29247 }, { "epoch": 3.4682793786315664, "grad_norm": 0.3977549388136383, "learning_rate": 2.28137018729353e-06, "loss": 0.0161, "step": 29248 }, { "epoch": 3.4683979603936916, "grad_norm": 0.6090912479066212, "learning_rate": 2.280368497979771e-06, "loss": 0.0402, "step": 29249 }, { "epoch": 3.4685165421558164, "grad_norm": 0.5027632041767917, "learning_rate": 2.27936701811316e-06, "loss": 0.0291, "step": 29250 }, { "epoch": 3.4686351239179416, "grad_norm": 0.7510045877220342, "learning_rate": 2.2783657477029154e-06, "loss": 0.0283, "step": 29251 }, { "epoch": 3.4687537056800664, "grad_norm": 0.37895849673328674, "learning_rate": 2.2773646867582766e-06, "loss": 0.0193, "step": 29252 }, { "epoch": 3.4688722874421916, "grad_norm": 0.872207472640943, "learning_rate": 2.2763638352884724e-06, "loss": 0.0333, "step": 29253 }, { "epoch": 3.4689908692043163, "grad_norm": 0.7354669985035106, "learning_rate": 2.2753631933027262e-06, "loss": 0.0376, "step": 29254 }, { "epoch": 3.4691094509664415, "grad_norm": 0.6138287094172261, "learning_rate": 2.2743627608102724e-06, "loss": 0.0283, "step": 29255 }, { "epoch": 3.4692280327285663, "grad_norm": 0.5557663521248286, "learning_rate": 2.273362537820314e-06, "loss": 0.0272, "step": 29256 }, { "epoch": 3.4693466144906915, "grad_norm": 0.4284694470059661, "learning_rate": 2.272362524342092e-06, "loss": 0.0138, "step": 29257 }, { "epoch": 3.4694651962528162, "grad_norm": 0.5118234662778252, "learning_rate": 2.271362720384815e-06, "loss": 0.0305, "step": 29258 }, { "epoch": 3.4695837780149414, "grad_norm": 0.37555604533488723, "learning_rate": 2.2703631259577e-06, "loss": 0.0213, "step": 29259 }, { "epoch": 3.469702359777066, "grad_norm": 0.4737136585785027, "learning_rate": 2.2693637410699657e-06, "loss": 0.0218, "step": 29260 }, { "epoch": 3.4698209415391914, "grad_norm": 0.6315782985750064, "learning_rate": 2.2683645657308267e-06, "loss": 0.0321, "step": 29261 }, { "epoch": 3.469939523301316, "grad_norm": 0.42753246248342197, "learning_rate": 2.267365599949489e-06, "loss": 0.0141, "step": 29262 }, { "epoch": 3.4700581050634414, "grad_norm": 0.47867536845770137, "learning_rate": 2.2663668437351625e-06, "loss": 0.0232, "step": 29263 }, { "epoch": 3.470176686825566, "grad_norm": 0.5341752148269964, "learning_rate": 2.2653682970970535e-06, "loss": 0.0189, "step": 29264 }, { "epoch": 3.4702952685876913, "grad_norm": 0.7014484795579908, "learning_rate": 2.264369960044374e-06, "loss": 0.0466, "step": 29265 }, { "epoch": 3.470413850349816, "grad_norm": 0.47148391756452984, "learning_rate": 2.263371832586325e-06, "loss": 0.018, "step": 29266 }, { "epoch": 3.4705324321119413, "grad_norm": 0.6170328148881957, "learning_rate": 2.2623739147320967e-06, "loss": 0.0245, "step": 29267 }, { "epoch": 3.470651013874066, "grad_norm": 0.5502432708161583, "learning_rate": 2.261376206490909e-06, "loss": 0.0251, "step": 29268 }, { "epoch": 3.4707695956361913, "grad_norm": 0.6851771844123379, "learning_rate": 2.2603787078719437e-06, "loss": 0.0329, "step": 29269 }, { "epoch": 3.470888177398316, "grad_norm": 0.6088632999321347, "learning_rate": 2.259381418884404e-06, "loss": 0.0273, "step": 29270 }, { "epoch": 3.471006759160441, "grad_norm": 0.5242722267236496, "learning_rate": 2.2583843395374753e-06, "loss": 0.0341, "step": 29271 }, { "epoch": 3.471125340922566, "grad_norm": 0.6595012240541882, "learning_rate": 2.2573874698403634e-06, "loss": 0.034, "step": 29272 }, { "epoch": 3.471243922684691, "grad_norm": 0.7472497732631438, "learning_rate": 2.2563908098022445e-06, "loss": 0.034, "step": 29273 }, { "epoch": 3.471362504446816, "grad_norm": 0.5263060717119447, "learning_rate": 2.255394359432314e-06, "loss": 0.0214, "step": 29274 }, { "epoch": 3.471481086208941, "grad_norm": 0.6765304455154629, "learning_rate": 2.2543981187397534e-06, "loss": 0.0285, "step": 29275 }, { "epoch": 3.471599667971066, "grad_norm": 0.4443430979735869, "learning_rate": 2.2534020877337524e-06, "loss": 0.018, "step": 29276 }, { "epoch": 3.471718249733191, "grad_norm": 0.7081427017708232, "learning_rate": 2.252406266423493e-06, "loss": 0.0308, "step": 29277 }, { "epoch": 3.471836831495316, "grad_norm": 0.3477525673613209, "learning_rate": 2.2514106548181454e-06, "loss": 0.0174, "step": 29278 }, { "epoch": 3.471955413257441, "grad_norm": 0.318594687569461, "learning_rate": 2.2504152529269047e-06, "loss": 0.0139, "step": 29279 }, { "epoch": 3.472073995019566, "grad_norm": 0.9101779629918835, "learning_rate": 2.2494200607589334e-06, "loss": 0.0356, "step": 29280 }, { "epoch": 3.472192576781691, "grad_norm": 0.500116108698615, "learning_rate": 2.248425078323413e-06, "loss": 0.0238, "step": 29281 }, { "epoch": 3.472311158543816, "grad_norm": 0.42246203136601496, "learning_rate": 2.2474303056295056e-06, "loss": 0.0177, "step": 29282 }, { "epoch": 3.472429740305941, "grad_norm": 0.43451935592753393, "learning_rate": 2.246435742686401e-06, "loss": 0.0268, "step": 29283 }, { "epoch": 3.4725483220680657, "grad_norm": 0.7409369785823833, "learning_rate": 2.24544138950325e-06, "loss": 0.0447, "step": 29284 }, { "epoch": 3.472666903830191, "grad_norm": 0.5498597544107012, "learning_rate": 2.244447246089226e-06, "loss": 0.0226, "step": 29285 }, { "epoch": 3.4727854855923157, "grad_norm": 0.4850332120498831, "learning_rate": 2.2434533124534945e-06, "loss": 0.0195, "step": 29286 }, { "epoch": 3.472904067354441, "grad_norm": 0.5777104109762493, "learning_rate": 2.242459588605214e-06, "loss": 0.0313, "step": 29287 }, { "epoch": 3.473022649116566, "grad_norm": 0.8531998864192926, "learning_rate": 2.241466074553558e-06, "loss": 0.0375, "step": 29288 }, { "epoch": 3.473141230878691, "grad_norm": 0.8808510730651472, "learning_rate": 2.2404727703076666e-06, "loss": 0.061, "step": 29289 }, { "epoch": 3.4732598126408156, "grad_norm": 0.3629974071762716, "learning_rate": 2.239479675876713e-06, "loss": 0.0142, "step": 29290 }, { "epoch": 3.473378394402941, "grad_norm": 0.90598838058496, "learning_rate": 2.2384867912698447e-06, "loss": 0.0461, "step": 29291 }, { "epoch": 3.473496976165066, "grad_norm": 0.4284036238984433, "learning_rate": 2.2374941164962194e-06, "loss": 0.0193, "step": 29292 }, { "epoch": 3.473615557927191, "grad_norm": 0.7239327351875692, "learning_rate": 2.2365016515649763e-06, "loss": 0.0364, "step": 29293 }, { "epoch": 3.4737341396893155, "grad_norm": 0.8821446501222017, "learning_rate": 2.2355093964852802e-06, "loss": 0.035, "step": 29294 }, { "epoch": 3.4738527214514408, "grad_norm": 1.303705268701076, "learning_rate": 2.2345173512662685e-06, "loss": 0.0223, "step": 29295 }, { "epoch": 3.473971303213566, "grad_norm": 0.740830195590785, "learning_rate": 2.2335255159170925e-06, "loss": 0.0458, "step": 29296 }, { "epoch": 3.4740898849756907, "grad_norm": 0.5318721779287762, "learning_rate": 2.2325338904468917e-06, "loss": 0.0255, "step": 29297 }, { "epoch": 3.474208466737816, "grad_norm": 0.5567133470567315, "learning_rate": 2.231542474864809e-06, "loss": 0.0251, "step": 29298 }, { "epoch": 3.4743270484999407, "grad_norm": 0.7224846561077777, "learning_rate": 2.230551269179987e-06, "loss": 0.0378, "step": 29299 }, { "epoch": 3.474445630262066, "grad_norm": 0.43786612182919876, "learning_rate": 2.229560273401554e-06, "loss": 0.0202, "step": 29300 }, { "epoch": 3.4745642120241906, "grad_norm": 0.4855989336001142, "learning_rate": 2.2285694875386593e-06, "loss": 0.0265, "step": 29301 }, { "epoch": 3.474682793786316, "grad_norm": 0.7723315909555917, "learning_rate": 2.227578911600428e-06, "loss": 0.0445, "step": 29302 }, { "epoch": 3.4748013755484406, "grad_norm": 0.9579218388019975, "learning_rate": 2.226588545595995e-06, "loss": 0.0492, "step": 29303 }, { "epoch": 3.474919957310566, "grad_norm": 0.6058188706639753, "learning_rate": 2.2255983895344836e-06, "loss": 0.0307, "step": 29304 }, { "epoch": 3.4750385390726906, "grad_norm": 0.3739418652275196, "learning_rate": 2.224608443425033e-06, "loss": 0.0194, "step": 29305 }, { "epoch": 3.4751571208348158, "grad_norm": 0.3088536485077807, "learning_rate": 2.223618707276759e-06, "loss": 0.0126, "step": 29306 }, { "epoch": 3.4752757025969405, "grad_norm": 0.5323389155948776, "learning_rate": 2.2226291810987924e-06, "loss": 0.0228, "step": 29307 }, { "epoch": 3.4753942843590657, "grad_norm": 0.5161717899718726, "learning_rate": 2.221639864900252e-06, "loss": 0.0257, "step": 29308 }, { "epoch": 3.4755128661211905, "grad_norm": 0.5874382754576339, "learning_rate": 2.22065075869026e-06, "loss": 0.0251, "step": 29309 }, { "epoch": 3.4756314478833157, "grad_norm": 0.6195073131961786, "learning_rate": 2.2196618624779397e-06, "loss": 0.0309, "step": 29310 }, { "epoch": 3.4757500296454404, "grad_norm": 0.45311891609728844, "learning_rate": 2.2186731762723958e-06, "loss": 0.0156, "step": 29311 }, { "epoch": 3.4758686114075656, "grad_norm": 0.9052438917065136, "learning_rate": 2.217684700082748e-06, "loss": 0.0373, "step": 29312 }, { "epoch": 3.4759871931696904, "grad_norm": 0.528925437288683, "learning_rate": 2.2166964339181113e-06, "loss": 0.0334, "step": 29313 }, { "epoch": 3.4761057749318156, "grad_norm": 0.35370057350201084, "learning_rate": 2.2157083777875954e-06, "loss": 0.0219, "step": 29314 }, { "epoch": 3.4762243566939404, "grad_norm": 0.4138473033560201, "learning_rate": 2.2147205317003067e-06, "loss": 0.0221, "step": 29315 }, { "epoch": 3.4763429384560656, "grad_norm": 0.6401150730010214, "learning_rate": 2.2137328956653546e-06, "loss": 0.0312, "step": 29316 }, { "epoch": 3.4764615202181903, "grad_norm": 0.6707898494459144, "learning_rate": 2.212745469691846e-06, "loss": 0.0315, "step": 29317 }, { "epoch": 3.4765801019803155, "grad_norm": 0.40358682743428187, "learning_rate": 2.211758253788876e-06, "loss": 0.0202, "step": 29318 }, { "epoch": 3.4766986837424403, "grad_norm": 0.5326616715658983, "learning_rate": 2.210771247965551e-06, "loss": 0.022, "step": 29319 }, { "epoch": 3.4768172655045655, "grad_norm": 0.7829377123292888, "learning_rate": 2.20978445223097e-06, "loss": 0.0379, "step": 29320 }, { "epoch": 3.4769358472666902, "grad_norm": 0.7053117780983884, "learning_rate": 2.208797866594234e-06, "loss": 0.0328, "step": 29321 }, { "epoch": 3.4770544290288155, "grad_norm": 0.5754881475084993, "learning_rate": 2.2078114910644267e-06, "loss": 0.0313, "step": 29322 }, { "epoch": 3.47717301079094, "grad_norm": 0.479199447300817, "learning_rate": 2.2068253256506493e-06, "loss": 0.0194, "step": 29323 }, { "epoch": 3.4772915925530654, "grad_norm": 0.6377698461028196, "learning_rate": 2.2058393703619897e-06, "loss": 0.034, "step": 29324 }, { "epoch": 3.47741017431519, "grad_norm": 0.44227365875209984, "learning_rate": 2.2048536252075397e-06, "loss": 0.0207, "step": 29325 }, { "epoch": 3.4775287560773154, "grad_norm": 0.9156117619760162, "learning_rate": 2.2038680901963925e-06, "loss": 0.0477, "step": 29326 }, { "epoch": 3.47764733783944, "grad_norm": 0.7707995508117805, "learning_rate": 2.202882765337616e-06, "loss": 0.0336, "step": 29327 }, { "epoch": 3.4777659196015653, "grad_norm": 0.44029075020120123, "learning_rate": 2.201897650640314e-06, "loss": 0.0226, "step": 29328 }, { "epoch": 3.47788450136369, "grad_norm": 0.45122823482367913, "learning_rate": 2.2009127461135565e-06, "loss": 0.0172, "step": 29329 }, { "epoch": 3.4780030831258153, "grad_norm": 0.5891445525146076, "learning_rate": 2.1999280517664226e-06, "loss": 0.0263, "step": 29330 }, { "epoch": 3.47812166488794, "grad_norm": 0.3462110599317727, "learning_rate": 2.1989435676079943e-06, "loss": 0.0139, "step": 29331 }, { "epoch": 3.4782402466500653, "grad_norm": 0.5959919057688172, "learning_rate": 2.1979592936473504e-06, "loss": 0.0304, "step": 29332 }, { "epoch": 3.47835882841219, "grad_norm": 0.4198642945130328, "learning_rate": 2.1969752298935526e-06, "loss": 0.0237, "step": 29333 }, { "epoch": 3.478477410174315, "grad_norm": 0.8768855868878417, "learning_rate": 2.1959913763556826e-06, "loss": 0.0456, "step": 29334 }, { "epoch": 3.47859599193644, "grad_norm": 0.6418834501250485, "learning_rate": 2.1950077330428088e-06, "loss": 0.0322, "step": 29335 }, { "epoch": 3.478714573698565, "grad_norm": 0.9323994816249697, "learning_rate": 2.1940242999639983e-06, "loss": 0.0451, "step": 29336 }, { "epoch": 3.4788331554606904, "grad_norm": 0.4736428340892236, "learning_rate": 2.1930410771283215e-06, "loss": 0.0211, "step": 29337 }, { "epoch": 3.478951737222815, "grad_norm": 0.3806524484121805, "learning_rate": 2.1920580645448273e-06, "loss": 0.0173, "step": 29338 }, { "epoch": 3.47907031898494, "grad_norm": 0.39271459213148735, "learning_rate": 2.1910752622225998e-06, "loss": 0.0195, "step": 29339 }, { "epoch": 3.479188900747065, "grad_norm": 0.7652184910521687, "learning_rate": 2.190092670170682e-06, "loss": 0.0401, "step": 29340 }, { "epoch": 3.4793074825091903, "grad_norm": 0.6550023011773486, "learning_rate": 2.189110288398144e-06, "loss": 0.0325, "step": 29341 }, { "epoch": 3.479426064271315, "grad_norm": 0.556079665405781, "learning_rate": 2.188128116914029e-06, "loss": 0.0244, "step": 29342 }, { "epoch": 3.47954464603344, "grad_norm": 0.5804483168101726, "learning_rate": 2.187146155727407e-06, "loss": 0.0333, "step": 29343 }, { "epoch": 3.479663227795565, "grad_norm": 0.4207348553413509, "learning_rate": 2.186164404847316e-06, "loss": 0.024, "step": 29344 }, { "epoch": 3.4797818095576902, "grad_norm": 0.37604461781482884, "learning_rate": 2.185182864282814e-06, "loss": 0.0184, "step": 29345 }, { "epoch": 3.479900391319815, "grad_norm": 0.40143818922139574, "learning_rate": 2.184201534042951e-06, "loss": 0.0115, "step": 29346 }, { "epoch": 3.4800189730819397, "grad_norm": 0.36458376725432545, "learning_rate": 2.183220414136769e-06, "loss": 0.0204, "step": 29347 }, { "epoch": 3.480137554844065, "grad_norm": 0.6322863943474227, "learning_rate": 2.1822395045733216e-06, "loss": 0.0275, "step": 29348 }, { "epoch": 3.48025613660619, "grad_norm": 0.37379970915503685, "learning_rate": 2.181258805361633e-06, "loss": 0.0173, "step": 29349 }, { "epoch": 3.480374718368315, "grad_norm": 0.7049291399989598, "learning_rate": 2.180278316510767e-06, "loss": 0.029, "step": 29350 }, { "epoch": 3.48049330013044, "grad_norm": 0.3141729574854941, "learning_rate": 2.179298038029745e-06, "loss": 0.0138, "step": 29351 }, { "epoch": 3.480611881892565, "grad_norm": 0.769604685147985, "learning_rate": 2.1783179699276178e-06, "loss": 0.0462, "step": 29352 }, { "epoch": 3.48073046365469, "grad_norm": 0.5229118164403525, "learning_rate": 2.1773381122134032e-06, "loss": 0.0206, "step": 29353 }, { "epoch": 3.480849045416815, "grad_norm": 0.39675361815150295, "learning_rate": 2.1763584648961525e-06, "loss": 0.015, "step": 29354 }, { "epoch": 3.48096762717894, "grad_norm": 0.5901665434375012, "learning_rate": 2.1753790279848835e-06, "loss": 0.0291, "step": 29355 }, { "epoch": 3.481086208941065, "grad_norm": 0.7726218988545044, "learning_rate": 2.174399801488633e-06, "loss": 0.0389, "step": 29356 }, { "epoch": 3.48120479070319, "grad_norm": 0.5212799956283036, "learning_rate": 2.173420785416422e-06, "loss": 0.0309, "step": 29357 }, { "epoch": 3.4813233724653148, "grad_norm": 0.49352449095883394, "learning_rate": 2.1724419797772816e-06, "loss": 0.0243, "step": 29358 }, { "epoch": 3.48144195422744, "grad_norm": 0.3115012475697842, "learning_rate": 2.1714633845802384e-06, "loss": 0.0132, "step": 29359 }, { "epoch": 3.4815605359895647, "grad_norm": 0.4576440760352765, "learning_rate": 2.170484999834299e-06, "loss": 0.0167, "step": 29360 }, { "epoch": 3.48167911775169, "grad_norm": 0.42054799161064255, "learning_rate": 2.1695068255485007e-06, "loss": 0.0195, "step": 29361 }, { "epoch": 3.4817976995138147, "grad_norm": 0.9073489335772802, "learning_rate": 2.16852886173185e-06, "loss": 0.0456, "step": 29362 }, { "epoch": 3.48191628127594, "grad_norm": 0.4137503570205825, "learning_rate": 2.1675511083933645e-06, "loss": 0.0223, "step": 29363 }, { "epoch": 3.4820348630380646, "grad_norm": 0.3712116115315126, "learning_rate": 2.1665735655420573e-06, "loss": 0.0139, "step": 29364 }, { "epoch": 3.48215344480019, "grad_norm": 0.2998202805668738, "learning_rate": 2.165596233186945e-06, "loss": 0.0131, "step": 29365 }, { "epoch": 3.4822720265623146, "grad_norm": 0.23754619209433964, "learning_rate": 2.1646191113370352e-06, "loss": 0.008, "step": 29366 }, { "epoch": 3.48239060832444, "grad_norm": 0.6090247735199279, "learning_rate": 2.1636422000013312e-06, "loss": 0.0283, "step": 29367 }, { "epoch": 3.4825091900865646, "grad_norm": 0.8229742648228398, "learning_rate": 2.162665499188843e-06, "loss": 0.0262, "step": 29368 }, { "epoch": 3.4826277718486898, "grad_norm": 0.420605370464569, "learning_rate": 2.161689008908574e-06, "loss": 0.0169, "step": 29369 }, { "epoch": 3.4827463536108145, "grad_norm": 0.8027462768663394, "learning_rate": 2.1607127291695284e-06, "loss": 0.0427, "step": 29370 }, { "epoch": 3.4828649353729397, "grad_norm": 0.7813221749347392, "learning_rate": 2.159736659980699e-06, "loss": 0.0374, "step": 29371 }, { "epoch": 3.4829835171350645, "grad_norm": 0.5417231535248139, "learning_rate": 2.158760801351095e-06, "loss": 0.0227, "step": 29372 }, { "epoch": 3.4831020988971897, "grad_norm": 0.42590830354843245, "learning_rate": 2.157785153289704e-06, "loss": 0.0219, "step": 29373 }, { "epoch": 3.4832206806593144, "grad_norm": 0.5690959167975002, "learning_rate": 2.1568097158055216e-06, "loss": 0.0298, "step": 29374 }, { "epoch": 3.4833392624214397, "grad_norm": 0.45855264073905355, "learning_rate": 2.1558344889075407e-06, "loss": 0.0222, "step": 29375 }, { "epoch": 3.4834578441835644, "grad_norm": 0.5412947137452215, "learning_rate": 2.1548594726047534e-06, "loss": 0.0204, "step": 29376 }, { "epoch": 3.4835764259456896, "grad_norm": 0.9244819516968619, "learning_rate": 2.1538846669061528e-06, "loss": 0.0474, "step": 29377 }, { "epoch": 3.4836950077078144, "grad_norm": 0.7712668451395548, "learning_rate": 2.152910071820713e-06, "loss": 0.0406, "step": 29378 }, { "epoch": 3.4838135894699396, "grad_norm": 0.3715724229858837, "learning_rate": 2.151935687357426e-06, "loss": 0.0198, "step": 29379 }, { "epoch": 3.4839321712320643, "grad_norm": 0.4190971512742161, "learning_rate": 2.1509615135252763e-06, "loss": 0.0189, "step": 29380 }, { "epoch": 3.4840507529941895, "grad_norm": 0.5583625104877675, "learning_rate": 2.149987550333243e-06, "loss": 0.0297, "step": 29381 }, { "epoch": 3.4841693347563143, "grad_norm": 0.5107668040664239, "learning_rate": 2.149013797790303e-06, "loss": 0.0163, "step": 29382 }, { "epoch": 3.4842879165184395, "grad_norm": 0.4987948268058593, "learning_rate": 2.1480402559054312e-06, "loss": 0.0235, "step": 29383 }, { "epoch": 3.4844064982805643, "grad_norm": 0.5468848392140822, "learning_rate": 2.1470669246876074e-06, "loss": 0.0243, "step": 29384 }, { "epoch": 3.4845250800426895, "grad_norm": 0.3070318112993743, "learning_rate": 2.146093804145802e-06, "loss": 0.0136, "step": 29385 }, { "epoch": 3.4846436618048147, "grad_norm": 0.4242458097504232, "learning_rate": 2.145120894288985e-06, "loss": 0.023, "step": 29386 }, { "epoch": 3.4847622435669394, "grad_norm": 0.5342233751205393, "learning_rate": 2.144148195126128e-06, "loss": 0.0236, "step": 29387 }, { "epoch": 3.484880825329064, "grad_norm": 0.7280900267584821, "learning_rate": 2.1431757066662006e-06, "loss": 0.0421, "step": 29388 }, { "epoch": 3.4849994070911894, "grad_norm": 0.4620429294018603, "learning_rate": 2.14220342891816e-06, "loss": 0.0213, "step": 29389 }, { "epoch": 3.4851179888533146, "grad_norm": 0.4123952486557153, "learning_rate": 2.1412313618909746e-06, "loss": 0.0187, "step": 29390 }, { "epoch": 3.4852365706154393, "grad_norm": 0.48836975103166097, "learning_rate": 2.1402595055936033e-06, "loss": 0.0371, "step": 29391 }, { "epoch": 3.485355152377564, "grad_norm": 0.6443357435648437, "learning_rate": 2.1392878600350134e-06, "loss": 0.0332, "step": 29392 }, { "epoch": 3.4854737341396893, "grad_norm": 0.29849359634335715, "learning_rate": 2.1383164252241487e-06, "loss": 0.0139, "step": 29393 }, { "epoch": 3.4855923159018145, "grad_norm": 0.7039033963168636, "learning_rate": 2.137345201169974e-06, "loss": 0.0341, "step": 29394 }, { "epoch": 3.4857108976639393, "grad_norm": 0.6359529403845702, "learning_rate": 2.1363741878814403e-06, "loss": 0.0352, "step": 29395 }, { "epoch": 3.485829479426064, "grad_norm": 0.5386421084082034, "learning_rate": 2.1354033853674986e-06, "loss": 0.0244, "step": 29396 }, { "epoch": 3.4859480611881892, "grad_norm": 0.44246996754913304, "learning_rate": 2.1344327936371036e-06, "loss": 0.0194, "step": 29397 }, { "epoch": 3.4860666429503144, "grad_norm": 0.7378677484345851, "learning_rate": 2.133462412699189e-06, "loss": 0.0425, "step": 29398 }, { "epoch": 3.486185224712439, "grad_norm": 0.45655437152017103, "learning_rate": 2.13249224256272e-06, "loss": 0.019, "step": 29399 }, { "epoch": 3.4863038064745644, "grad_norm": 0.37666063736740313, "learning_rate": 2.1315222832366253e-06, "loss": 0.0196, "step": 29400 }, { "epoch": 3.486422388236689, "grad_norm": 0.4224789087048029, "learning_rate": 2.1305525347298544e-06, "loss": 0.0218, "step": 29401 }, { "epoch": 3.4865409699988144, "grad_norm": 0.3401520767526731, "learning_rate": 2.1295829970513437e-06, "loss": 0.0128, "step": 29402 }, { "epoch": 3.486659551760939, "grad_norm": 0.5337249347891128, "learning_rate": 2.1286136702100363e-06, "loss": 0.0227, "step": 29403 }, { "epoch": 3.4867781335230643, "grad_norm": 0.30963777003327553, "learning_rate": 2.127644554214858e-06, "loss": 0.0199, "step": 29404 }, { "epoch": 3.486896715285189, "grad_norm": 0.8811327835155651, "learning_rate": 2.1266756490747498e-06, "loss": 0.0428, "step": 29405 }, { "epoch": 3.4870152970473143, "grad_norm": 0.5199223003569657, "learning_rate": 2.125706954798645e-06, "loss": 0.0202, "step": 29406 }, { "epoch": 3.487133878809439, "grad_norm": 0.7010271495556544, "learning_rate": 2.124738471395471e-06, "loss": 0.023, "step": 29407 }, { "epoch": 3.4872524605715642, "grad_norm": 0.7038915040488598, "learning_rate": 2.1237701988741587e-06, "loss": 0.0459, "step": 29408 }, { "epoch": 3.487371042333689, "grad_norm": 0.551770917002288, "learning_rate": 2.1228021372436264e-06, "loss": 0.0273, "step": 29409 }, { "epoch": 3.487489624095814, "grad_norm": 0.6711967133211967, "learning_rate": 2.121834286512814e-06, "loss": 0.026, "step": 29410 }, { "epoch": 3.487608205857939, "grad_norm": 0.5162639335517566, "learning_rate": 2.120866646690628e-06, "loss": 0.022, "step": 29411 }, { "epoch": 3.487726787620064, "grad_norm": 0.5992216275926507, "learning_rate": 2.119899217785995e-06, "loss": 0.0245, "step": 29412 }, { "epoch": 3.487845369382189, "grad_norm": 0.5398326060707596, "learning_rate": 2.1189319998078355e-06, "loss": 0.0366, "step": 29413 }, { "epoch": 3.487963951144314, "grad_norm": 0.3769189267695202, "learning_rate": 2.1179649927650617e-06, "loss": 0.0166, "step": 29414 }, { "epoch": 3.488082532906439, "grad_norm": 0.602338151840381, "learning_rate": 2.1169981966665976e-06, "loss": 0.0236, "step": 29415 }, { "epoch": 3.488201114668564, "grad_norm": 0.48540939936694744, "learning_rate": 2.116031611521341e-06, "loss": 0.0206, "step": 29416 }, { "epoch": 3.488319696430689, "grad_norm": 0.4509895890743566, "learning_rate": 2.115065237338218e-06, "loss": 0.0203, "step": 29417 }, { "epoch": 3.488438278192814, "grad_norm": 0.4826364100839541, "learning_rate": 2.1140990741261245e-06, "loss": 0.0252, "step": 29418 }, { "epoch": 3.488556859954939, "grad_norm": 0.3834478205525187, "learning_rate": 2.113133121893979e-06, "loss": 0.0204, "step": 29419 }, { "epoch": 3.488675441717064, "grad_norm": 0.7331115487735184, "learning_rate": 2.112167380650673e-06, "loss": 0.0298, "step": 29420 }, { "epoch": 3.4887940234791888, "grad_norm": 0.5455376413379442, "learning_rate": 2.1112018504051227e-06, "loss": 0.0261, "step": 29421 }, { "epoch": 3.488912605241314, "grad_norm": 0.46184470398611494, "learning_rate": 2.1102365311662207e-06, "loss": 0.0249, "step": 29422 }, { "epoch": 3.4890311870034387, "grad_norm": 0.693358570366718, "learning_rate": 2.1092714229428685e-06, "loss": 0.0311, "step": 29423 }, { "epoch": 3.489149768765564, "grad_norm": 0.3374396538198353, "learning_rate": 2.108306525743964e-06, "loss": 0.014, "step": 29424 }, { "epoch": 3.4892683505276887, "grad_norm": 0.5268943906258573, "learning_rate": 2.1073418395784002e-06, "loss": 0.0293, "step": 29425 }, { "epoch": 3.489386932289814, "grad_norm": 0.47938582135188496, "learning_rate": 2.1063773644550756e-06, "loss": 0.0207, "step": 29426 }, { "epoch": 3.4895055140519387, "grad_norm": 0.4103014705784838, "learning_rate": 2.105413100382875e-06, "loss": 0.0207, "step": 29427 }, { "epoch": 3.489624095814064, "grad_norm": 0.437666074045083, "learning_rate": 2.1044490473706907e-06, "loss": 0.0164, "step": 29428 }, { "epoch": 3.4897426775761886, "grad_norm": 0.47971578784816044, "learning_rate": 2.103485205427408e-06, "loss": 0.0143, "step": 29429 }, { "epoch": 3.489861259338314, "grad_norm": 0.5741578574749671, "learning_rate": 2.1025215745619194e-06, "loss": 0.0296, "step": 29430 }, { "epoch": 3.4899798411004386, "grad_norm": 0.392588591608724, "learning_rate": 2.1015581547830955e-06, "loss": 0.0142, "step": 29431 }, { "epoch": 3.4900984228625638, "grad_norm": 0.6110633421997878, "learning_rate": 2.100594946099832e-06, "loss": 0.0292, "step": 29432 }, { "epoch": 3.4902170046246885, "grad_norm": 0.3164716050968801, "learning_rate": 2.099631948521e-06, "loss": 0.0192, "step": 29433 }, { "epoch": 3.4903355863868137, "grad_norm": 0.6071533982890582, "learning_rate": 2.0986691620554778e-06, "loss": 0.0293, "step": 29434 }, { "epoch": 3.490454168148939, "grad_norm": 0.40696859177568284, "learning_rate": 2.097706586712142e-06, "loss": 0.0215, "step": 29435 }, { "epoch": 3.4905727499110637, "grad_norm": 0.5438633198727599, "learning_rate": 2.096744222499866e-06, "loss": 0.0197, "step": 29436 }, { "epoch": 3.4906913316731885, "grad_norm": 0.5414904931906812, "learning_rate": 2.0957820694275293e-06, "loss": 0.0292, "step": 29437 }, { "epoch": 3.4908099134353137, "grad_norm": 0.5336601853069579, "learning_rate": 2.094820127503988e-06, "loss": 0.0232, "step": 29438 }, { "epoch": 3.490928495197439, "grad_norm": 0.5669269086458006, "learning_rate": 2.0938583967381187e-06, "loss": 0.0385, "step": 29439 }, { "epoch": 3.4910470769595636, "grad_norm": 0.5598989618008455, "learning_rate": 2.0928968771387862e-06, "loss": 0.0237, "step": 29440 }, { "epoch": 3.4911656587216884, "grad_norm": 0.42248233711197236, "learning_rate": 2.091935568714856e-06, "loss": 0.027, "step": 29441 }, { "epoch": 3.4912842404838136, "grad_norm": 0.423111939223382, "learning_rate": 2.0909744714751823e-06, "loss": 0.0206, "step": 29442 }, { "epoch": 3.491402822245939, "grad_norm": 0.36964562740379975, "learning_rate": 2.090013585428638e-06, "loss": 0.0172, "step": 29443 }, { "epoch": 3.4915214040080635, "grad_norm": 0.43629783887313606, "learning_rate": 2.0890529105840717e-06, "loss": 0.0251, "step": 29444 }, { "epoch": 3.4916399857701883, "grad_norm": 0.5571094983362268, "learning_rate": 2.0880924469503404e-06, "loss": 0.0209, "step": 29445 }, { "epoch": 3.4917585675323135, "grad_norm": 0.6135165539668231, "learning_rate": 2.087132194536301e-06, "loss": 0.0315, "step": 29446 }, { "epoch": 3.4918771492944387, "grad_norm": 0.3426728604592107, "learning_rate": 2.0861721533508043e-06, "loss": 0.0142, "step": 29447 }, { "epoch": 3.4919957310565635, "grad_norm": 0.4749896754294779, "learning_rate": 2.0852123234027077e-06, "loss": 0.0226, "step": 29448 }, { "epoch": 3.4921143128186887, "grad_norm": 0.28633173042339977, "learning_rate": 2.0842527047008482e-06, "loss": 0.0113, "step": 29449 }, { "epoch": 3.4922328945808134, "grad_norm": 0.4113974975876739, "learning_rate": 2.083293297254077e-06, "loss": 0.0198, "step": 29450 }, { "epoch": 3.4923514763429386, "grad_norm": 0.4452204640179377, "learning_rate": 2.08233410107124e-06, "loss": 0.0156, "step": 29451 }, { "epoch": 3.4924700581050634, "grad_norm": 0.6686726501295411, "learning_rate": 2.0813751161611828e-06, "loss": 0.0337, "step": 29452 }, { "epoch": 3.4925886398671886, "grad_norm": 0.4534851446297752, "learning_rate": 2.0804163425327374e-06, "loss": 0.0212, "step": 29453 }, { "epoch": 3.4927072216293134, "grad_norm": 0.33045743022122853, "learning_rate": 2.079457780194749e-06, "loss": 0.0167, "step": 29454 }, { "epoch": 3.4928258033914386, "grad_norm": 0.3903844207319644, "learning_rate": 2.0784994291560527e-06, "loss": 0.0208, "step": 29455 }, { "epoch": 3.4929443851535633, "grad_norm": 0.5807085152143989, "learning_rate": 2.077541289425483e-06, "loss": 0.0211, "step": 29456 }, { "epoch": 3.4930629669156885, "grad_norm": 0.6691980434924414, "learning_rate": 2.076583361011872e-06, "loss": 0.0361, "step": 29457 }, { "epoch": 3.4931815486778133, "grad_norm": 0.6510748914266865, "learning_rate": 2.0756256439240533e-06, "loss": 0.0272, "step": 29458 }, { "epoch": 3.4933001304399385, "grad_norm": 0.7112301769684595, "learning_rate": 2.0746681381708597e-06, "loss": 0.028, "step": 29459 }, { "epoch": 3.4934187122020632, "grad_norm": 0.319235308813987, "learning_rate": 2.0737108437611063e-06, "loss": 0.0179, "step": 29460 }, { "epoch": 3.4935372939641884, "grad_norm": 0.8241387854296641, "learning_rate": 2.0727537607036276e-06, "loss": 0.0363, "step": 29461 }, { "epoch": 3.493655875726313, "grad_norm": 0.6320428515343267, "learning_rate": 2.0717968890072407e-06, "loss": 0.029, "step": 29462 }, { "epoch": 3.4937744574884384, "grad_norm": 1.1063651299355879, "learning_rate": 2.070840228680773e-06, "loss": 0.0539, "step": 29463 }, { "epoch": 3.493893039250563, "grad_norm": 0.5866326419535265, "learning_rate": 2.0698837797330422e-06, "loss": 0.0303, "step": 29464 }, { "epoch": 3.4940116210126884, "grad_norm": 0.8312682670730789, "learning_rate": 2.0689275421728578e-06, "loss": 0.0526, "step": 29465 }, { "epoch": 3.494130202774813, "grad_norm": 0.4566677319338217, "learning_rate": 2.0679715160090513e-06, "loss": 0.0224, "step": 29466 }, { "epoch": 3.4942487845369383, "grad_norm": 0.4417295397761817, "learning_rate": 2.067015701250419e-06, "loss": 0.0267, "step": 29467 }, { "epoch": 3.494367366299063, "grad_norm": 0.36565664900608946, "learning_rate": 2.0660600979057865e-06, "loss": 0.0188, "step": 29468 }, { "epoch": 3.4944859480611883, "grad_norm": 0.39428458353256207, "learning_rate": 2.0651047059839473e-06, "loss": 0.0201, "step": 29469 }, { "epoch": 3.494604529823313, "grad_norm": 0.5221801586768795, "learning_rate": 2.0641495254937247e-06, "loss": 0.0216, "step": 29470 }, { "epoch": 3.4947231115854382, "grad_norm": 0.5320315848797146, "learning_rate": 2.063194556443915e-06, "loss": 0.0315, "step": 29471 }, { "epoch": 3.494841693347563, "grad_norm": 0.4099389845805603, "learning_rate": 2.062239798843324e-06, "loss": 0.0195, "step": 29472 }, { "epoch": 3.494960275109688, "grad_norm": 0.3726984440852311, "learning_rate": 2.0612852527007537e-06, "loss": 0.0145, "step": 29473 }, { "epoch": 3.495078856871813, "grad_norm": 0.5149011735431152, "learning_rate": 2.0603309180250054e-06, "loss": 0.0219, "step": 29474 }, { "epoch": 3.495197438633938, "grad_norm": 0.46559487516153697, "learning_rate": 2.05937679482488e-06, "loss": 0.0177, "step": 29475 }, { "epoch": 3.495316020396063, "grad_norm": 0.5863530920399637, "learning_rate": 2.058422883109157e-06, "loss": 0.021, "step": 29476 }, { "epoch": 3.495434602158188, "grad_norm": 0.5913666323265289, "learning_rate": 2.0574691828866538e-06, "loss": 0.0272, "step": 29477 }, { "epoch": 3.495553183920313, "grad_norm": 0.3429728647376966, "learning_rate": 2.0565156941661443e-06, "loss": 0.0141, "step": 29478 }, { "epoch": 3.495671765682438, "grad_norm": 0.5756306772277019, "learning_rate": 2.055562416956433e-06, "loss": 0.0266, "step": 29479 }, { "epoch": 3.495790347444563, "grad_norm": 0.6189393203170792, "learning_rate": 2.054609351266287e-06, "loss": 0.0226, "step": 29480 }, { "epoch": 3.495908929206688, "grad_norm": 0.5850588495805122, "learning_rate": 2.053656497104517e-06, "loss": 0.0278, "step": 29481 }, { "epoch": 3.496027510968813, "grad_norm": 0.37492915065561, "learning_rate": 2.05270385447989e-06, "loss": 0.0174, "step": 29482 }, { "epoch": 3.496146092730938, "grad_norm": 0.3579702787009843, "learning_rate": 2.051751423401194e-06, "loss": 0.0154, "step": 29483 }, { "epoch": 3.4962646744930628, "grad_norm": 0.5093053404462596, "learning_rate": 2.0507992038772073e-06, "loss": 0.0265, "step": 29484 }, { "epoch": 3.496383256255188, "grad_norm": 0.7263665222168784, "learning_rate": 2.0498471959167125e-06, "loss": 0.0294, "step": 29485 }, { "epoch": 3.4965018380173127, "grad_norm": 0.38256318162623437, "learning_rate": 2.048895399528486e-06, "loss": 0.0131, "step": 29486 }, { "epoch": 3.496620419779438, "grad_norm": 0.4495346321662533, "learning_rate": 2.04794381472129e-06, "loss": 0.0278, "step": 29487 }, { "epoch": 3.496739001541563, "grad_norm": 0.637661883925862, "learning_rate": 2.0469924415039144e-06, "loss": 0.0212, "step": 29488 }, { "epoch": 3.496857583303688, "grad_norm": 0.48202136296255393, "learning_rate": 2.0460412798851193e-06, "loss": 0.0185, "step": 29489 }, { "epoch": 3.4969761650658127, "grad_norm": 0.45805472465549957, "learning_rate": 2.0450903298736807e-06, "loss": 0.0207, "step": 29490 }, { "epoch": 3.497094746827938, "grad_norm": 0.7849762786868582, "learning_rate": 2.04413959147835e-06, "loss": 0.0327, "step": 29491 }, { "epoch": 3.497213328590063, "grad_norm": 0.8457574087857751, "learning_rate": 2.0431890647079094e-06, "loss": 0.0419, "step": 29492 }, { "epoch": 3.497331910352188, "grad_norm": 0.7742914895374151, "learning_rate": 2.0422387495711128e-06, "loss": 0.0398, "step": 29493 }, { "epoch": 3.4974504921143126, "grad_norm": 0.32289992937838297, "learning_rate": 2.0412886460767195e-06, "loss": 0.0163, "step": 29494 }, { "epoch": 3.497569073876438, "grad_norm": 0.5231213072175789, "learning_rate": 2.040338754233492e-06, "loss": 0.0212, "step": 29495 }, { "epoch": 3.497687655638563, "grad_norm": 0.5535457446979084, "learning_rate": 2.0393890740501853e-06, "loss": 0.0203, "step": 29496 }, { "epoch": 3.4978062374006877, "grad_norm": 0.5076396174238198, "learning_rate": 2.038439605535561e-06, "loss": 0.027, "step": 29497 }, { "epoch": 3.497924819162813, "grad_norm": 0.6784852878684662, "learning_rate": 2.0374903486983623e-06, "loss": 0.0262, "step": 29498 }, { "epoch": 3.4980434009249377, "grad_norm": 0.40428458286992836, "learning_rate": 2.036541303547343e-06, "loss": 0.012, "step": 29499 }, { "epoch": 3.498161982687063, "grad_norm": 0.503255303388937, "learning_rate": 2.035592470091252e-06, "loss": 0.0202, "step": 29500 }, { "epoch": 3.4982805644491877, "grad_norm": 0.638353840772891, "learning_rate": 2.0346438483388435e-06, "loss": 0.0311, "step": 29501 }, { "epoch": 3.498399146211313, "grad_norm": 0.3203928027940021, "learning_rate": 2.0336954382988466e-06, "loss": 0.015, "step": 29502 }, { "epoch": 3.4985177279734376, "grad_norm": 0.602906241609347, "learning_rate": 2.0327472399800264e-06, "loss": 0.0182, "step": 29503 }, { "epoch": 3.498636309735563, "grad_norm": 0.5285257856548802, "learning_rate": 2.0317992533911064e-06, "loss": 0.0232, "step": 29504 }, { "epoch": 3.4987548914976876, "grad_norm": 0.7629904741081432, "learning_rate": 2.0308514785408295e-06, "loss": 0.0371, "step": 29505 }, { "epoch": 3.498873473259813, "grad_norm": 0.38464289273571456, "learning_rate": 2.029903915437939e-06, "loss": 0.0164, "step": 29506 }, { "epoch": 3.4989920550219376, "grad_norm": 0.7309055014785212, "learning_rate": 2.028956564091164e-06, "loss": 0.0388, "step": 29507 }, { "epoch": 3.4991106367840628, "grad_norm": 0.4824803273090274, "learning_rate": 2.0280094245092474e-06, "loss": 0.021, "step": 29508 }, { "epoch": 3.4992292185461875, "grad_norm": 0.34802137662040433, "learning_rate": 2.027062496700907e-06, "loss": 0.0179, "step": 29509 }, { "epoch": 3.4993478003083127, "grad_norm": 0.6157303150919883, "learning_rate": 2.026115780674881e-06, "loss": 0.0266, "step": 29510 }, { "epoch": 3.4994663820704375, "grad_norm": 0.3378976272550531, "learning_rate": 2.025169276439892e-06, "loss": 0.0136, "step": 29511 }, { "epoch": 3.4995849638325627, "grad_norm": 0.7507575892259514, "learning_rate": 2.0242229840046726e-06, "loss": 0.0318, "step": 29512 }, { "epoch": 3.4997035455946874, "grad_norm": 0.6100660555194072, "learning_rate": 2.0232769033779435e-06, "loss": 0.0327, "step": 29513 }, { "epoch": 3.4998221273568126, "grad_norm": 0.4226053264348614, "learning_rate": 2.02233103456842e-06, "loss": 0.0218, "step": 29514 }, { "epoch": 3.4999407091189374, "grad_norm": 0.6878214759450412, "learning_rate": 2.0213853775848335e-06, "loss": 0.0266, "step": 29515 }, { "epoch": 3.5000592908810626, "grad_norm": 0.6272253205852738, "learning_rate": 2.0204399324358914e-06, "loss": 0.0287, "step": 29516 }, { "epoch": 3.5001778726431874, "grad_norm": 0.4613333498792484, "learning_rate": 2.019494699130314e-06, "loss": 0.0127, "step": 29517 }, { "epoch": 3.5002964544053126, "grad_norm": 0.5874770327211822, "learning_rate": 2.018549677676815e-06, "loss": 0.0393, "step": 29518 }, { "epoch": 3.5004150361674373, "grad_norm": 0.6190801290983952, "learning_rate": 2.017604868084111e-06, "loss": 0.0298, "step": 29519 }, { "epoch": 3.5005336179295625, "grad_norm": 0.5576964103584814, "learning_rate": 2.016660270360904e-06, "loss": 0.0322, "step": 29520 }, { "epoch": 3.5006521996916873, "grad_norm": 0.5891120022865568, "learning_rate": 2.0157158845159037e-06, "loss": 0.0269, "step": 29521 }, { "epoch": 3.5007707814538125, "grad_norm": 0.7090971150692977, "learning_rate": 2.0147717105578168e-06, "loss": 0.0237, "step": 29522 }, { "epoch": 3.5008893632159372, "grad_norm": 0.3997031040332091, "learning_rate": 2.0138277484953503e-06, "loss": 0.0266, "step": 29523 }, { "epoch": 3.5010079449780624, "grad_norm": 0.42812672269971297, "learning_rate": 2.012883998337209e-06, "loss": 0.0191, "step": 29524 }, { "epoch": 3.501126526740187, "grad_norm": 0.7548634090375841, "learning_rate": 2.0119404600920767e-06, "loss": 0.032, "step": 29525 }, { "epoch": 3.5012451085023124, "grad_norm": 0.39529790276587495, "learning_rate": 2.0109971337686743e-06, "loss": 0.0171, "step": 29526 }, { "epoch": 3.501363690264437, "grad_norm": 0.7539029737828782, "learning_rate": 2.0100540193756843e-06, "loss": 0.0314, "step": 29527 }, { "epoch": 3.5014822720265624, "grad_norm": 1.2205161570483813, "learning_rate": 2.0091111169218023e-06, "loss": 0.0445, "step": 29528 }, { "epoch": 3.5016008537886876, "grad_norm": 0.47981721503067704, "learning_rate": 2.0081684264157243e-06, "loss": 0.0191, "step": 29529 }, { "epoch": 3.5017194355508123, "grad_norm": 0.4932517252431313, "learning_rate": 2.0072259478661427e-06, "loss": 0.0222, "step": 29530 }, { "epoch": 3.501838017312937, "grad_norm": 0.6733580733831385, "learning_rate": 2.006283681281737e-06, "loss": 0.0353, "step": 29531 }, { "epoch": 3.5019565990750623, "grad_norm": 0.4827700017522993, "learning_rate": 2.0053416266712005e-06, "loss": 0.0241, "step": 29532 }, { "epoch": 3.5020751808371875, "grad_norm": 0.720532477161533, "learning_rate": 2.004399784043215e-06, "loss": 0.0382, "step": 29533 }, { "epoch": 3.5021937625993123, "grad_norm": 0.5789961311279976, "learning_rate": 2.003458153406465e-06, "loss": 0.0301, "step": 29534 }, { "epoch": 3.502312344361437, "grad_norm": 0.5715596171521957, "learning_rate": 2.0025167347696326e-06, "loss": 0.0246, "step": 29535 }, { "epoch": 3.502430926123562, "grad_norm": 0.5954565362438518, "learning_rate": 2.001575528141389e-06, "loss": 0.028, "step": 29536 }, { "epoch": 3.5025495078856874, "grad_norm": 0.46326454588296745, "learning_rate": 2.0006345335304236e-06, "loss": 0.0245, "step": 29537 }, { "epoch": 3.502668089647812, "grad_norm": 0.5860698043915059, "learning_rate": 1.9996937509454e-06, "loss": 0.0217, "step": 29538 }, { "epoch": 3.502786671409937, "grad_norm": 0.715873157043195, "learning_rate": 1.9987531803949992e-06, "loss": 0.0373, "step": 29539 }, { "epoch": 3.502905253172062, "grad_norm": 0.5633231596290524, "learning_rate": 1.9978128218878815e-06, "loss": 0.0234, "step": 29540 }, { "epoch": 3.5030238349341873, "grad_norm": 0.3674469635955456, "learning_rate": 1.9968726754327288e-06, "loss": 0.0164, "step": 29541 }, { "epoch": 3.503142416696312, "grad_norm": 0.3646268393548582, "learning_rate": 1.9959327410381977e-06, "loss": 0.0149, "step": 29542 }, { "epoch": 3.503260998458437, "grad_norm": 0.503339453024869, "learning_rate": 1.9949930187129568e-06, "loss": 0.0222, "step": 29543 }, { "epoch": 3.503379580220562, "grad_norm": 0.4892730351912566, "learning_rate": 1.9940535084656708e-06, "loss": 0.0216, "step": 29544 }, { "epoch": 3.5034981619826873, "grad_norm": 0.4281489214260765, "learning_rate": 1.9931142103049997e-06, "loss": 0.0179, "step": 29545 }, { "epoch": 3.503616743744812, "grad_norm": 0.607353142751209, "learning_rate": 1.9921751242396063e-06, "loss": 0.0262, "step": 29546 }, { "epoch": 3.503735325506937, "grad_norm": 0.5866833244617999, "learning_rate": 1.991236250278136e-06, "loss": 0.04, "step": 29547 }, { "epoch": 3.503853907269062, "grad_norm": 0.7682837239099337, "learning_rate": 1.990297588429263e-06, "loss": 0.0386, "step": 29548 }, { "epoch": 3.503972489031187, "grad_norm": 0.7781459304149279, "learning_rate": 1.9893591387016213e-06, "loss": 0.0364, "step": 29549 }, { "epoch": 3.504091070793312, "grad_norm": 0.6226874703786452, "learning_rate": 1.9884209011038795e-06, "loss": 0.0261, "step": 29550 }, { "epoch": 3.5042096525554367, "grad_norm": 0.4867669916934617, "learning_rate": 1.987482875644667e-06, "loss": 0.0294, "step": 29551 }, { "epoch": 3.504328234317562, "grad_norm": 0.5464274201567452, "learning_rate": 1.9865450623326507e-06, "loss": 0.0293, "step": 29552 }, { "epoch": 3.504446816079687, "grad_norm": 0.49084904363394993, "learning_rate": 1.985607461176467e-06, "loss": 0.0212, "step": 29553 }, { "epoch": 3.504565397841812, "grad_norm": 0.7082671793139824, "learning_rate": 1.984670072184758e-06, "loss": 0.0369, "step": 29554 }, { "epoch": 3.504683979603937, "grad_norm": 0.6461377912177296, "learning_rate": 1.9837328953661666e-06, "loss": 0.0281, "step": 29555 }, { "epoch": 3.504802561366062, "grad_norm": 0.5399045413081784, "learning_rate": 1.9827959307293336e-06, "loss": 0.0241, "step": 29556 }, { "epoch": 3.504921143128187, "grad_norm": 0.8801373431753775, "learning_rate": 1.981859178282902e-06, "loss": 0.0383, "step": 29557 }, { "epoch": 3.505039724890312, "grad_norm": 0.46756890164341003, "learning_rate": 1.980922638035493e-06, "loss": 0.0286, "step": 29558 }, { "epoch": 3.505158306652437, "grad_norm": 0.5091476964623408, "learning_rate": 1.97998630999576e-06, "loss": 0.0229, "step": 29559 }, { "epoch": 3.5052768884145618, "grad_norm": 0.5609570215489693, "learning_rate": 1.979050194172319e-06, "loss": 0.0255, "step": 29560 }, { "epoch": 3.505395470176687, "grad_norm": 0.6318271181938978, "learning_rate": 1.978114290573807e-06, "loss": 0.0311, "step": 29561 }, { "epoch": 3.5055140519388117, "grad_norm": 0.34770459840088874, "learning_rate": 1.9771785992088426e-06, "loss": 0.0167, "step": 29562 }, { "epoch": 3.505632633700937, "grad_norm": 0.5357617660345239, "learning_rate": 1.9762431200860687e-06, "loss": 0.025, "step": 29563 }, { "epoch": 3.5057512154630617, "grad_norm": 0.5311184938876035, "learning_rate": 1.975307853214095e-06, "loss": 0.0275, "step": 29564 }, { "epoch": 3.505869797225187, "grad_norm": 0.5150242119693037, "learning_rate": 1.9743727986015483e-06, "loss": 0.0227, "step": 29565 }, { "epoch": 3.5059883789873116, "grad_norm": 0.4717521701665454, "learning_rate": 1.9734379562570487e-06, "loss": 0.0149, "step": 29566 }, { "epoch": 3.506106960749437, "grad_norm": 0.5927171395805009, "learning_rate": 1.9725033261892125e-06, "loss": 0.0285, "step": 29567 }, { "epoch": 3.5062255425115616, "grad_norm": 0.4382707715034032, "learning_rate": 1.9715689084066624e-06, "loss": 0.016, "step": 29568 }, { "epoch": 3.506344124273687, "grad_norm": 0.48182536842137463, "learning_rate": 1.9706347029180034e-06, "loss": 0.0161, "step": 29569 }, { "epoch": 3.5064627060358116, "grad_norm": 0.6055452522770357, "learning_rate": 1.9697007097318533e-06, "loss": 0.0346, "step": 29570 }, { "epoch": 3.5065812877979368, "grad_norm": 0.9652197753369854, "learning_rate": 1.9687669288568187e-06, "loss": 0.0376, "step": 29571 }, { "epoch": 3.5066998695600615, "grad_norm": 0.427891223809996, "learning_rate": 1.967833360301513e-06, "loss": 0.0202, "step": 29572 }, { "epoch": 3.5068184513221867, "grad_norm": 0.5053811700893635, "learning_rate": 1.9669000040745374e-06, "loss": 0.0237, "step": 29573 }, { "epoch": 3.5069370330843115, "grad_norm": 0.32842386821874886, "learning_rate": 1.965966860184498e-06, "loss": 0.0149, "step": 29574 }, { "epoch": 3.5070556148464367, "grad_norm": 0.4536251696529942, "learning_rate": 1.965033928640006e-06, "loss": 0.0162, "step": 29575 }, { "epoch": 3.5071741966085614, "grad_norm": 0.4667117660383834, "learning_rate": 1.964101209449648e-06, "loss": 0.0212, "step": 29576 }, { "epoch": 3.5072927783706866, "grad_norm": 0.6090268623289764, "learning_rate": 1.9631687026220257e-06, "loss": 0.0234, "step": 29577 }, { "epoch": 3.507411360132812, "grad_norm": 0.5199920335126424, "learning_rate": 1.9622364081657406e-06, "loss": 0.0194, "step": 29578 }, { "epoch": 3.5075299418949366, "grad_norm": 0.7373765348361074, "learning_rate": 1.9613043260893886e-06, "loss": 0.041, "step": 29579 }, { "epoch": 3.5076485236570614, "grad_norm": 0.5214237100576595, "learning_rate": 1.9603724564015575e-06, "loss": 0.0291, "step": 29580 }, { "epoch": 3.5077671054191866, "grad_norm": 0.4413548614710275, "learning_rate": 1.959440799110837e-06, "loss": 0.0259, "step": 29581 }, { "epoch": 3.5078856871813118, "grad_norm": 0.5335800402767409, "learning_rate": 1.9585093542258154e-06, "loss": 0.0208, "step": 29582 }, { "epoch": 3.5080042689434365, "grad_norm": 0.36509116342251013, "learning_rate": 1.9575781217550852e-06, "loss": 0.0139, "step": 29583 }, { "epoch": 3.5081228507055613, "grad_norm": 0.932930387388467, "learning_rate": 1.956647101707232e-06, "loss": 0.0458, "step": 29584 }, { "epoch": 3.5082414324676865, "grad_norm": 0.6739914321156154, "learning_rate": 1.9557162940908284e-06, "loss": 0.0311, "step": 29585 }, { "epoch": 3.5083600142298117, "grad_norm": 0.7134146963673423, "learning_rate": 1.9547856989144685e-06, "loss": 0.0396, "step": 29586 }, { "epoch": 3.5084785959919365, "grad_norm": 0.3704251757078723, "learning_rate": 1.95385531618672e-06, "loss": 0.0219, "step": 29587 }, { "epoch": 3.508597177754061, "grad_norm": 0.7519129722716738, "learning_rate": 1.9529251459161653e-06, "loss": 0.035, "step": 29588 }, { "epoch": 3.5087157595161864, "grad_norm": 0.5835186728282883, "learning_rate": 1.9519951881113772e-06, "loss": 0.0228, "step": 29589 }, { "epoch": 3.5088343412783116, "grad_norm": 0.7163742166854814, "learning_rate": 1.951065442780933e-06, "loss": 0.0276, "step": 29590 }, { "epoch": 3.5089529230404364, "grad_norm": 0.5672426472282756, "learning_rate": 1.950135909933401e-06, "loss": 0.0221, "step": 29591 }, { "epoch": 3.509071504802561, "grad_norm": 0.4806240372953579, "learning_rate": 1.949206589577346e-06, "loss": 0.0267, "step": 29592 }, { "epoch": 3.5091900865646863, "grad_norm": 0.3769800322390542, "learning_rate": 1.9482774817213417e-06, "loss": 0.0182, "step": 29593 }, { "epoch": 3.5093086683268115, "grad_norm": 0.38709736770845776, "learning_rate": 1.947348586373951e-06, "loss": 0.0204, "step": 29594 }, { "epoch": 3.5094272500889363, "grad_norm": 0.47183871844565234, "learning_rate": 1.946419903543742e-06, "loss": 0.0217, "step": 29595 }, { "epoch": 3.509545831851061, "grad_norm": 0.4097210270540447, "learning_rate": 1.945491433239263e-06, "loss": 0.0245, "step": 29596 }, { "epoch": 3.5096644136131863, "grad_norm": 0.510203773771718, "learning_rate": 1.9445631754690885e-06, "loss": 0.0185, "step": 29597 }, { "epoch": 3.5097829953753115, "grad_norm": 0.6822582453555958, "learning_rate": 1.9436351302417666e-06, "loss": 0.0255, "step": 29598 }, { "epoch": 3.5099015771374362, "grad_norm": 0.6615562811530399, "learning_rate": 1.94270729756586e-06, "loss": 0.0349, "step": 29599 }, { "epoch": 3.510020158899561, "grad_norm": 0.41395718114449204, "learning_rate": 1.9417796774499063e-06, "loss": 0.0196, "step": 29600 }, { "epoch": 3.510138740661686, "grad_norm": 0.7085347210407597, "learning_rate": 1.9408522699024796e-06, "loss": 0.0372, "step": 29601 }, { "epoch": 3.5102573224238114, "grad_norm": 0.6743141013078395, "learning_rate": 1.9399250749321114e-06, "loss": 0.0299, "step": 29602 }, { "epoch": 3.510375904185936, "grad_norm": 0.7320010288126069, "learning_rate": 1.938998092547356e-06, "loss": 0.0301, "step": 29603 }, { "epoch": 3.5104944859480613, "grad_norm": 0.431532223406101, "learning_rate": 1.9380713227567596e-06, "loss": 0.0182, "step": 29604 }, { "epoch": 3.510613067710186, "grad_norm": 0.6599875789207439, "learning_rate": 1.9371447655688658e-06, "loss": 0.0378, "step": 29605 }, { "epoch": 3.5107316494723113, "grad_norm": 0.4092663005973759, "learning_rate": 1.936218420992217e-06, "loss": 0.0199, "step": 29606 }, { "epoch": 3.510850231234436, "grad_norm": 0.48358169861865946, "learning_rate": 1.935292289035345e-06, "loss": 0.0226, "step": 29607 }, { "epoch": 3.5109688129965613, "grad_norm": 0.5220249872718978, "learning_rate": 1.9343663697068053e-06, "loss": 0.032, "step": 29608 }, { "epoch": 3.511087394758686, "grad_norm": 0.45702503859636795, "learning_rate": 1.933440663015115e-06, "loss": 0.0197, "step": 29609 }, { "epoch": 3.5112059765208112, "grad_norm": 0.7306646169252282, "learning_rate": 1.9325151689688207e-06, "loss": 0.0383, "step": 29610 }, { "epoch": 3.511324558282936, "grad_norm": 0.7744797342008439, "learning_rate": 1.9315898875764433e-06, "loss": 0.028, "step": 29611 }, { "epoch": 3.511443140045061, "grad_norm": 0.30991037028910856, "learning_rate": 1.9306648188465252e-06, "loss": 0.0119, "step": 29612 }, { "epoch": 3.511561721807186, "grad_norm": 0.5934865067352479, "learning_rate": 1.929739962787583e-06, "loss": 0.0283, "step": 29613 }, { "epoch": 3.511680303569311, "grad_norm": 0.5957070153333013, "learning_rate": 1.9288153194081503e-06, "loss": 0.033, "step": 29614 }, { "epoch": 3.511798885331436, "grad_norm": 0.5821952271711831, "learning_rate": 1.9278908887167463e-06, "loss": 0.0332, "step": 29615 }, { "epoch": 3.511917467093561, "grad_norm": 0.5871446202841408, "learning_rate": 1.926966670721897e-06, "loss": 0.0248, "step": 29616 }, { "epoch": 3.512036048855686, "grad_norm": 0.46695794701868576, "learning_rate": 1.9260426654321235e-06, "loss": 0.0195, "step": 29617 }, { "epoch": 3.512154630617811, "grad_norm": 0.6863742796871579, "learning_rate": 1.9251188728559326e-06, "loss": 0.036, "step": 29618 }, { "epoch": 3.512273212379936, "grad_norm": 0.40636269765334687, "learning_rate": 1.9241952930018596e-06, "loss": 0.022, "step": 29619 }, { "epoch": 3.512391794142061, "grad_norm": 0.5423927164089518, "learning_rate": 1.9232719258784032e-06, "loss": 0.0288, "step": 29620 }, { "epoch": 3.512510375904186, "grad_norm": 0.781421555757999, "learning_rate": 1.9223487714940843e-06, "loss": 0.0385, "step": 29621 }, { "epoch": 3.512628957666311, "grad_norm": 0.8883713286810705, "learning_rate": 1.921425829857407e-06, "loss": 0.0516, "step": 29622 }, { "epoch": 3.5127475394284358, "grad_norm": 0.44150381555695517, "learning_rate": 1.920503100976881e-06, "loss": 0.0216, "step": 29623 }, { "epoch": 3.512866121190561, "grad_norm": 0.5427999058762435, "learning_rate": 1.9195805848610222e-06, "loss": 0.0257, "step": 29624 }, { "epoch": 3.5129847029526857, "grad_norm": 0.544656027787927, "learning_rate": 1.9186582815183208e-06, "loss": 0.0256, "step": 29625 }, { "epoch": 3.513103284714811, "grad_norm": 0.44278500863808384, "learning_rate": 1.9177361909572866e-06, "loss": 0.0245, "step": 29626 }, { "epoch": 3.5132218664769357, "grad_norm": 0.27737628181903967, "learning_rate": 1.9168143131864215e-06, "loss": 0.0153, "step": 29627 }, { "epoch": 3.513340448239061, "grad_norm": 0.6136770095971343, "learning_rate": 1.9158926482142237e-06, "loss": 0.0309, "step": 29628 }, { "epoch": 3.5134590300011856, "grad_norm": 0.3621820019466071, "learning_rate": 1.9149711960491813e-06, "loss": 0.0213, "step": 29629 }, { "epoch": 3.513577611763311, "grad_norm": 0.5967470897335201, "learning_rate": 1.914049956699801e-06, "loss": 0.028, "step": 29630 }, { "epoch": 3.513696193525436, "grad_norm": 0.507774968902957, "learning_rate": 1.913128930174568e-06, "loss": 0.0209, "step": 29631 }, { "epoch": 3.513814775287561, "grad_norm": 0.599974932094328, "learning_rate": 1.9122081164819756e-06, "loss": 0.0284, "step": 29632 }, { "epoch": 3.5139333570496856, "grad_norm": 0.5939036281796418, "learning_rate": 1.911287515630514e-06, "loss": 0.0249, "step": 29633 }, { "epoch": 3.5140519388118108, "grad_norm": 0.3481040792208887, "learning_rate": 1.910367127628665e-06, "loss": 0.0161, "step": 29634 }, { "epoch": 3.514170520573936, "grad_norm": 0.3774997390319046, "learning_rate": 1.9094469524849225e-06, "loss": 0.0211, "step": 29635 }, { "epoch": 3.5142891023360607, "grad_norm": 0.4000016677201124, "learning_rate": 1.9085269902077597e-06, "loss": 0.0161, "step": 29636 }, { "epoch": 3.5144076840981855, "grad_norm": 0.5054983592089364, "learning_rate": 1.907607240805659e-06, "loss": 0.0243, "step": 29637 }, { "epoch": 3.5145262658603107, "grad_norm": 0.4283824433350457, "learning_rate": 1.9066877042871052e-06, "loss": 0.0265, "step": 29638 }, { "epoch": 3.514644847622436, "grad_norm": 0.44753561069757386, "learning_rate": 1.9057683806605746e-06, "loss": 0.0175, "step": 29639 }, { "epoch": 3.5147634293845607, "grad_norm": 0.32287057446611794, "learning_rate": 1.9048492699345332e-06, "loss": 0.0152, "step": 29640 }, { "epoch": 3.5148820111466854, "grad_norm": 1.0516437305164756, "learning_rate": 1.9039303721174629e-06, "loss": 0.0438, "step": 29641 }, { "epoch": 3.5150005929088106, "grad_norm": 0.3504618090520581, "learning_rate": 1.9030116872178316e-06, "loss": 0.0125, "step": 29642 }, { "epoch": 3.515119174670936, "grad_norm": 0.3803829647724266, "learning_rate": 1.9020932152441107e-06, "loss": 0.0175, "step": 29643 }, { "epoch": 3.5152377564330606, "grad_norm": 0.49416538229672813, "learning_rate": 1.9011749562047627e-06, "loss": 0.0328, "step": 29644 }, { "epoch": 3.5153563381951853, "grad_norm": 0.44376221604637317, "learning_rate": 1.9002569101082584e-06, "loss": 0.0227, "step": 29645 }, { "epoch": 3.5154749199573105, "grad_norm": 0.7082606163596536, "learning_rate": 1.8993390769630636e-06, "loss": 0.0306, "step": 29646 }, { "epoch": 3.5155935017194357, "grad_norm": 0.8558929614903185, "learning_rate": 1.898421456777627e-06, "loss": 0.0298, "step": 29647 }, { "epoch": 3.5157120834815605, "grad_norm": 0.45894099323366055, "learning_rate": 1.897504049560417e-06, "loss": 0.0221, "step": 29648 }, { "epoch": 3.5158306652436853, "grad_norm": 0.43016147091776114, "learning_rate": 1.89658685531989e-06, "loss": 0.022, "step": 29649 }, { "epoch": 3.5159492470058105, "grad_norm": 0.3181326198213371, "learning_rate": 1.8956698740645067e-06, "loss": 0.0214, "step": 29650 }, { "epoch": 3.5160678287679357, "grad_norm": 0.6281937526608898, "learning_rate": 1.8947531058027074e-06, "loss": 0.0294, "step": 29651 }, { "epoch": 3.5161864105300604, "grad_norm": 0.498828565561426, "learning_rate": 1.8938365505429545e-06, "loss": 0.02, "step": 29652 }, { "epoch": 3.5163049922921856, "grad_norm": 0.46505471297673684, "learning_rate": 1.8929202082936915e-06, "loss": 0.0223, "step": 29653 }, { "epoch": 3.5164235740543104, "grad_norm": 0.3321871596213565, "learning_rate": 1.8920040790633669e-06, "loss": 0.0111, "step": 29654 }, { "epoch": 3.5165421558164356, "grad_norm": 0.555140228770797, "learning_rate": 1.8910881628604354e-06, "loss": 0.0264, "step": 29655 }, { "epoch": 3.5166607375785603, "grad_norm": 0.8613957217933224, "learning_rate": 1.8901724596933234e-06, "loss": 0.0372, "step": 29656 }, { "epoch": 3.5167793193406856, "grad_norm": 0.731112582975474, "learning_rate": 1.889256969570491e-06, "loss": 0.0347, "step": 29657 }, { "epoch": 3.5168979011028103, "grad_norm": 0.7422720290959284, "learning_rate": 1.8883416925003617e-06, "loss": 0.0428, "step": 29658 }, { "epoch": 3.5170164828649355, "grad_norm": 0.4303124218604529, "learning_rate": 1.8874266284913843e-06, "loss": 0.0208, "step": 29659 }, { "epoch": 3.5171350646270603, "grad_norm": 0.34520177925465506, "learning_rate": 1.8865117775519886e-06, "loss": 0.0149, "step": 29660 }, { "epoch": 3.5172536463891855, "grad_norm": 0.3747786656637705, "learning_rate": 1.8855971396906174e-06, "loss": 0.0193, "step": 29661 }, { "epoch": 3.5173722281513102, "grad_norm": 0.5074548829955556, "learning_rate": 1.8846827149156893e-06, "loss": 0.0203, "step": 29662 }, { "epoch": 3.5174908099134354, "grad_norm": 0.7400346534485179, "learning_rate": 1.8837685032356417e-06, "loss": 0.0451, "step": 29663 }, { "epoch": 3.51760939167556, "grad_norm": 0.8536580836533645, "learning_rate": 1.8828545046589014e-06, "loss": 0.0347, "step": 29664 }, { "epoch": 3.5177279734376854, "grad_norm": 0.9127146000370324, "learning_rate": 1.8819407191938949e-06, "loss": 0.0461, "step": 29665 }, { "epoch": 3.51784655519981, "grad_norm": 0.5412433263276498, "learning_rate": 1.8810271468490488e-06, "loss": 0.0259, "step": 29666 }, { "epoch": 3.5179651369619354, "grad_norm": 0.5767836569551241, "learning_rate": 1.8801137876327757e-06, "loss": 0.0243, "step": 29667 }, { "epoch": 3.51808371872406, "grad_norm": 0.8147106222775594, "learning_rate": 1.879200641553508e-06, "loss": 0.0398, "step": 29668 }, { "epoch": 3.5182023004861853, "grad_norm": 0.41246234206159027, "learning_rate": 1.8782877086196554e-06, "loss": 0.0189, "step": 29669 }, { "epoch": 3.51832088224831, "grad_norm": 0.41626186213020283, "learning_rate": 1.8773749888396336e-06, "loss": 0.0186, "step": 29670 }, { "epoch": 3.5184394640104353, "grad_norm": 0.6132295157455471, "learning_rate": 1.8764624822218636e-06, "loss": 0.0227, "step": 29671 }, { "epoch": 3.51855804577256, "grad_norm": 0.7154469873697779, "learning_rate": 1.8755501887747495e-06, "loss": 0.0284, "step": 29672 }, { "epoch": 3.5186766275346852, "grad_norm": 0.35340632141370865, "learning_rate": 1.8746381085067127e-06, "loss": 0.0156, "step": 29673 }, { "epoch": 3.51879520929681, "grad_norm": 0.44925773717861406, "learning_rate": 1.8737262414261465e-06, "loss": 0.0237, "step": 29674 }, { "epoch": 3.518913791058935, "grad_norm": 0.42047274746251406, "learning_rate": 1.8728145875414715e-06, "loss": 0.0172, "step": 29675 }, { "epoch": 3.51903237282106, "grad_norm": 0.594638172759034, "learning_rate": 1.8719031468610814e-06, "loss": 0.0243, "step": 29676 }, { "epoch": 3.519150954583185, "grad_norm": 0.6297717193241232, "learning_rate": 1.8709919193933862e-06, "loss": 0.0338, "step": 29677 }, { "epoch": 3.51926953634531, "grad_norm": 0.4717516310631284, "learning_rate": 1.870080905146776e-06, "loss": 0.0174, "step": 29678 }, { "epoch": 3.519388118107435, "grad_norm": 0.3432506974583966, "learning_rate": 1.8691701041296638e-06, "loss": 0.0111, "step": 29679 }, { "epoch": 3.5195066998695603, "grad_norm": 0.38875552222811577, "learning_rate": 1.8682595163504345e-06, "loss": 0.0218, "step": 29680 }, { "epoch": 3.519625281631685, "grad_norm": 0.5839137206955795, "learning_rate": 1.8673491418174871e-06, "loss": 0.0183, "step": 29681 }, { "epoch": 3.51974386339381, "grad_norm": 0.5529411482596726, "learning_rate": 1.8664389805392146e-06, "loss": 0.0269, "step": 29682 }, { "epoch": 3.519862445155935, "grad_norm": 0.9319428874280611, "learning_rate": 1.8655290325240021e-06, "loss": 0.0477, "step": 29683 }, { "epoch": 3.5199810269180603, "grad_norm": 0.38838764256661396, "learning_rate": 1.8646192977802513e-06, "loss": 0.0159, "step": 29684 }, { "epoch": 3.520099608680185, "grad_norm": 0.5818772020666683, "learning_rate": 1.8637097763163303e-06, "loss": 0.0315, "step": 29685 }, { "epoch": 3.5202181904423098, "grad_norm": 0.37042470689580087, "learning_rate": 1.8628004681406407e-06, "loss": 0.0141, "step": 29686 }, { "epoch": 3.520336772204435, "grad_norm": 0.897056078967392, "learning_rate": 1.8618913732615539e-06, "loss": 0.031, "step": 29687 }, { "epoch": 3.52045535396656, "grad_norm": 0.47115445022716557, "learning_rate": 1.8609824916874602e-06, "loss": 0.0267, "step": 29688 }, { "epoch": 3.520573935728685, "grad_norm": 0.5992225640650202, "learning_rate": 1.8600738234267223e-06, "loss": 0.0239, "step": 29689 }, { "epoch": 3.5206925174908097, "grad_norm": 0.44771610779624055, "learning_rate": 1.8591653684877392e-06, "loss": 0.0232, "step": 29690 }, { "epoch": 3.520811099252935, "grad_norm": 0.6867346275128839, "learning_rate": 1.8582571268788678e-06, "loss": 0.0468, "step": 29691 }, { "epoch": 3.52092968101506, "grad_norm": 0.3671703580022963, "learning_rate": 1.857349098608488e-06, "loss": 0.0217, "step": 29692 }, { "epoch": 3.521048262777185, "grad_norm": 0.34657071949819673, "learning_rate": 1.8564412836849704e-06, "loss": 0.0172, "step": 29693 }, { "epoch": 3.5211668445393096, "grad_norm": 0.48540224407376853, "learning_rate": 1.8555336821166835e-06, "loss": 0.0226, "step": 29694 }, { "epoch": 3.521285426301435, "grad_norm": 0.6520159605800557, "learning_rate": 1.8546262939120012e-06, "loss": 0.0267, "step": 29695 }, { "epoch": 3.52140400806356, "grad_norm": 0.5376216652980128, "learning_rate": 1.8537191190792752e-06, "loss": 0.0253, "step": 29696 }, { "epoch": 3.521522589825685, "grad_norm": 0.6995267668829592, "learning_rate": 1.8528121576268736e-06, "loss": 0.0313, "step": 29697 }, { "epoch": 3.5216411715878095, "grad_norm": 0.5387353452381793, "learning_rate": 1.8519054095631594e-06, "loss": 0.0144, "step": 29698 }, { "epoch": 3.5217597533499347, "grad_norm": 0.9250499282323328, "learning_rate": 1.8509988748964979e-06, "loss": 0.0475, "step": 29699 }, { "epoch": 3.52187833511206, "grad_norm": 0.4888143653995135, "learning_rate": 1.8500925536352299e-06, "loss": 0.0216, "step": 29700 }, { "epoch": 3.5219969168741847, "grad_norm": 0.5456528077285249, "learning_rate": 1.8491864457877289e-06, "loss": 0.0241, "step": 29701 }, { "epoch": 3.5221154986363095, "grad_norm": 0.6685726184589093, "learning_rate": 1.848280551362333e-06, "loss": 0.0337, "step": 29702 }, { "epoch": 3.5222340803984347, "grad_norm": 0.6970023529872814, "learning_rate": 1.847374870367402e-06, "loss": 0.0455, "step": 29703 }, { "epoch": 3.52235266216056, "grad_norm": 0.5489197892061664, "learning_rate": 1.8464694028112822e-06, "loss": 0.0251, "step": 29704 }, { "epoch": 3.5224712439226846, "grad_norm": 0.5353116764367867, "learning_rate": 1.8455641487023224e-06, "loss": 0.019, "step": 29705 }, { "epoch": 3.52258982568481, "grad_norm": 0.47505751643292954, "learning_rate": 1.844659108048874e-06, "loss": 0.0251, "step": 29706 }, { "epoch": 3.5227084074469346, "grad_norm": 0.7357875312882917, "learning_rate": 1.8437542808592666e-06, "loss": 0.0379, "step": 29707 }, { "epoch": 3.52282698920906, "grad_norm": 0.42882625277318426, "learning_rate": 1.8428496671418495e-06, "loss": 0.0268, "step": 29708 }, { "epoch": 3.5229455709711845, "grad_norm": 0.25241811279585646, "learning_rate": 1.8419452669049598e-06, "loss": 0.008, "step": 29709 }, { "epoch": 3.5230641527333098, "grad_norm": 0.4006458156053699, "learning_rate": 1.8410410801569412e-06, "loss": 0.0175, "step": 29710 }, { "epoch": 3.5231827344954345, "grad_norm": 0.630975163255042, "learning_rate": 1.8401371069061202e-06, "loss": 0.0266, "step": 29711 }, { "epoch": 3.5233013162575597, "grad_norm": 0.31771555545763186, "learning_rate": 1.8392333471608347e-06, "loss": 0.015, "step": 29712 }, { "epoch": 3.5234198980196845, "grad_norm": 0.6291049474267222, "learning_rate": 1.838329800929417e-06, "loss": 0.0294, "step": 29713 }, { "epoch": 3.5235384797818097, "grad_norm": 0.32485782197760504, "learning_rate": 1.8374264682201964e-06, "loss": 0.0164, "step": 29714 }, { "epoch": 3.5236570615439344, "grad_norm": 0.6103802782541219, "learning_rate": 1.8365233490414995e-06, "loss": 0.0247, "step": 29715 }, { "epoch": 3.5237756433060596, "grad_norm": 0.6548619220076796, "learning_rate": 1.8356204434016505e-06, "loss": 0.0313, "step": 29716 }, { "epoch": 3.5238942250681844, "grad_norm": 0.4996384464336699, "learning_rate": 1.8347177513089814e-06, "loss": 0.0196, "step": 29717 }, { "epoch": 3.5240128068303096, "grad_norm": 0.6585623012661895, "learning_rate": 1.8338152727718023e-06, "loss": 0.0247, "step": 29718 }, { "epoch": 3.5241313885924344, "grad_norm": 0.6963795298365674, "learning_rate": 1.8329130077984369e-06, "loss": 0.0403, "step": 29719 }, { "epoch": 3.5242499703545596, "grad_norm": 0.5714889052874405, "learning_rate": 1.8320109563972065e-06, "loss": 0.0298, "step": 29720 }, { "epoch": 3.5243685521166843, "grad_norm": 0.6195618349232952, "learning_rate": 1.8311091185764239e-06, "loss": 0.0264, "step": 29721 }, { "epoch": 3.5244871338788095, "grad_norm": 0.46879577847516685, "learning_rate": 1.8302074943444076e-06, "loss": 0.029, "step": 29722 }, { "epoch": 3.5246057156409343, "grad_norm": 0.5259661868476645, "learning_rate": 1.829306083709456e-06, "loss": 0.0209, "step": 29723 }, { "epoch": 3.5247242974030595, "grad_norm": 0.9055345916563335, "learning_rate": 1.8284048866798992e-06, "loss": 0.0383, "step": 29724 }, { "epoch": 3.5248428791651842, "grad_norm": 0.56106685881978, "learning_rate": 1.8275039032640273e-06, "loss": 0.0292, "step": 29725 }, { "epoch": 3.5249614609273094, "grad_norm": 0.7752177432515858, "learning_rate": 1.8266031334701616e-06, "loss": 0.0362, "step": 29726 }, { "epoch": 3.525080042689434, "grad_norm": 0.28872964714054916, "learning_rate": 1.8257025773065873e-06, "loss": 0.0116, "step": 29727 }, { "epoch": 3.5251986244515594, "grad_norm": 0.7143010570943701, "learning_rate": 1.8248022347816252e-06, "loss": 0.0283, "step": 29728 }, { "epoch": 3.5253172062136846, "grad_norm": 0.529932449245664, "learning_rate": 1.8239021059035632e-06, "loss": 0.0261, "step": 29729 }, { "epoch": 3.5254357879758094, "grad_norm": 0.29375156478062153, "learning_rate": 1.8230021906807033e-06, "loss": 0.0135, "step": 29730 }, { "epoch": 3.525554369737934, "grad_norm": 0.3890218094661398, "learning_rate": 1.8221024891213412e-06, "loss": 0.0207, "step": 29731 }, { "epoch": 3.5256729515000593, "grad_norm": 0.7284754227780927, "learning_rate": 1.8212030012337705e-06, "loss": 0.0224, "step": 29732 }, { "epoch": 3.5257915332621845, "grad_norm": 0.5576624598077933, "learning_rate": 1.8203037270262902e-06, "loss": 0.0316, "step": 29733 }, { "epoch": 3.5259101150243093, "grad_norm": 0.8065300353300112, "learning_rate": 1.8194046665071767e-06, "loss": 0.0237, "step": 29734 }, { "epoch": 3.526028696786434, "grad_norm": 0.6892253378817784, "learning_rate": 1.818505819684735e-06, "loss": 0.0365, "step": 29735 }, { "epoch": 3.5261472785485592, "grad_norm": 0.6990501324314508, "learning_rate": 1.8176071865672385e-06, "loss": 0.0316, "step": 29736 }, { "epoch": 3.5262658603106845, "grad_norm": 0.37134012610537076, "learning_rate": 1.8167087671629779e-06, "loss": 0.0154, "step": 29737 }, { "epoch": 3.526384442072809, "grad_norm": 0.5492653098781812, "learning_rate": 1.8158105614802274e-06, "loss": 0.0262, "step": 29738 }, { "epoch": 3.526503023834934, "grad_norm": 0.7258268100105604, "learning_rate": 1.81491256952728e-06, "loss": 0.0347, "step": 29739 }, { "epoch": 3.526621605597059, "grad_norm": 0.3487530175058038, "learning_rate": 1.8140147913124017e-06, "loss": 0.0134, "step": 29740 }, { "epoch": 3.5267401873591844, "grad_norm": 0.5475254010171273, "learning_rate": 1.8131172268438774e-06, "loss": 0.0353, "step": 29741 }, { "epoch": 3.526858769121309, "grad_norm": 0.4152964105190126, "learning_rate": 1.812219876129978e-06, "loss": 0.0171, "step": 29742 }, { "epoch": 3.526977350883434, "grad_norm": 0.43576758401708504, "learning_rate": 1.811322739178975e-06, "loss": 0.0203, "step": 29743 }, { "epoch": 3.527095932645559, "grad_norm": 0.5038076982810951, "learning_rate": 1.810425815999145e-06, "loss": 0.0185, "step": 29744 }, { "epoch": 3.5272145144076843, "grad_norm": 0.4616487735042255, "learning_rate": 1.8095291065987453e-06, "loss": 0.0168, "step": 29745 }, { "epoch": 3.527333096169809, "grad_norm": 0.5776199886167062, "learning_rate": 1.8086326109860553e-06, "loss": 0.0278, "step": 29746 }, { "epoch": 3.527451677931934, "grad_norm": 0.4409578798158197, "learning_rate": 1.8077363291693295e-06, "loss": 0.0204, "step": 29747 }, { "epoch": 3.527570259694059, "grad_norm": 0.7135770286718727, "learning_rate": 1.806840261156842e-06, "loss": 0.021, "step": 29748 }, { "epoch": 3.527688841456184, "grad_norm": 0.40929862935449957, "learning_rate": 1.8059444069568333e-06, "loss": 0.0264, "step": 29749 }, { "epoch": 3.527807423218309, "grad_norm": 0.4696238576683629, "learning_rate": 1.8050487665775856e-06, "loss": 0.0165, "step": 29750 }, { "epoch": 3.5279260049804337, "grad_norm": 0.4805204173437258, "learning_rate": 1.8041533400273396e-06, "loss": 0.0191, "step": 29751 }, { "epoch": 3.528044586742559, "grad_norm": 0.8351890280130448, "learning_rate": 1.8032581273143555e-06, "loss": 0.0407, "step": 29752 }, { "epoch": 3.528163168504684, "grad_norm": 0.46345807106851716, "learning_rate": 1.8023631284468878e-06, "loss": 0.0211, "step": 29753 }, { "epoch": 3.528281750266809, "grad_norm": 0.4764786421781317, "learning_rate": 1.8014683434331825e-06, "loss": 0.0216, "step": 29754 }, { "epoch": 3.528400332028934, "grad_norm": 0.5979450285203324, "learning_rate": 1.800573772281497e-06, "loss": 0.0311, "step": 29755 }, { "epoch": 3.528518913791059, "grad_norm": 0.5781158556776873, "learning_rate": 1.7996794150000661e-06, "loss": 0.0217, "step": 29756 }, { "epoch": 3.528637495553184, "grad_norm": 0.43461293957983593, "learning_rate": 1.798785271597142e-06, "loss": 0.0189, "step": 29757 }, { "epoch": 3.528756077315309, "grad_norm": 0.7318547932139784, "learning_rate": 1.7978913420809678e-06, "loss": 0.03, "step": 29758 }, { "epoch": 3.528874659077434, "grad_norm": 0.46857644655075453, "learning_rate": 1.7969976264597843e-06, "loss": 0.0256, "step": 29759 }, { "epoch": 3.528993240839559, "grad_norm": 0.4098172809060326, "learning_rate": 1.7961041247418236e-06, "loss": 0.0213, "step": 29760 }, { "epoch": 3.529111822601684, "grad_norm": 0.657450695798323, "learning_rate": 1.795210836935335e-06, "loss": 0.0277, "step": 29761 }, { "epoch": 3.5292304043638087, "grad_norm": 0.6956680632804466, "learning_rate": 1.7943177630485448e-06, "loss": 0.0326, "step": 29762 }, { "epoch": 3.529348986125934, "grad_norm": 0.8641524781888356, "learning_rate": 1.7934249030896854e-06, "loss": 0.0491, "step": 29763 }, { "epoch": 3.5294675678880587, "grad_norm": 0.4476380285047533, "learning_rate": 1.7925322570669923e-06, "loss": 0.0195, "step": 29764 }, { "epoch": 3.529586149650184, "grad_norm": 0.3403047944334038, "learning_rate": 1.7916398249886945e-06, "loss": 0.0094, "step": 29765 }, { "epoch": 3.5297047314123087, "grad_norm": 0.439544609568498, "learning_rate": 1.7907476068630192e-06, "loss": 0.0208, "step": 29766 }, { "epoch": 3.529823313174434, "grad_norm": 0.5261525017984751, "learning_rate": 1.7898556026981871e-06, "loss": 0.0225, "step": 29767 }, { "epoch": 3.5299418949365586, "grad_norm": 0.4613335383832063, "learning_rate": 1.7889638125024226e-06, "loss": 0.0205, "step": 29768 }, { "epoch": 3.530060476698684, "grad_norm": 0.25250783324545806, "learning_rate": 1.7880722362839498e-06, "loss": 0.0111, "step": 29769 }, { "epoch": 3.5301790584608086, "grad_norm": 0.4006041100798737, "learning_rate": 1.7871808740509894e-06, "loss": 0.018, "step": 29770 }, { "epoch": 3.530297640222934, "grad_norm": 0.5715513134702446, "learning_rate": 1.786289725811749e-06, "loss": 0.0243, "step": 29771 }, { "epoch": 3.5304162219850586, "grad_norm": 0.5265539816408541, "learning_rate": 1.785398791574458e-06, "loss": 0.0268, "step": 29772 }, { "epoch": 3.5305348037471838, "grad_norm": 0.4411914484670427, "learning_rate": 1.7845080713473212e-06, "loss": 0.0203, "step": 29773 }, { "epoch": 3.5306533855093085, "grad_norm": 0.6804348971348104, "learning_rate": 1.7836175651385484e-06, "loss": 0.0293, "step": 29774 }, { "epoch": 3.5307719672714337, "grad_norm": 0.6091942990032148, "learning_rate": 1.7827272729563526e-06, "loss": 0.0264, "step": 29775 }, { "epoch": 3.5308905490335585, "grad_norm": 0.4288014175599847, "learning_rate": 1.7818371948089386e-06, "loss": 0.0295, "step": 29776 }, { "epoch": 3.5310091307956837, "grad_norm": 0.5536340441838888, "learning_rate": 1.7809473307045215e-06, "loss": 0.0272, "step": 29777 }, { "epoch": 3.531127712557809, "grad_norm": 0.48684513697108084, "learning_rate": 1.7800576806512897e-06, "loss": 0.0159, "step": 29778 }, { "epoch": 3.5312462943199336, "grad_norm": 0.4312612775888368, "learning_rate": 1.779168244657453e-06, "loss": 0.0165, "step": 29779 }, { "epoch": 3.5313648760820584, "grad_norm": 0.5374613345885483, "learning_rate": 1.7782790227312106e-06, "loss": 0.032, "step": 29780 }, { "epoch": 3.5314834578441836, "grad_norm": 0.4593400431718555, "learning_rate": 1.7773900148807587e-06, "loss": 0.0198, "step": 29781 }, { "epoch": 3.531602039606309, "grad_norm": 0.7657652966938282, "learning_rate": 1.7765012211142962e-06, "loss": 0.029, "step": 29782 }, { "epoch": 3.5317206213684336, "grad_norm": 0.5974263722987603, "learning_rate": 1.7756126414400082e-06, "loss": 0.0229, "step": 29783 }, { "epoch": 3.5318392031305583, "grad_norm": 0.3957321137181532, "learning_rate": 1.7747242758660992e-06, "loss": 0.0196, "step": 29784 }, { "epoch": 3.5319577848926835, "grad_norm": 0.509377134328125, "learning_rate": 1.7738361244007461e-06, "loss": 0.0305, "step": 29785 }, { "epoch": 3.5320763666548087, "grad_norm": 0.7960216473323293, "learning_rate": 1.7729481870521452e-06, "loss": 0.0322, "step": 29786 }, { "epoch": 3.5321949484169335, "grad_norm": 0.5857607100419213, "learning_rate": 1.772060463828476e-06, "loss": 0.0319, "step": 29787 }, { "epoch": 3.5323135301790582, "grad_norm": 0.9649839251366753, "learning_rate": 1.771172954737932e-06, "loss": 0.0249, "step": 29788 }, { "epoch": 3.5324321119411835, "grad_norm": 0.6824612739463263, "learning_rate": 1.7702856597886813e-06, "loss": 0.0387, "step": 29789 }, { "epoch": 3.5325506937033087, "grad_norm": 0.41981516135438396, "learning_rate": 1.7693985789889123e-06, "loss": 0.0182, "step": 29790 }, { "epoch": 3.5326692754654334, "grad_norm": 0.7424707163317142, "learning_rate": 1.7685117123468016e-06, "loss": 0.0382, "step": 29791 }, { "epoch": 3.532787857227558, "grad_norm": 0.7691360384680115, "learning_rate": 1.767625059870523e-06, "loss": 0.0286, "step": 29792 }, { "epoch": 3.5329064389896834, "grad_norm": 0.4726886140656565, "learning_rate": 1.7667386215682591e-06, "loss": 0.0163, "step": 29793 }, { "epoch": 3.5330250207518086, "grad_norm": 0.4180160839087153, "learning_rate": 1.7658523974481644e-06, "loss": 0.016, "step": 29794 }, { "epoch": 3.5331436025139333, "grad_norm": 0.4763833556551987, "learning_rate": 1.764966387518427e-06, "loss": 0.0253, "step": 29795 }, { "epoch": 3.533262184276058, "grad_norm": 0.4350279395138895, "learning_rate": 1.764080591787201e-06, "loss": 0.0183, "step": 29796 }, { "epoch": 3.5333807660381833, "grad_norm": 0.57984987883396, "learning_rate": 1.7631950102626666e-06, "loss": 0.0308, "step": 29797 }, { "epoch": 3.5334993478003085, "grad_norm": 0.7109498071084223, "learning_rate": 1.7623096429529668e-06, "loss": 0.0393, "step": 29798 }, { "epoch": 3.5336179295624333, "grad_norm": 0.42575143583734737, "learning_rate": 1.7614244898662868e-06, "loss": 0.0244, "step": 29799 }, { "epoch": 3.533736511324558, "grad_norm": 0.4894183557219041, "learning_rate": 1.7605395510107704e-06, "loss": 0.0255, "step": 29800 }, { "epoch": 3.533855093086683, "grad_norm": 0.6967255717471638, "learning_rate": 1.7596548263945828e-06, "loss": 0.029, "step": 29801 }, { "epoch": 3.5339736748488084, "grad_norm": 0.565453006553706, "learning_rate": 1.7587703160258789e-06, "loss": 0.0258, "step": 29802 }, { "epoch": 3.534092256610933, "grad_norm": 0.34699338374368754, "learning_rate": 1.7578860199128105e-06, "loss": 0.0208, "step": 29803 }, { "epoch": 3.5342108383730584, "grad_norm": 0.5329438542840225, "learning_rate": 1.7570019380635378e-06, "loss": 0.0372, "step": 29804 }, { "epoch": 3.534329420135183, "grad_norm": 0.6070295370621347, "learning_rate": 1.7561180704861958e-06, "loss": 0.0294, "step": 29805 }, { "epoch": 3.5344480018973083, "grad_norm": 0.7241442145827334, "learning_rate": 1.7552344171889474e-06, "loss": 0.0373, "step": 29806 }, { "epoch": 3.534566583659433, "grad_norm": 0.4590079409599786, "learning_rate": 1.7543509781799332e-06, "loss": 0.0208, "step": 29807 }, { "epoch": 3.5346851654215583, "grad_norm": 0.36181743238498887, "learning_rate": 1.7534677534672971e-06, "loss": 0.0143, "step": 29808 }, { "epoch": 3.534803747183683, "grad_norm": 0.9479400055064624, "learning_rate": 1.7525847430591769e-06, "loss": 0.0482, "step": 29809 }, { "epoch": 3.5349223289458083, "grad_norm": 0.5026042435741257, "learning_rate": 1.7517019469637242e-06, "loss": 0.0216, "step": 29810 }, { "epoch": 3.535040910707933, "grad_norm": 0.4065932565695605, "learning_rate": 1.7508193651890659e-06, "loss": 0.0184, "step": 29811 }, { "epoch": 3.5351594924700582, "grad_norm": 0.5686483375640226, "learning_rate": 1.7499369977433456e-06, "loss": 0.0258, "step": 29812 }, { "epoch": 3.535278074232183, "grad_norm": 0.7063369383911269, "learning_rate": 1.7490548446346928e-06, "loss": 0.0344, "step": 29813 }, { "epoch": 3.535396655994308, "grad_norm": 0.6307223736504821, "learning_rate": 1.7481729058712425e-06, "loss": 0.0196, "step": 29814 }, { "epoch": 3.535515237756433, "grad_norm": 0.6120507000120063, "learning_rate": 1.7472911814611276e-06, "loss": 0.0232, "step": 29815 }, { "epoch": 3.535633819518558, "grad_norm": 0.34526954467351745, "learning_rate": 1.7464096714124662e-06, "loss": 0.0125, "step": 29816 }, { "epoch": 3.535752401280683, "grad_norm": 0.6143104941930999, "learning_rate": 1.7455283757334018e-06, "loss": 0.0293, "step": 29817 }, { "epoch": 3.535870983042808, "grad_norm": 0.5187638570596144, "learning_rate": 1.7446472944320446e-06, "loss": 0.0248, "step": 29818 }, { "epoch": 3.535989564804933, "grad_norm": 0.4925714011980354, "learning_rate": 1.7437664275165245e-06, "loss": 0.0247, "step": 29819 }, { "epoch": 3.536108146567058, "grad_norm": 0.3907879156021508, "learning_rate": 1.742885774994954e-06, "loss": 0.0182, "step": 29820 }, { "epoch": 3.536226728329183, "grad_norm": 0.45103791226788476, "learning_rate": 1.742005336875463e-06, "loss": 0.0219, "step": 29821 }, { "epoch": 3.536345310091308, "grad_norm": 0.4708229392251132, "learning_rate": 1.741125113166156e-06, "loss": 0.023, "step": 29822 }, { "epoch": 3.536463891853433, "grad_norm": 0.3293516195170832, "learning_rate": 1.740245103875157e-06, "loss": 0.0136, "step": 29823 }, { "epoch": 3.536582473615558, "grad_norm": 0.30673657240651214, "learning_rate": 1.7393653090105738e-06, "loss": 0.0142, "step": 29824 }, { "epoch": 3.5367010553776828, "grad_norm": 0.8813419418895659, "learning_rate": 1.738485728580519e-06, "loss": 0.0325, "step": 29825 }, { "epoch": 3.536819637139808, "grad_norm": 0.5471058434582963, "learning_rate": 1.7376063625931027e-06, "loss": 0.0263, "step": 29826 }, { "epoch": 3.5369382189019327, "grad_norm": 0.4110963901204428, "learning_rate": 1.736727211056427e-06, "loss": 0.0145, "step": 29827 }, { "epoch": 3.537056800664058, "grad_norm": 0.5729802721238637, "learning_rate": 1.7358482739785992e-06, "loss": 0.0326, "step": 29828 }, { "epoch": 3.5371753824261827, "grad_norm": 0.45772041966232146, "learning_rate": 1.734969551367721e-06, "loss": 0.021, "step": 29829 }, { "epoch": 3.537293964188308, "grad_norm": 0.7026992874302102, "learning_rate": 1.7340910432318946e-06, "loss": 0.038, "step": 29830 }, { "epoch": 3.537412545950433, "grad_norm": 0.6072973548208316, "learning_rate": 1.7332127495792189e-06, "loss": 0.0274, "step": 29831 }, { "epoch": 3.537531127712558, "grad_norm": 0.39797307307314894, "learning_rate": 1.73233467041779e-06, "loss": 0.0205, "step": 29832 }, { "epoch": 3.5376497094746826, "grad_norm": 0.47417361822863985, "learning_rate": 1.7314568057557046e-06, "loss": 0.0209, "step": 29833 }, { "epoch": 3.537768291236808, "grad_norm": 0.4523918507213722, "learning_rate": 1.7305791556010532e-06, "loss": 0.0181, "step": 29834 }, { "epoch": 3.537886872998933, "grad_norm": 0.6083819919802055, "learning_rate": 1.7297017199619236e-06, "loss": 0.0253, "step": 29835 }, { "epoch": 3.5380054547610578, "grad_norm": 0.4105308727370176, "learning_rate": 1.7288244988464097e-06, "loss": 0.0186, "step": 29836 }, { "epoch": 3.5381240365231825, "grad_norm": 0.7437640263052261, "learning_rate": 1.7279474922626021e-06, "loss": 0.0274, "step": 29837 }, { "epoch": 3.5382426182853077, "grad_norm": 0.7463108807535694, "learning_rate": 1.727070700218575e-06, "loss": 0.0413, "step": 29838 }, { "epoch": 3.538361200047433, "grad_norm": 0.32205807592036056, "learning_rate": 1.726194122722416e-06, "loss": 0.0156, "step": 29839 }, { "epoch": 3.5384797818095577, "grad_norm": 0.5236762675707538, "learning_rate": 1.7253177597822078e-06, "loss": 0.0282, "step": 29840 }, { "epoch": 3.5385983635716824, "grad_norm": 0.45458682206957407, "learning_rate": 1.7244416114060301e-06, "loss": 0.0188, "step": 29841 }, { "epoch": 3.5387169453338077, "grad_norm": 0.5487242520176663, "learning_rate": 1.7235656776019626e-06, "loss": 0.0229, "step": 29842 }, { "epoch": 3.538835527095933, "grad_norm": 0.46616555505282553, "learning_rate": 1.7226899583780654e-06, "loss": 0.0166, "step": 29843 }, { "epoch": 3.5389541088580576, "grad_norm": 0.5716581887123204, "learning_rate": 1.721814453742432e-06, "loss": 0.026, "step": 29844 }, { "epoch": 3.5390726906201824, "grad_norm": 0.5047634887575484, "learning_rate": 1.7209391637031197e-06, "loss": 0.0261, "step": 29845 }, { "epoch": 3.5391912723823076, "grad_norm": 0.8422113633952243, "learning_rate": 1.7200640882682e-06, "loss": 0.0342, "step": 29846 }, { "epoch": 3.5393098541444328, "grad_norm": 0.4934628124124678, "learning_rate": 1.7191892274457445e-06, "loss": 0.0243, "step": 29847 }, { "epoch": 3.5394284359065575, "grad_norm": 0.519232498817814, "learning_rate": 1.7183145812438184e-06, "loss": 0.0319, "step": 29848 }, { "epoch": 3.5395470176686823, "grad_norm": 0.4231169568421761, "learning_rate": 1.7174401496704767e-06, "loss": 0.0211, "step": 29849 }, { "epoch": 3.5396655994308075, "grad_norm": 0.4309525575842736, "learning_rate": 1.716565932733788e-06, "loss": 0.0195, "step": 29850 }, { "epoch": 3.5397841811929327, "grad_norm": 0.4648038862937506, "learning_rate": 1.7156919304418068e-06, "loss": 0.0162, "step": 29851 }, { "epoch": 3.5399027629550575, "grad_norm": 0.4509805621105282, "learning_rate": 1.7148181428025962e-06, "loss": 0.0208, "step": 29852 }, { "epoch": 3.5400213447171827, "grad_norm": 0.4517073671130342, "learning_rate": 1.713944569824208e-06, "loss": 0.0204, "step": 29853 }, { "epoch": 3.5401399264793074, "grad_norm": 0.843925161529657, "learning_rate": 1.7130712115146913e-06, "loss": 0.0499, "step": 29854 }, { "epoch": 3.5402585082414326, "grad_norm": 0.3756836068383002, "learning_rate": 1.7121980678821064e-06, "loss": 0.0158, "step": 29855 }, { "epoch": 3.5403770900035574, "grad_norm": 0.8415302714690331, "learning_rate": 1.7113251389344969e-06, "loss": 0.0351, "step": 29856 }, { "epoch": 3.5404956717656826, "grad_norm": 0.3928314870784064, "learning_rate": 1.710452424679909e-06, "loss": 0.0172, "step": 29857 }, { "epoch": 3.5406142535278073, "grad_norm": 0.522855154415334, "learning_rate": 1.7095799251263922e-06, "loss": 0.0265, "step": 29858 }, { "epoch": 3.5407328352899325, "grad_norm": 0.3957905381863885, "learning_rate": 1.7087076402819896e-06, "loss": 0.019, "step": 29859 }, { "epoch": 3.5408514170520573, "grad_norm": 0.5036143622327476, "learning_rate": 1.7078355701547394e-06, "loss": 0.0253, "step": 29860 }, { "epoch": 3.5409699988141825, "grad_norm": 0.6341055452638957, "learning_rate": 1.7069637147526824e-06, "loss": 0.0319, "step": 29861 }, { "epoch": 3.5410885805763073, "grad_norm": 0.5486917435269995, "learning_rate": 1.7060920740838538e-06, "loss": 0.0372, "step": 29862 }, { "epoch": 3.5412071623384325, "grad_norm": 0.652490334588292, "learning_rate": 1.7052206481562915e-06, "loss": 0.0424, "step": 29863 }, { "epoch": 3.5413257441005572, "grad_norm": 0.8264144148776539, "learning_rate": 1.7043494369780367e-06, "loss": 0.0374, "step": 29864 }, { "epoch": 3.5414443258626824, "grad_norm": 0.49793402026515315, "learning_rate": 1.7034784405571018e-06, "loss": 0.0248, "step": 29865 }, { "epoch": 3.541562907624807, "grad_norm": 0.527763515865206, "learning_rate": 1.7026076589015366e-06, "loss": 0.0256, "step": 29866 }, { "epoch": 3.5416814893869324, "grad_norm": 0.4020104147478867, "learning_rate": 1.7017370920193537e-06, "loss": 0.0165, "step": 29867 }, { "epoch": 3.541800071149057, "grad_norm": 0.7513557306923028, "learning_rate": 1.7008667399185912e-06, "loss": 0.0328, "step": 29868 }, { "epoch": 3.5419186529111824, "grad_norm": 0.6295057151808664, "learning_rate": 1.6999966026072568e-06, "loss": 0.04, "step": 29869 }, { "epoch": 3.542037234673307, "grad_norm": 0.6083393387560253, "learning_rate": 1.699126680093388e-06, "loss": 0.0318, "step": 29870 }, { "epoch": 3.5421558164354323, "grad_norm": 0.3369097470882282, "learning_rate": 1.6982569723849956e-06, "loss": 0.0179, "step": 29871 }, { "epoch": 3.542274398197557, "grad_norm": 0.5884479634680414, "learning_rate": 1.6973874794901007e-06, "loss": 0.0316, "step": 29872 }, { "epoch": 3.5423929799596823, "grad_norm": 0.5902351141991251, "learning_rate": 1.6965182014167163e-06, "loss": 0.0321, "step": 29873 }, { "epoch": 3.542511561721807, "grad_norm": 0.2552214514480266, "learning_rate": 1.6956491381728556e-06, "loss": 0.0124, "step": 29874 }, { "epoch": 3.5426301434839322, "grad_norm": 0.684058939501947, "learning_rate": 1.6947802897665399e-06, "loss": 0.0268, "step": 29875 }, { "epoch": 3.542748725246057, "grad_norm": 0.5782506562229784, "learning_rate": 1.69391165620576e-06, "loss": 0.0321, "step": 29876 }, { "epoch": 3.542867307008182, "grad_norm": 0.43153916030429934, "learning_rate": 1.6930432374985428e-06, "loss": 0.0243, "step": 29877 }, { "epoch": 3.542985888770307, "grad_norm": 0.5635094567141441, "learning_rate": 1.6921750336528846e-06, "loss": 0.0257, "step": 29878 }, { "epoch": 3.543104470532432, "grad_norm": 0.599253942043817, "learning_rate": 1.6913070446767903e-06, "loss": 0.0187, "step": 29879 }, { "epoch": 3.5432230522945574, "grad_norm": 0.8975826066513624, "learning_rate": 1.6904392705782591e-06, "loss": 0.0478, "step": 29880 }, { "epoch": 3.543341634056682, "grad_norm": 0.4528030354100953, "learning_rate": 1.6895717113652953e-06, "loss": 0.0223, "step": 29881 }, { "epoch": 3.543460215818807, "grad_norm": 0.6386564114857325, "learning_rate": 1.6887043670459012e-06, "loss": 0.0343, "step": 29882 }, { "epoch": 3.543578797580932, "grad_norm": 0.5383767017053323, "learning_rate": 1.687837237628062e-06, "loss": 0.0205, "step": 29883 }, { "epoch": 3.5436973793430573, "grad_norm": 1.1116871755873077, "learning_rate": 1.6869703231197742e-06, "loss": 0.0257, "step": 29884 }, { "epoch": 3.543815961105182, "grad_norm": 0.6122016230596661, "learning_rate": 1.6861036235290311e-06, "loss": 0.0315, "step": 29885 }, { "epoch": 3.543934542867307, "grad_norm": 0.34210628862133163, "learning_rate": 1.6852371388638294e-06, "loss": 0.0184, "step": 29886 }, { "epoch": 3.544053124629432, "grad_norm": 0.4373793186695634, "learning_rate": 1.684370869132143e-06, "loss": 0.0245, "step": 29887 }, { "epoch": 3.544171706391557, "grad_norm": 0.5107717647310136, "learning_rate": 1.683504814341974e-06, "loss": 0.0178, "step": 29888 }, { "epoch": 3.544290288153682, "grad_norm": 0.44761255884693124, "learning_rate": 1.682638974501291e-06, "loss": 0.0288, "step": 29889 }, { "epoch": 3.5444088699158067, "grad_norm": 0.33191912929559714, "learning_rate": 1.681773349618085e-06, "loss": 0.0166, "step": 29890 }, { "epoch": 3.544527451677932, "grad_norm": 0.37192234722833595, "learning_rate": 1.6809079397003353e-06, "loss": 0.0174, "step": 29891 }, { "epoch": 3.544646033440057, "grad_norm": 0.3263127026053091, "learning_rate": 1.6800427447560163e-06, "loss": 0.012, "step": 29892 }, { "epoch": 3.544764615202182, "grad_norm": 0.44511726798150447, "learning_rate": 1.6791777647931133e-06, "loss": 0.0274, "step": 29893 }, { "epoch": 3.5448831969643066, "grad_norm": 0.6437111225647386, "learning_rate": 1.6783129998195869e-06, "loss": 0.0391, "step": 29894 }, { "epoch": 3.545001778726432, "grad_norm": 0.621662052605796, "learning_rate": 1.6774484498434162e-06, "loss": 0.0259, "step": 29895 }, { "epoch": 3.545120360488557, "grad_norm": 0.709357268680643, "learning_rate": 1.6765841148725703e-06, "loss": 0.041, "step": 29896 }, { "epoch": 3.545238942250682, "grad_norm": 0.3424418985850515, "learning_rate": 1.6757199949150232e-06, "loss": 0.0207, "step": 29897 }, { "epoch": 3.5453575240128066, "grad_norm": 0.4004155183000806, "learning_rate": 1.6748560899787297e-06, "loss": 0.0231, "step": 29898 }, { "epoch": 3.5454761057749318, "grad_norm": 0.6804897141456072, "learning_rate": 1.673992400071661e-06, "loss": 0.0393, "step": 29899 }, { "epoch": 3.545594687537057, "grad_norm": 0.42614911785263093, "learning_rate": 1.673128925201778e-06, "loss": 0.0159, "step": 29900 }, { "epoch": 3.5457132692991817, "grad_norm": 0.7692149305780882, "learning_rate": 1.672265665377043e-06, "loss": 0.0379, "step": 29901 }, { "epoch": 3.5458318510613065, "grad_norm": 0.3504587636278705, "learning_rate": 1.6714026206054085e-06, "loss": 0.013, "step": 29902 }, { "epoch": 3.5459504328234317, "grad_norm": 0.3271972630156085, "learning_rate": 1.6705397908948372e-06, "loss": 0.0139, "step": 29903 }, { "epoch": 3.546069014585557, "grad_norm": 0.5620409880710147, "learning_rate": 1.6696771762532843e-06, "loss": 0.0273, "step": 29904 }, { "epoch": 3.5461875963476817, "grad_norm": 0.6707015732144354, "learning_rate": 1.668814776688693e-06, "loss": 0.0379, "step": 29905 }, { "epoch": 3.546306178109807, "grad_norm": 0.6113438138680686, "learning_rate": 1.6679525922090184e-06, "loss": 0.0219, "step": 29906 }, { "epoch": 3.5464247598719316, "grad_norm": 0.3967795501035116, "learning_rate": 1.667090622822212e-06, "loss": 0.0207, "step": 29907 }, { "epoch": 3.546543341634057, "grad_norm": 0.5176036119763264, "learning_rate": 1.6662288685362182e-06, "loss": 0.0312, "step": 29908 }, { "epoch": 3.5466619233961816, "grad_norm": 0.6923300138796791, "learning_rate": 1.66536732935898e-06, "loss": 0.0287, "step": 29909 }, { "epoch": 3.546780505158307, "grad_norm": 0.6852033157374657, "learning_rate": 1.6645060052984385e-06, "loss": 0.024, "step": 29910 }, { "epoch": 3.5468990869204315, "grad_norm": 0.4795514573921929, "learning_rate": 1.6636448963625346e-06, "loss": 0.0231, "step": 29911 }, { "epoch": 3.5470176686825567, "grad_norm": 0.40009489720572866, "learning_rate": 1.662784002559209e-06, "loss": 0.0197, "step": 29912 }, { "epoch": 3.5471362504446815, "grad_norm": 0.7024274503316731, "learning_rate": 1.6619233238963999e-06, "loss": 0.0289, "step": 29913 }, { "epoch": 3.5472548322068067, "grad_norm": 0.2830419325591423, "learning_rate": 1.6610628603820316e-06, "loss": 0.0143, "step": 29914 }, { "epoch": 3.5473734139689315, "grad_norm": 0.5668692078642118, "learning_rate": 1.6602026120240504e-06, "loss": 0.0277, "step": 29915 }, { "epoch": 3.5474919957310567, "grad_norm": 0.3545266990688527, "learning_rate": 1.659342578830378e-06, "loss": 0.0213, "step": 29916 }, { "epoch": 3.5476105774931814, "grad_norm": 0.5359235261662698, "learning_rate": 1.6584827608089438e-06, "loss": 0.0239, "step": 29917 }, { "epoch": 3.5477291592553066, "grad_norm": 0.6596112053968065, "learning_rate": 1.6576231579676749e-06, "loss": 0.0353, "step": 29918 }, { "epoch": 3.5478477410174314, "grad_norm": 0.40766524546466537, "learning_rate": 1.656763770314501e-06, "loss": 0.0177, "step": 29919 }, { "epoch": 3.5479663227795566, "grad_norm": 0.5274222364424056, "learning_rate": 1.6559045978573356e-06, "loss": 0.0325, "step": 29920 }, { "epoch": 3.5480849045416814, "grad_norm": 0.8957081181116467, "learning_rate": 1.6550456406041026e-06, "loss": 0.0334, "step": 29921 }, { "epoch": 3.5482034863038066, "grad_norm": 0.5689893845287324, "learning_rate": 1.6541868985627235e-06, "loss": 0.0204, "step": 29922 }, { "epoch": 3.5483220680659313, "grad_norm": 0.3754533223834487, "learning_rate": 1.6533283717411085e-06, "loss": 0.0202, "step": 29923 }, { "epoch": 3.5484406498280565, "grad_norm": 0.9014796454673579, "learning_rate": 1.6524700601471848e-06, "loss": 0.0558, "step": 29924 }, { "epoch": 3.5485592315901813, "grad_norm": 0.479610826399354, "learning_rate": 1.651611963788846e-06, "loss": 0.0206, "step": 29925 }, { "epoch": 3.5486778133523065, "grad_norm": 0.5031512092864114, "learning_rate": 1.650754082674022e-06, "loss": 0.0232, "step": 29926 }, { "epoch": 3.5487963951144312, "grad_norm": 0.4948598407278654, "learning_rate": 1.649896416810609e-06, "loss": 0.02, "step": 29927 }, { "epoch": 3.5489149768765564, "grad_norm": 0.41450905942702393, "learning_rate": 1.6490389662065148e-06, "loss": 0.0235, "step": 29928 }, { "epoch": 3.5490335586386816, "grad_norm": 0.8275891066045656, "learning_rate": 1.6481817308696495e-06, "loss": 0.0523, "step": 29929 }, { "epoch": 3.5491521404008064, "grad_norm": 0.5097201178551718, "learning_rate": 1.6473247108079127e-06, "loss": 0.0197, "step": 29930 }, { "epoch": 3.549270722162931, "grad_norm": 0.569110279203461, "learning_rate": 1.646467906029206e-06, "loss": 0.0165, "step": 29931 }, { "epoch": 3.5493893039250564, "grad_norm": 0.5139208042449925, "learning_rate": 1.6456113165414234e-06, "loss": 0.0256, "step": 29932 }, { "epoch": 3.5495078856871816, "grad_norm": 0.39636152900265587, "learning_rate": 1.6447549423524694e-06, "loss": 0.0161, "step": 29933 }, { "epoch": 3.5496264674493063, "grad_norm": 0.48408208156945964, "learning_rate": 1.6438987834702324e-06, "loss": 0.0252, "step": 29934 }, { "epoch": 3.549745049211431, "grad_norm": 0.4902991578629438, "learning_rate": 1.6430428399026115e-06, "loss": 0.0255, "step": 29935 }, { "epoch": 3.5498636309735563, "grad_norm": 0.38776333101601707, "learning_rate": 1.6421871116574865e-06, "loss": 0.0284, "step": 29936 }, { "epoch": 3.5499822127356815, "grad_norm": 1.024216384161609, "learning_rate": 1.6413315987427597e-06, "loss": 0.0462, "step": 29937 }, { "epoch": 3.5501007944978062, "grad_norm": 0.5995667051344792, "learning_rate": 1.6404763011663076e-06, "loss": 0.0333, "step": 29938 }, { "epoch": 3.550219376259931, "grad_norm": 0.7818462698163368, "learning_rate": 1.6396212189360189e-06, "loss": 0.0423, "step": 29939 }, { "epoch": 3.550337958022056, "grad_norm": 0.4889185858247401, "learning_rate": 1.6387663520597758e-06, "loss": 0.0306, "step": 29940 }, { "epoch": 3.5504565397841814, "grad_norm": 0.45069208414725576, "learning_rate": 1.6379117005454607e-06, "loss": 0.0177, "step": 29941 }, { "epoch": 3.550575121546306, "grad_norm": 0.36663106784241933, "learning_rate": 1.637057264400954e-06, "loss": 0.0159, "step": 29942 }, { "epoch": 3.550693703308431, "grad_norm": 0.44398399306133945, "learning_rate": 1.6362030436341213e-06, "loss": 0.0223, "step": 29943 }, { "epoch": 3.550812285070556, "grad_norm": 0.45340747302936346, "learning_rate": 1.6353490382528563e-06, "loss": 0.0237, "step": 29944 }, { "epoch": 3.5509308668326813, "grad_norm": 0.42217185779061445, "learning_rate": 1.6344952482650166e-06, "loss": 0.02, "step": 29945 }, { "epoch": 3.551049448594806, "grad_norm": 0.578523733760798, "learning_rate": 1.6336416736784793e-06, "loss": 0.0303, "step": 29946 }, { "epoch": 3.551168030356931, "grad_norm": 0.5026515720556103, "learning_rate": 1.6327883145011075e-06, "loss": 0.0268, "step": 29947 }, { "epoch": 3.551286612119056, "grad_norm": 0.5949737603261039, "learning_rate": 1.6319351707407783e-06, "loss": 0.0305, "step": 29948 }, { "epoch": 3.5514051938811813, "grad_norm": 0.5575747739988298, "learning_rate": 1.631082242405349e-06, "loss": 0.0309, "step": 29949 }, { "epoch": 3.551523775643306, "grad_norm": 0.4320104524668897, "learning_rate": 1.630229529502683e-06, "loss": 0.0178, "step": 29950 }, { "epoch": 3.5516423574054308, "grad_norm": 0.251569566290122, "learning_rate": 1.6293770320406437e-06, "loss": 0.0105, "step": 29951 }, { "epoch": 3.551760939167556, "grad_norm": 0.6561059886653068, "learning_rate": 1.6285247500270883e-06, "loss": 0.0483, "step": 29952 }, { "epoch": 3.551879520929681, "grad_norm": 0.33644250945698745, "learning_rate": 1.6276726834698803e-06, "loss": 0.0165, "step": 29953 }, { "epoch": 3.551998102691806, "grad_norm": 0.28193489333838684, "learning_rate": 1.626820832376863e-06, "loss": 0.0124, "step": 29954 }, { "epoch": 3.552116684453931, "grad_norm": 0.3991332043774468, "learning_rate": 1.6259691967558971e-06, "loss": 0.02, "step": 29955 }, { "epoch": 3.552235266216056, "grad_norm": 0.6746520863943626, "learning_rate": 1.6251177766148318e-06, "loss": 0.0318, "step": 29956 }, { "epoch": 3.552353847978181, "grad_norm": 0.7186690893604176, "learning_rate": 1.624266571961522e-06, "loss": 0.0335, "step": 29957 }, { "epoch": 3.552472429740306, "grad_norm": 0.40318216696219716, "learning_rate": 1.6234155828037972e-06, "loss": 0.0157, "step": 29958 }, { "epoch": 3.552591011502431, "grad_norm": 0.4088844434067984, "learning_rate": 1.6225648091495266e-06, "loss": 0.0171, "step": 29959 }, { "epoch": 3.552709593264556, "grad_norm": 0.6014233319504014, "learning_rate": 1.6217142510065342e-06, "loss": 0.0233, "step": 29960 }, { "epoch": 3.552828175026681, "grad_norm": 0.6325520703068507, "learning_rate": 1.6208639083826693e-06, "loss": 0.0324, "step": 29961 }, { "epoch": 3.552946756788806, "grad_norm": 0.5586233433904104, "learning_rate": 1.6200137812857701e-06, "loss": 0.0234, "step": 29962 }, { "epoch": 3.553065338550931, "grad_norm": 0.4392821476936729, "learning_rate": 1.6191638697236721e-06, "loss": 0.0257, "step": 29963 }, { "epoch": 3.5531839203130557, "grad_norm": 0.7398689062198018, "learning_rate": 1.6183141737042163e-06, "loss": 0.0287, "step": 29964 }, { "epoch": 3.553302502075181, "grad_norm": 0.7087316739561472, "learning_rate": 1.6174646932352294e-06, "loss": 0.0332, "step": 29965 }, { "epoch": 3.5534210838373057, "grad_norm": 0.6911970843534968, "learning_rate": 1.6166154283245417e-06, "loss": 0.0253, "step": 29966 }, { "epoch": 3.553539665599431, "grad_norm": 0.6176333771047589, "learning_rate": 1.6157663789799854e-06, "loss": 0.0427, "step": 29967 }, { "epoch": 3.5536582473615557, "grad_norm": 0.3478029549539082, "learning_rate": 1.6149175452093933e-06, "loss": 0.0191, "step": 29968 }, { "epoch": 3.553776829123681, "grad_norm": 0.6018647652046477, "learning_rate": 1.6140689270205788e-06, "loss": 0.0256, "step": 29969 }, { "epoch": 3.5538954108858056, "grad_norm": 1.062596683589643, "learning_rate": 1.6132205244213716e-06, "loss": 0.0531, "step": 29970 }, { "epoch": 3.554013992647931, "grad_norm": 0.5785545681191002, "learning_rate": 1.6123723374195932e-06, "loss": 0.0222, "step": 29971 }, { "epoch": 3.5541325744100556, "grad_norm": 1.135298567295015, "learning_rate": 1.6115243660230622e-06, "loss": 0.064, "step": 29972 }, { "epoch": 3.554251156172181, "grad_norm": 0.5735655818436343, "learning_rate": 1.6106766102395949e-06, "loss": 0.0329, "step": 29973 }, { "epoch": 3.5543697379343056, "grad_norm": 0.20924832487709397, "learning_rate": 1.609829070077007e-06, "loss": 0.0116, "step": 29974 }, { "epoch": 3.5544883196964308, "grad_norm": 0.5269674014941343, "learning_rate": 1.6089817455431172e-06, "loss": 0.0242, "step": 29975 }, { "epoch": 3.5546069014585555, "grad_norm": 0.5618217938536881, "learning_rate": 1.608134636645728e-06, "loss": 0.0273, "step": 29976 }, { "epoch": 3.5547254832206807, "grad_norm": 0.8166111997006785, "learning_rate": 1.607287743392652e-06, "loss": 0.0554, "step": 29977 }, { "epoch": 3.554844064982806, "grad_norm": 0.37541114300031697, "learning_rate": 1.6064410657916946e-06, "loss": 0.0131, "step": 29978 }, { "epoch": 3.5549626467449307, "grad_norm": 0.3466812415782502, "learning_rate": 1.6055946038506658e-06, "loss": 0.0126, "step": 29979 }, { "epoch": 3.5550812285070554, "grad_norm": 0.35927746607831496, "learning_rate": 1.604748357577371e-06, "loss": 0.0153, "step": 29980 }, { "epoch": 3.5551998102691806, "grad_norm": 0.22567532861973727, "learning_rate": 1.6039023269795978e-06, "loss": 0.0113, "step": 29981 }, { "epoch": 3.555318392031306, "grad_norm": 0.8692775933358761, "learning_rate": 1.6030565120651654e-06, "loss": 0.0349, "step": 29982 }, { "epoch": 3.5554369737934306, "grad_norm": 0.46116516600696195, "learning_rate": 1.6022109128418534e-06, "loss": 0.0272, "step": 29983 }, { "epoch": 3.5555555555555554, "grad_norm": 0.5383122555697191, "learning_rate": 1.6013655293174722e-06, "loss": 0.0227, "step": 29984 }, { "epoch": 3.5556741373176806, "grad_norm": 0.7516918401642313, "learning_rate": 1.6005203614997965e-06, "loss": 0.0512, "step": 29985 }, { "epoch": 3.5557927190798058, "grad_norm": 0.4724607816892919, "learning_rate": 1.5996754093966365e-06, "loss": 0.0272, "step": 29986 }, { "epoch": 3.5559113008419305, "grad_norm": 0.7915142616075803, "learning_rate": 1.598830673015772e-06, "loss": 0.0354, "step": 29987 }, { "epoch": 3.5560298826040553, "grad_norm": 0.329079008806292, "learning_rate": 1.5979861523649913e-06, "loss": 0.014, "step": 29988 }, { "epoch": 3.5561484643661805, "grad_norm": 0.5377450554898543, "learning_rate": 1.5971418474520827e-06, "loss": 0.0203, "step": 29989 }, { "epoch": 3.5562670461283057, "grad_norm": 0.4814357541977004, "learning_rate": 1.596297758284826e-06, "loss": 0.0232, "step": 29990 }, { "epoch": 3.5563856278904304, "grad_norm": 0.4106003903344417, "learning_rate": 1.5954538848710065e-06, "loss": 0.0256, "step": 29991 }, { "epoch": 3.556504209652555, "grad_norm": 0.807240381865862, "learning_rate": 1.594610227218396e-06, "loss": 0.0417, "step": 29992 }, { "epoch": 3.5566227914146804, "grad_norm": 0.568221777996751, "learning_rate": 1.5937667853347853e-06, "loss": 0.0306, "step": 29993 }, { "epoch": 3.5567413731768056, "grad_norm": 0.3165360637766609, "learning_rate": 1.5929235592279379e-06, "loss": 0.0152, "step": 29994 }, { "epoch": 3.5568599549389304, "grad_norm": 0.5255532261684843, "learning_rate": 1.5920805489056361e-06, "loss": 0.0237, "step": 29995 }, { "epoch": 3.556978536701055, "grad_norm": 0.45128055420581636, "learning_rate": 1.591237754375638e-06, "loss": 0.0245, "step": 29996 }, { "epoch": 3.5570971184631803, "grad_norm": 0.5010781763217002, "learning_rate": 1.5903951756457314e-06, "loss": 0.0208, "step": 29997 }, { "epoch": 3.5572157002253055, "grad_norm": 0.5525397571463377, "learning_rate": 1.5895528127236687e-06, "loss": 0.0311, "step": 29998 }, { "epoch": 3.5573342819874303, "grad_norm": 0.5243811881358227, "learning_rate": 1.588710665617224e-06, "loss": 0.0233, "step": 29999 }, { "epoch": 3.557452863749555, "grad_norm": 0.3692116089041567, "learning_rate": 1.5878687343341553e-06, "loss": 0.0206, "step": 30000 }, { "epoch": 3.5575714455116803, "grad_norm": 0.35217078212237884, "learning_rate": 1.5870270188822284e-06, "loss": 0.0127, "step": 30001 }, { "epoch": 3.5576900272738055, "grad_norm": 0.5238778502767756, "learning_rate": 1.5861855192692066e-06, "loss": 0.0277, "step": 30002 }, { "epoch": 3.55780860903593, "grad_norm": 0.34041688069371806, "learning_rate": 1.5853442355028336e-06, "loss": 0.0148, "step": 30003 }, { "epoch": 3.5579271907980554, "grad_norm": 0.5133758415713613, "learning_rate": 1.584503167590881e-06, "loss": 0.0236, "step": 30004 }, { "epoch": 3.55804577256018, "grad_norm": 0.4645220097703041, "learning_rate": 1.5836623155410901e-06, "loss": 0.0195, "step": 30005 }, { "epoch": 3.5581643543223054, "grad_norm": 0.8164833269348722, "learning_rate": 1.5828216793612239e-06, "loss": 0.0316, "step": 30006 }, { "epoch": 3.55828293608443, "grad_norm": 0.4694432006666502, "learning_rate": 1.5819812590590178e-06, "loss": 0.0202, "step": 30007 }, { "epoch": 3.5584015178465553, "grad_norm": 0.2659830114618431, "learning_rate": 1.581141054642235e-06, "loss": 0.0112, "step": 30008 }, { "epoch": 3.55852009960868, "grad_norm": 0.4586838689754872, "learning_rate": 1.5803010661186085e-06, "loss": 0.0222, "step": 30009 }, { "epoch": 3.5586386813708053, "grad_norm": 0.5397377508827675, "learning_rate": 1.5794612934958874e-06, "loss": 0.0178, "step": 30010 }, { "epoch": 3.55875726313293, "grad_norm": 0.5392763788022126, "learning_rate": 1.578621736781813e-06, "loss": 0.0244, "step": 30011 }, { "epoch": 3.5588758448950553, "grad_norm": 0.4140569400246805, "learning_rate": 1.577782395984126e-06, "loss": 0.0194, "step": 30012 }, { "epoch": 3.55899442665718, "grad_norm": 0.9388399589066896, "learning_rate": 1.5769432711105675e-06, "loss": 0.0242, "step": 30013 }, { "epoch": 3.5591130084193052, "grad_norm": 0.4058999737148781, "learning_rate": 1.5761043621688592e-06, "loss": 0.0176, "step": 30014 }, { "epoch": 3.55923159018143, "grad_norm": 0.3546123218547797, "learning_rate": 1.5752656691667533e-06, "loss": 0.0175, "step": 30015 }, { "epoch": 3.559350171943555, "grad_norm": 0.5312485239492029, "learning_rate": 1.5744271921119685e-06, "loss": 0.0248, "step": 30016 }, { "epoch": 3.55946875370568, "grad_norm": 0.5465577417705304, "learning_rate": 1.573588931012243e-06, "loss": 0.0224, "step": 30017 }, { "epoch": 3.559587335467805, "grad_norm": 0.7322253658273026, "learning_rate": 1.5727508858752904e-06, "loss": 0.0354, "step": 30018 }, { "epoch": 3.55970591722993, "grad_norm": 0.38806517777266897, "learning_rate": 1.571913056708857e-06, "loss": 0.0205, "step": 30019 }, { "epoch": 3.559824498992055, "grad_norm": 0.7409219957793765, "learning_rate": 1.5710754435206477e-06, "loss": 0.0454, "step": 30020 }, { "epoch": 3.55994308075418, "grad_norm": 0.7082368068433967, "learning_rate": 1.5702380463183952e-06, "loss": 0.0373, "step": 30021 }, { "epoch": 3.560061662516305, "grad_norm": 0.4963651393255425, "learning_rate": 1.5694008651098185e-06, "loss": 0.0208, "step": 30022 }, { "epoch": 3.56018024427843, "grad_norm": 0.48267698631423234, "learning_rate": 1.5685638999026308e-06, "loss": 0.0203, "step": 30023 }, { "epoch": 3.560298826040555, "grad_norm": 0.37701542961131057, "learning_rate": 1.5677271507045538e-06, "loss": 0.0146, "step": 30024 }, { "epoch": 3.56041740780268, "grad_norm": 0.4920119277417836, "learning_rate": 1.566890617523295e-06, "loss": 0.0256, "step": 30025 }, { "epoch": 3.560535989564805, "grad_norm": 0.5556869350704297, "learning_rate": 1.5660543003665679e-06, "loss": 0.0248, "step": 30026 }, { "epoch": 3.56065457132693, "grad_norm": 0.6174871403036227, "learning_rate": 1.5652181992420855e-06, "loss": 0.0229, "step": 30027 }, { "epoch": 3.560773153089055, "grad_norm": 0.4864580860819047, "learning_rate": 1.564382314157556e-06, "loss": 0.0287, "step": 30028 }, { "epoch": 3.5608917348511797, "grad_norm": 0.45340356659461445, "learning_rate": 1.5635466451206754e-06, "loss": 0.0219, "step": 30029 }, { "epoch": 3.561010316613305, "grad_norm": 0.5582507038640341, "learning_rate": 1.5627111921391629e-06, "loss": 0.0309, "step": 30030 }, { "epoch": 3.56112889837543, "grad_norm": 0.43234621698050907, "learning_rate": 1.5618759552207096e-06, "loss": 0.0212, "step": 30031 }, { "epoch": 3.561247480137555, "grad_norm": 0.7471332561130336, "learning_rate": 1.5610409343730176e-06, "loss": 0.0226, "step": 30032 }, { "epoch": 3.5613660618996796, "grad_norm": 0.3845422930992347, "learning_rate": 1.5602061296037863e-06, "loss": 0.0152, "step": 30033 }, { "epoch": 3.561484643661805, "grad_norm": 0.49760888615140814, "learning_rate": 1.5593715409207093e-06, "loss": 0.0258, "step": 30034 }, { "epoch": 3.56160322542393, "grad_norm": 0.5130249489254282, "learning_rate": 1.5585371683314865e-06, "loss": 0.032, "step": 30035 }, { "epoch": 3.561721807186055, "grad_norm": 0.4225825107885747, "learning_rate": 1.5577030118438002e-06, "loss": 0.0152, "step": 30036 }, { "epoch": 3.5618403889481796, "grad_norm": 0.8256545379696574, "learning_rate": 1.5568690714653472e-06, "loss": 0.0401, "step": 30037 }, { "epoch": 3.5619589707103048, "grad_norm": 0.5560339326913208, "learning_rate": 1.556035347203813e-06, "loss": 0.0231, "step": 30038 }, { "epoch": 3.56207755247243, "grad_norm": 0.6385053514559972, "learning_rate": 1.5552018390668832e-06, "loss": 0.036, "step": 30039 }, { "epoch": 3.5621961342345547, "grad_norm": 0.435340152351553, "learning_rate": 1.554368547062246e-06, "loss": 0.0197, "step": 30040 }, { "epoch": 3.5623147159966795, "grad_norm": 0.4090590185530221, "learning_rate": 1.553535471197573e-06, "loss": 0.0165, "step": 30041 }, { "epoch": 3.5624332977588047, "grad_norm": 0.9694452014596588, "learning_rate": 1.5527026114805582e-06, "loss": 0.0294, "step": 30042 }, { "epoch": 3.56255187952093, "grad_norm": 0.7191129483617982, "learning_rate": 1.5518699679188676e-06, "loss": 0.0223, "step": 30043 }, { "epoch": 3.5626704612830546, "grad_norm": 0.502164659818671, "learning_rate": 1.5510375405201838e-06, "loss": 0.0237, "step": 30044 }, { "epoch": 3.5627890430451794, "grad_norm": 0.5763228980028721, "learning_rate": 1.5502053292921787e-06, "loss": 0.0273, "step": 30045 }, { "epoch": 3.5629076248073046, "grad_norm": 0.5189930726253942, "learning_rate": 1.5493733342425265e-06, "loss": 0.0253, "step": 30046 }, { "epoch": 3.56302620656943, "grad_norm": 0.5882753359327298, "learning_rate": 1.5485415553788935e-06, "loss": 0.0236, "step": 30047 }, { "epoch": 3.5631447883315546, "grad_norm": 0.6101135964047424, "learning_rate": 1.5477099927089484e-06, "loss": 0.0433, "step": 30048 }, { "epoch": 3.5632633700936793, "grad_norm": 0.7599953128610448, "learning_rate": 1.5468786462403572e-06, "loss": 0.0284, "step": 30049 }, { "epoch": 3.5633819518558045, "grad_norm": 0.7518660190047474, "learning_rate": 1.5460475159807864e-06, "loss": 0.0248, "step": 30050 }, { "epoch": 3.5635005336179297, "grad_norm": 0.4318573214262153, "learning_rate": 1.5452166019378989e-06, "loss": 0.0236, "step": 30051 }, { "epoch": 3.5636191153800545, "grad_norm": 0.5105526416540964, "learning_rate": 1.5443859041193443e-06, "loss": 0.0239, "step": 30052 }, { "epoch": 3.5637376971421797, "grad_norm": 0.584251032917032, "learning_rate": 1.5435554225327969e-06, "loss": 0.0238, "step": 30053 }, { "epoch": 3.5638562789043045, "grad_norm": 0.5910906587720097, "learning_rate": 1.5427251571859008e-06, "loss": 0.0269, "step": 30054 }, { "epoch": 3.5639748606664297, "grad_norm": 0.4586741574125221, "learning_rate": 1.5418951080863165e-06, "loss": 0.0161, "step": 30055 }, { "epoch": 3.5640934424285544, "grad_norm": 0.5220581214778582, "learning_rate": 1.5410652752416876e-06, "loss": 0.0273, "step": 30056 }, { "epoch": 3.5642120241906796, "grad_norm": 0.5628344719363101, "learning_rate": 1.540235658659675e-06, "loss": 0.0273, "step": 30057 }, { "epoch": 3.5643306059528044, "grad_norm": 0.5342029969866551, "learning_rate": 1.539406258347917e-06, "loss": 0.0246, "step": 30058 }, { "epoch": 3.5644491877149296, "grad_norm": 0.557613239837276, "learning_rate": 1.5385770743140654e-06, "loss": 0.0394, "step": 30059 }, { "epoch": 3.5645677694770543, "grad_norm": 0.4546104611622254, "learning_rate": 1.5377481065657646e-06, "loss": 0.0256, "step": 30060 }, { "epoch": 3.5646863512391795, "grad_norm": 0.33253951876902177, "learning_rate": 1.5369193551106526e-06, "loss": 0.0131, "step": 30061 }, { "epoch": 3.5648049330013043, "grad_norm": 0.36376880000087486, "learning_rate": 1.5360908199563763e-06, "loss": 0.0137, "step": 30062 }, { "epoch": 3.5649235147634295, "grad_norm": 0.9307190055754025, "learning_rate": 1.5352625011105598e-06, "loss": 0.0366, "step": 30063 }, { "epoch": 3.5650420965255543, "grad_norm": 0.4372689487175602, "learning_rate": 1.5344343985808585e-06, "loss": 0.0163, "step": 30064 }, { "epoch": 3.5651606782876795, "grad_norm": 0.4242021130486182, "learning_rate": 1.533606512374891e-06, "loss": 0.0189, "step": 30065 }, { "epoch": 3.565279260049804, "grad_norm": 0.5302353913096258, "learning_rate": 1.5327788425003014e-06, "loss": 0.0277, "step": 30066 }, { "epoch": 3.5653978418119294, "grad_norm": 0.4554240038071623, "learning_rate": 1.531951388964703e-06, "loss": 0.021, "step": 30067 }, { "epoch": 3.565516423574054, "grad_norm": 0.3135853347379759, "learning_rate": 1.5311241517757453e-06, "loss": 0.0175, "step": 30068 }, { "epoch": 3.5656350053361794, "grad_norm": 0.48671369992284946, "learning_rate": 1.5302971309410358e-06, "loss": 0.0253, "step": 30069 }, { "epoch": 3.565753587098304, "grad_norm": 0.7548377620477537, "learning_rate": 1.5294703264682102e-06, "loss": 0.0372, "step": 30070 }, { "epoch": 3.5658721688604293, "grad_norm": 0.5240169255414604, "learning_rate": 1.528643738364885e-06, "loss": 0.0287, "step": 30071 }, { "epoch": 3.565990750622554, "grad_norm": 0.5095023296783273, "learning_rate": 1.5278173666386814e-06, "loss": 0.0177, "step": 30072 }, { "epoch": 3.5661093323846793, "grad_norm": 0.46254119973484825, "learning_rate": 1.5269912112972213e-06, "loss": 0.0191, "step": 30073 }, { "epoch": 3.566227914146804, "grad_norm": 0.7684736664760576, "learning_rate": 1.5261652723481125e-06, "loss": 0.0481, "step": 30074 }, { "epoch": 3.5663464959089293, "grad_norm": 0.28601561381516194, "learning_rate": 1.525339549798982e-06, "loss": 0.0107, "step": 30075 }, { "epoch": 3.566465077671054, "grad_norm": 0.41611206116061633, "learning_rate": 1.5245140436574267e-06, "loss": 0.0159, "step": 30076 }, { "epoch": 3.5665836594331792, "grad_norm": 0.8008433733582215, "learning_rate": 1.5236887539310712e-06, "loss": 0.0326, "step": 30077 }, { "epoch": 3.566702241195304, "grad_norm": 0.35424824213375455, "learning_rate": 1.5228636806275094e-06, "loss": 0.0137, "step": 30078 }, { "epoch": 3.566820822957429, "grad_norm": 0.9087045971112346, "learning_rate": 1.5220388237543626e-06, "loss": 0.0399, "step": 30079 }, { "epoch": 3.5669394047195544, "grad_norm": 0.5869437234910722, "learning_rate": 1.5212141833192222e-06, "loss": 0.0313, "step": 30080 }, { "epoch": 3.567057986481679, "grad_norm": 0.44236554207531187, "learning_rate": 1.5203897593296962e-06, "loss": 0.023, "step": 30081 }, { "epoch": 3.567176568243804, "grad_norm": 0.26767154018734, "learning_rate": 1.5195655517933837e-06, "loss": 0.0074, "step": 30082 }, { "epoch": 3.567295150005929, "grad_norm": 0.6405047289384412, "learning_rate": 1.5187415607178845e-06, "loss": 0.0277, "step": 30083 }, { "epoch": 3.5674137317680543, "grad_norm": 0.6072315629008704, "learning_rate": 1.5179177861107951e-06, "loss": 0.0233, "step": 30084 }, { "epoch": 3.567532313530179, "grad_norm": 0.310058871217455, "learning_rate": 1.5170942279797069e-06, "loss": 0.0145, "step": 30085 }, { "epoch": 3.567650895292304, "grad_norm": 0.44351874196828783, "learning_rate": 1.5162708863322106e-06, "loss": 0.0182, "step": 30086 }, { "epoch": 3.567769477054429, "grad_norm": 0.6265170292991432, "learning_rate": 1.5154477611759004e-06, "loss": 0.0245, "step": 30087 }, { "epoch": 3.5678880588165542, "grad_norm": 0.6072798526011636, "learning_rate": 1.5146248525183622e-06, "loss": 0.0331, "step": 30088 }, { "epoch": 3.568006640578679, "grad_norm": 0.4599697646723456, "learning_rate": 1.5138021603671837e-06, "loss": 0.0244, "step": 30089 }, { "epoch": 3.5681252223408038, "grad_norm": 0.5250927423579496, "learning_rate": 1.5129796847299482e-06, "loss": 0.0219, "step": 30090 }, { "epoch": 3.568243804102929, "grad_norm": 0.39220686301185403, "learning_rate": 1.512157425614244e-06, "loss": 0.0145, "step": 30091 }, { "epoch": 3.568362385865054, "grad_norm": 0.2997802290036812, "learning_rate": 1.51133538302764e-06, "loss": 0.0096, "step": 30092 }, { "epoch": 3.568480967627179, "grad_norm": 0.40040458714869887, "learning_rate": 1.5105135569777217e-06, "loss": 0.0214, "step": 30093 }, { "epoch": 3.5685995493893037, "grad_norm": 0.7387450876434745, "learning_rate": 1.509691947472061e-06, "loss": 0.0285, "step": 30094 }, { "epoch": 3.568718131151429, "grad_norm": 0.5098867407258862, "learning_rate": 1.5088705545182408e-06, "loss": 0.0218, "step": 30095 }, { "epoch": 3.568836712913554, "grad_norm": 0.39273462304362505, "learning_rate": 1.5080493781238241e-06, "loss": 0.0205, "step": 30096 }, { "epoch": 3.568955294675679, "grad_norm": 0.8524243710150383, "learning_rate": 1.507228418296383e-06, "loss": 0.0262, "step": 30097 }, { "epoch": 3.5690738764378036, "grad_norm": 0.42771895411959926, "learning_rate": 1.506407675043489e-06, "loss": 0.0169, "step": 30098 }, { "epoch": 3.569192458199929, "grad_norm": 0.3865740886145615, "learning_rate": 1.505587148372703e-06, "loss": 0.0197, "step": 30099 }, { "epoch": 3.569311039962054, "grad_norm": 0.42565484679495663, "learning_rate": 1.5047668382915963e-06, "loss": 0.0121, "step": 30100 }, { "epoch": 3.5694296217241788, "grad_norm": 0.7512441249659887, "learning_rate": 1.5039467448077243e-06, "loss": 0.0381, "step": 30101 }, { "epoch": 3.569548203486304, "grad_norm": 0.3594417272262605, "learning_rate": 1.5031268679286558e-06, "loss": 0.0162, "step": 30102 }, { "epoch": 3.5696667852484287, "grad_norm": 0.480056404015315, "learning_rate": 1.5023072076619404e-06, "loss": 0.021, "step": 30103 }, { "epoch": 3.569785367010554, "grad_norm": 0.5793711386099695, "learning_rate": 1.5014877640151386e-06, "loss": 0.0272, "step": 30104 }, { "epoch": 3.5699039487726787, "grad_norm": 0.3908351447146127, "learning_rate": 1.500668536995803e-06, "loss": 0.026, "step": 30105 }, { "epoch": 3.570022530534804, "grad_norm": 0.46288861116315444, "learning_rate": 1.499849526611491e-06, "loss": 0.0184, "step": 30106 }, { "epoch": 3.5701411122969287, "grad_norm": 0.6686129818432112, "learning_rate": 1.4990307328697439e-06, "loss": 0.0278, "step": 30107 }, { "epoch": 3.570259694059054, "grad_norm": 0.48582555044203174, "learning_rate": 1.4982121557781142e-06, "loss": 0.0183, "step": 30108 }, { "epoch": 3.5703782758211786, "grad_norm": 0.345321718238118, "learning_rate": 1.4973937953441513e-06, "loss": 0.0156, "step": 30109 }, { "epoch": 3.570496857583304, "grad_norm": 0.7227905898507918, "learning_rate": 1.4965756515753938e-06, "loss": 0.0338, "step": 30110 }, { "epoch": 3.5706154393454286, "grad_norm": 0.5009105534581844, "learning_rate": 1.4957577244793907e-06, "loss": 0.0253, "step": 30111 }, { "epoch": 3.570734021107554, "grad_norm": 0.525567375141117, "learning_rate": 1.4949400140636727e-06, "loss": 0.0241, "step": 30112 }, { "epoch": 3.5708526028696785, "grad_norm": 0.8812447370347065, "learning_rate": 1.4941225203357918e-06, "loss": 0.0382, "step": 30113 }, { "epoch": 3.5709711846318037, "grad_norm": 0.8522853630355219, "learning_rate": 1.4933052433032723e-06, "loss": 0.0439, "step": 30114 }, { "epoch": 3.5710897663939285, "grad_norm": 1.0313134683296077, "learning_rate": 1.4924881829736504e-06, "loss": 0.0453, "step": 30115 }, { "epoch": 3.5712083481560537, "grad_norm": 0.5798696775410367, "learning_rate": 1.491671339354464e-06, "loss": 0.0252, "step": 30116 }, { "epoch": 3.5713269299181785, "grad_norm": 0.5911808416844393, "learning_rate": 1.4908547124532408e-06, "loss": 0.0223, "step": 30117 }, { "epoch": 3.5714455116803037, "grad_norm": 0.5835053489910308, "learning_rate": 1.4900383022775076e-06, "loss": 0.0247, "step": 30118 }, { "epoch": 3.5715640934424284, "grad_norm": 0.5183484702422192, "learning_rate": 1.4892221088347897e-06, "loss": 0.0237, "step": 30119 }, { "epoch": 3.5716826752045536, "grad_norm": 0.4117125617169833, "learning_rate": 1.4884061321326136e-06, "loss": 0.0151, "step": 30120 }, { "epoch": 3.5718012569666784, "grad_norm": 0.3297757490748373, "learning_rate": 1.487590372178499e-06, "loss": 0.0097, "step": 30121 }, { "epoch": 3.5719198387288036, "grad_norm": 0.5356220423522735, "learning_rate": 1.4867748289799755e-06, "loss": 0.0245, "step": 30122 }, { "epoch": 3.5720384204909283, "grad_norm": 0.41454064347213443, "learning_rate": 1.4859595025445455e-06, "loss": 0.027, "step": 30123 }, { "epoch": 3.5721570022530535, "grad_norm": 0.4739533544103678, "learning_rate": 1.4851443928797393e-06, "loss": 0.0286, "step": 30124 }, { "epoch": 3.5722755840151783, "grad_norm": 0.45275413877596293, "learning_rate": 1.4843294999930645e-06, "loss": 0.0153, "step": 30125 }, { "epoch": 3.5723941657773035, "grad_norm": 0.48844293020472607, "learning_rate": 1.4835148238920377e-06, "loss": 0.0264, "step": 30126 }, { "epoch": 3.5725127475394283, "grad_norm": 0.34613397956225567, "learning_rate": 1.4827003645841608e-06, "loss": 0.0143, "step": 30127 }, { "epoch": 3.5726313293015535, "grad_norm": 0.6760761093402609, "learning_rate": 1.4818861220769532e-06, "loss": 0.0369, "step": 30128 }, { "epoch": 3.5727499110636787, "grad_norm": 0.5572199007616827, "learning_rate": 1.481072096377914e-06, "loss": 0.0222, "step": 30129 }, { "epoch": 3.5728684928258034, "grad_norm": 0.49660146981027625, "learning_rate": 1.4802582874945487e-06, "loss": 0.0224, "step": 30130 }, { "epoch": 3.572987074587928, "grad_norm": 0.45475625589863317, "learning_rate": 1.4794446954343593e-06, "loss": 0.0215, "step": 30131 }, { "epoch": 3.5731056563500534, "grad_norm": 0.5057242214404188, "learning_rate": 1.4786313202048458e-06, "loss": 0.0243, "step": 30132 }, { "epoch": 3.5732242381121786, "grad_norm": 0.6308129775631055, "learning_rate": 1.4778181618135129e-06, "loss": 0.0279, "step": 30133 }, { "epoch": 3.5733428198743034, "grad_norm": 0.6374663888763795, "learning_rate": 1.4770052202678436e-06, "loss": 0.0318, "step": 30134 }, { "epoch": 3.573461401636428, "grad_norm": 0.4916321295336519, "learning_rate": 1.4761924955753488e-06, "loss": 0.0228, "step": 30135 }, { "epoch": 3.5735799833985533, "grad_norm": 0.3904353696508918, "learning_rate": 1.4753799877435082e-06, "loss": 0.0218, "step": 30136 }, { "epoch": 3.5736985651606785, "grad_norm": 0.5444669308436115, "learning_rate": 1.4745676967798162e-06, "loss": 0.0269, "step": 30137 }, { "epoch": 3.5738171469228033, "grad_norm": 0.5495992000230598, "learning_rate": 1.473755622691761e-06, "loss": 0.0259, "step": 30138 }, { "epoch": 3.573935728684928, "grad_norm": 0.521420353414674, "learning_rate": 1.472943765486831e-06, "loss": 0.0216, "step": 30139 }, { "epoch": 3.5740543104470532, "grad_norm": 0.6099362093522628, "learning_rate": 1.4721321251725095e-06, "loss": 0.0314, "step": 30140 }, { "epoch": 3.5741728922091784, "grad_norm": 0.36488737102495644, "learning_rate": 1.4713207017562764e-06, "loss": 0.016, "step": 30141 }, { "epoch": 3.574291473971303, "grad_norm": 0.6617017931176538, "learning_rate": 1.4705094952456143e-06, "loss": 0.0332, "step": 30142 }, { "epoch": 3.574410055733428, "grad_norm": 0.7460817069033274, "learning_rate": 1.4696985056479985e-06, "loss": 0.042, "step": 30143 }, { "epoch": 3.574528637495553, "grad_norm": 0.664507818082829, "learning_rate": 1.4688877329709138e-06, "loss": 0.0311, "step": 30144 }, { "epoch": 3.5746472192576784, "grad_norm": 1.0108894937967527, "learning_rate": 1.4680771772218187e-06, "loss": 0.0431, "step": 30145 }, { "epoch": 3.574765801019803, "grad_norm": 0.5700142121433461, "learning_rate": 1.4672668384082043e-06, "loss": 0.0368, "step": 30146 }, { "epoch": 3.574884382781928, "grad_norm": 0.34182165738566817, "learning_rate": 1.4664567165375288e-06, "loss": 0.0162, "step": 30147 }, { "epoch": 3.575002964544053, "grad_norm": 0.9671365344356334, "learning_rate": 1.4656468116172605e-06, "loss": 0.0383, "step": 30148 }, { "epoch": 3.5751215463061783, "grad_norm": 0.6167850941511666, "learning_rate": 1.464837123654872e-06, "loss": 0.0338, "step": 30149 }, { "epoch": 3.575240128068303, "grad_norm": 0.5634191159746649, "learning_rate": 1.4640276526578234e-06, "loss": 0.0244, "step": 30150 }, { "epoch": 3.575358709830428, "grad_norm": 0.4786720188286685, "learning_rate": 1.4632183986335811e-06, "loss": 0.0267, "step": 30151 }, { "epoch": 3.575477291592553, "grad_norm": 0.9356357118991877, "learning_rate": 1.4624093615896007e-06, "loss": 0.0434, "step": 30152 }, { "epoch": 3.575595873354678, "grad_norm": 0.3670885514404565, "learning_rate": 1.4616005415333394e-06, "loss": 0.0165, "step": 30153 }, { "epoch": 3.575714455116803, "grad_norm": 0.32157747586531715, "learning_rate": 1.4607919384722584e-06, "loss": 0.0156, "step": 30154 }, { "epoch": 3.575833036878928, "grad_norm": 0.7341527050643404, "learning_rate": 1.4599835524138157e-06, "loss": 0.0419, "step": 30155 }, { "epoch": 3.575951618641053, "grad_norm": 0.5863377848366002, "learning_rate": 1.459175383365452e-06, "loss": 0.0282, "step": 30156 }, { "epoch": 3.576070200403178, "grad_norm": 0.4971885698719047, "learning_rate": 1.458367431334623e-06, "loss": 0.023, "step": 30157 }, { "epoch": 3.576188782165303, "grad_norm": 0.31312107103703557, "learning_rate": 1.457559696328778e-06, "loss": 0.0105, "step": 30158 }, { "epoch": 3.576307363927428, "grad_norm": 0.4588022985015596, "learning_rate": 1.456752178355364e-06, "loss": 0.0214, "step": 30159 }, { "epoch": 3.576425945689553, "grad_norm": 0.6436649723503096, "learning_rate": 1.4559448774218222e-06, "loss": 0.0298, "step": 30160 }, { "epoch": 3.576544527451678, "grad_norm": 0.6078981068915077, "learning_rate": 1.4551377935355993e-06, "loss": 0.0254, "step": 30161 }, { "epoch": 3.576663109213803, "grad_norm": 0.46483401917500616, "learning_rate": 1.4543309267041338e-06, "loss": 0.022, "step": 30162 }, { "epoch": 3.576781690975928, "grad_norm": 0.47836529241323295, "learning_rate": 1.4535242769348644e-06, "loss": 0.0165, "step": 30163 }, { "epoch": 3.5769002727380528, "grad_norm": 0.36808681874271565, "learning_rate": 1.452717844235224e-06, "loss": 0.0148, "step": 30164 }, { "epoch": 3.577018854500178, "grad_norm": 0.6177649321783102, "learning_rate": 1.4519116286126482e-06, "loss": 0.0306, "step": 30165 }, { "epoch": 3.5771374362623027, "grad_norm": 0.6915113028904264, "learning_rate": 1.4511056300745756e-06, "loss": 0.0362, "step": 30166 }, { "epoch": 3.577256018024428, "grad_norm": 0.33442447208556253, "learning_rate": 1.4502998486284281e-06, "loss": 0.0125, "step": 30167 }, { "epoch": 3.5773745997865527, "grad_norm": 0.5994882880616493, "learning_rate": 1.4494942842816383e-06, "loss": 0.0313, "step": 30168 }, { "epoch": 3.577493181548678, "grad_norm": 0.7621724754153495, "learning_rate": 1.4486889370416312e-06, "loss": 0.0262, "step": 30169 }, { "epoch": 3.5776117633108027, "grad_norm": 0.39499502354252297, "learning_rate": 1.4478838069158314e-06, "loss": 0.0159, "step": 30170 }, { "epoch": 3.577730345072928, "grad_norm": 0.6173366072348511, "learning_rate": 1.4470788939116631e-06, "loss": 0.0349, "step": 30171 }, { "epoch": 3.5778489268350526, "grad_norm": 0.55722416533573, "learning_rate": 1.4462741980365402e-06, "loss": 0.0334, "step": 30172 }, { "epoch": 3.577967508597178, "grad_norm": 0.5343985384985563, "learning_rate": 1.4454697192978927e-06, "loss": 0.0267, "step": 30173 }, { "epoch": 3.5780860903593026, "grad_norm": 0.4232612456446319, "learning_rate": 1.444665457703126e-06, "loss": 0.0217, "step": 30174 }, { "epoch": 3.578204672121428, "grad_norm": 0.5796007099746037, "learning_rate": 1.4438614132596562e-06, "loss": 0.0262, "step": 30175 }, { "epoch": 3.5783232538835525, "grad_norm": 0.5292459635485008, "learning_rate": 1.4430575859748995e-06, "loss": 0.0194, "step": 30176 }, { "epoch": 3.5784418356456777, "grad_norm": 0.4033338247622254, "learning_rate": 1.4422539758562698e-06, "loss": 0.0159, "step": 30177 }, { "epoch": 3.578560417407803, "grad_norm": 0.5465657061199971, "learning_rate": 1.4414505829111635e-06, "loss": 0.0276, "step": 30178 }, { "epoch": 3.5786789991699277, "grad_norm": 0.5914487912869928, "learning_rate": 1.4406474071469945e-06, "loss": 0.0327, "step": 30179 }, { "epoch": 3.5787975809320525, "grad_norm": 0.5613148305920457, "learning_rate": 1.4398444485711677e-06, "loss": 0.0261, "step": 30180 }, { "epoch": 3.5789161626941777, "grad_norm": 0.4398068291148326, "learning_rate": 1.43904170719108e-06, "loss": 0.0219, "step": 30181 }, { "epoch": 3.579034744456303, "grad_norm": 0.4713130203991182, "learning_rate": 1.4382391830141423e-06, "loss": 0.0274, "step": 30182 }, { "epoch": 3.5791533262184276, "grad_norm": 0.7047375132792658, "learning_rate": 1.4374368760477374e-06, "loss": 0.0369, "step": 30183 }, { "epoch": 3.5792719079805524, "grad_norm": 0.45968419961745155, "learning_rate": 1.4366347862992763e-06, "loss": 0.0227, "step": 30184 }, { "epoch": 3.5793904897426776, "grad_norm": 0.723586962711405, "learning_rate": 1.4358329137761472e-06, "loss": 0.0331, "step": 30185 }, { "epoch": 3.579509071504803, "grad_norm": 0.5773694103199726, "learning_rate": 1.435031258485739e-06, "loss": 0.0281, "step": 30186 }, { "epoch": 3.5796276532669276, "grad_norm": 0.7330006163560095, "learning_rate": 1.4342298204354454e-06, "loss": 0.0309, "step": 30187 }, { "epoch": 3.5797462350290523, "grad_norm": 0.5705001862591704, "learning_rate": 1.4334285996326553e-06, "loss": 0.0269, "step": 30188 }, { "epoch": 3.5798648167911775, "grad_norm": 0.44102317275747777, "learning_rate": 1.4326275960847596e-06, "loss": 0.0225, "step": 30189 }, { "epoch": 3.5799833985533027, "grad_norm": 0.5429712416916096, "learning_rate": 1.4318268097991277e-06, "loss": 0.0245, "step": 30190 }, { "epoch": 3.5801019803154275, "grad_norm": 0.7053375790836898, "learning_rate": 1.4310262407831593e-06, "loss": 0.038, "step": 30191 }, { "epoch": 3.5802205620775522, "grad_norm": 0.34586628547433695, "learning_rate": 1.4302258890442233e-06, "loss": 0.0164, "step": 30192 }, { "epoch": 3.5803391438396774, "grad_norm": 0.6424959924922977, "learning_rate": 1.4294257545897055e-06, "loss": 0.0266, "step": 30193 }, { "epoch": 3.5804577256018026, "grad_norm": 0.7086249689804694, "learning_rate": 1.4286258374269696e-06, "loss": 0.0248, "step": 30194 }, { "epoch": 3.5805763073639274, "grad_norm": 0.6142599354769825, "learning_rate": 1.4278261375634067e-06, "loss": 0.0253, "step": 30195 }, { "epoch": 3.580694889126052, "grad_norm": 0.4231629030790049, "learning_rate": 1.4270266550063776e-06, "loss": 0.0193, "step": 30196 }, { "epoch": 3.5808134708881774, "grad_norm": 0.4001135938926872, "learning_rate": 1.4262273897632543e-06, "loss": 0.0193, "step": 30197 }, { "epoch": 3.5809320526503026, "grad_norm": 0.466162310101244, "learning_rate": 1.425428341841406e-06, "loss": 0.0185, "step": 30198 }, { "epoch": 3.5810506344124273, "grad_norm": 0.6478625938498463, "learning_rate": 1.4246295112482017e-06, "loss": 0.0326, "step": 30199 }, { "epoch": 3.581169216174552, "grad_norm": 0.2168925023765598, "learning_rate": 1.4238308979910048e-06, "loss": 0.0074, "step": 30200 }, { "epoch": 3.5812877979366773, "grad_norm": 0.5316924361051629, "learning_rate": 1.4230325020771706e-06, "loss": 0.0231, "step": 30201 }, { "epoch": 3.5814063796988025, "grad_norm": 0.3983785733450617, "learning_rate": 1.422234323514071e-06, "loss": 0.0211, "step": 30202 }, { "epoch": 3.5815249614609272, "grad_norm": 0.6979182831063909, "learning_rate": 1.4214363623090531e-06, "loss": 0.0365, "step": 30203 }, { "epoch": 3.5816435432230525, "grad_norm": 0.4654900933798464, "learning_rate": 1.4206386184694832e-06, "loss": 0.0223, "step": 30204 }, { "epoch": 3.581762124985177, "grad_norm": 0.37856064669224276, "learning_rate": 1.4198410920027022e-06, "loss": 0.012, "step": 30205 }, { "epoch": 3.5818807067473024, "grad_norm": 0.36512000626516167, "learning_rate": 1.4190437829160768e-06, "loss": 0.0141, "step": 30206 }, { "epoch": 3.581999288509427, "grad_norm": 0.46344444884277636, "learning_rate": 1.4182466912169513e-06, "loss": 0.024, "step": 30207 }, { "epoch": 3.5821178702715524, "grad_norm": 0.535308525934628, "learning_rate": 1.4174498169126694e-06, "loss": 0.0203, "step": 30208 }, { "epoch": 3.582236452033677, "grad_norm": 0.44902857176668093, "learning_rate": 1.4166531600105837e-06, "loss": 0.0234, "step": 30209 }, { "epoch": 3.5823550337958023, "grad_norm": 0.7627272613485054, "learning_rate": 1.4158567205180328e-06, "loss": 0.0431, "step": 30210 }, { "epoch": 3.582473615557927, "grad_norm": 0.3967597840991206, "learning_rate": 1.4150604984423694e-06, "loss": 0.0158, "step": 30211 }, { "epoch": 3.5825921973200523, "grad_norm": 0.5064453912959849, "learning_rate": 1.4142644937909206e-06, "loss": 0.0243, "step": 30212 }, { "epoch": 3.582710779082177, "grad_norm": 0.7971578625968672, "learning_rate": 1.4134687065710305e-06, "loss": 0.0405, "step": 30213 }, { "epoch": 3.5828293608443023, "grad_norm": 0.6426646970838484, "learning_rate": 1.4126731367900353e-06, "loss": 0.0361, "step": 30214 }, { "epoch": 3.582947942606427, "grad_norm": 0.5192334140661772, "learning_rate": 1.4118777844552733e-06, "loss": 0.0285, "step": 30215 }, { "epoch": 3.583066524368552, "grad_norm": 0.4520274850288314, "learning_rate": 1.4110826495740664e-06, "loss": 0.0132, "step": 30216 }, { "epoch": 3.583185106130677, "grad_norm": 0.5516968868510739, "learning_rate": 1.410287732153756e-06, "loss": 0.0355, "step": 30217 }, { "epoch": 3.583303687892802, "grad_norm": 0.44543517731972326, "learning_rate": 1.409493032201664e-06, "loss": 0.0235, "step": 30218 }, { "epoch": 3.583422269654927, "grad_norm": 0.49807527449970823, "learning_rate": 1.4086985497251154e-06, "loss": 0.0266, "step": 30219 }, { "epoch": 3.583540851417052, "grad_norm": 0.34749079695790547, "learning_rate": 1.4079042847314373e-06, "loss": 0.017, "step": 30220 }, { "epoch": 3.583659433179177, "grad_norm": 0.3475673768913157, "learning_rate": 1.4071102372279516e-06, "loss": 0.0132, "step": 30221 }, { "epoch": 3.583778014941302, "grad_norm": 0.5186616412362917, "learning_rate": 1.4063164072219803e-06, "loss": 0.0209, "step": 30222 }, { "epoch": 3.583896596703427, "grad_norm": 0.5117286897480118, "learning_rate": 1.4055227947208371e-06, "loss": 0.0244, "step": 30223 }, { "epoch": 3.584015178465552, "grad_norm": 0.4980679139389545, "learning_rate": 1.4047293997318385e-06, "loss": 0.02, "step": 30224 }, { "epoch": 3.584133760227677, "grad_norm": 0.661765244830287, "learning_rate": 1.4039362222623004e-06, "loss": 0.04, "step": 30225 }, { "epoch": 3.584252341989802, "grad_norm": 0.551156126229845, "learning_rate": 1.4031432623195423e-06, "loss": 0.0227, "step": 30226 }, { "epoch": 3.5843709237519272, "grad_norm": 0.6158359095205442, "learning_rate": 1.4023505199108582e-06, "loss": 0.0374, "step": 30227 }, { "epoch": 3.584489505514052, "grad_norm": 0.36961769744529166, "learning_rate": 1.4015579950435676e-06, "loss": 0.018, "step": 30228 }, { "epoch": 3.5846080872761767, "grad_norm": 0.2786572953000568, "learning_rate": 1.4007656877249752e-06, "loss": 0.0134, "step": 30229 }, { "epoch": 3.584726669038302, "grad_norm": 0.5037170955809285, "learning_rate": 1.3999735979623812e-06, "loss": 0.0259, "step": 30230 }, { "epoch": 3.584845250800427, "grad_norm": 0.551496025186819, "learning_rate": 1.3991817257630934e-06, "loss": 0.0221, "step": 30231 }, { "epoch": 3.584963832562552, "grad_norm": 0.4063908852982394, "learning_rate": 1.398390071134406e-06, "loss": 0.0185, "step": 30232 }, { "epoch": 3.5850824143246767, "grad_norm": 0.6892338092446865, "learning_rate": 1.3975986340836245e-06, "loss": 0.0283, "step": 30233 }, { "epoch": 3.585200996086802, "grad_norm": 0.5810069274088375, "learning_rate": 1.396807414618037e-06, "loss": 0.03, "step": 30234 }, { "epoch": 3.585319577848927, "grad_norm": 0.5543781124196173, "learning_rate": 1.396016412744941e-06, "loss": 0.022, "step": 30235 }, { "epoch": 3.585438159611052, "grad_norm": 0.3096828503424397, "learning_rate": 1.3952256284716275e-06, "loss": 0.0121, "step": 30236 }, { "epoch": 3.5855567413731766, "grad_norm": 0.5959748767753301, "learning_rate": 1.3944350618053881e-06, "loss": 0.0356, "step": 30237 }, { "epoch": 3.585675323135302, "grad_norm": 0.3515735812647097, "learning_rate": 1.3936447127535168e-06, "loss": 0.0167, "step": 30238 }, { "epoch": 3.585793904897427, "grad_norm": 0.5147945535823616, "learning_rate": 1.392854581323283e-06, "loss": 0.0195, "step": 30239 }, { "epoch": 3.5859124866595518, "grad_norm": 0.5906359626905819, "learning_rate": 1.392064667521989e-06, "loss": 0.023, "step": 30240 }, { "epoch": 3.5860310684216765, "grad_norm": 0.4557009719596823, "learning_rate": 1.391274971356904e-06, "loss": 0.02, "step": 30241 }, { "epoch": 3.5861496501838017, "grad_norm": 0.2579029641019736, "learning_rate": 1.3904854928353167e-06, "loss": 0.0107, "step": 30242 }, { "epoch": 3.586268231945927, "grad_norm": 0.5847814535002371, "learning_rate": 1.3896962319644963e-06, "loss": 0.0253, "step": 30243 }, { "epoch": 3.5863868137080517, "grad_norm": 0.701551211744982, "learning_rate": 1.3889071887517286e-06, "loss": 0.0293, "step": 30244 }, { "epoch": 3.5865053954701764, "grad_norm": 0.7383910076423557, "learning_rate": 1.38811836320428e-06, "loss": 0.0271, "step": 30245 }, { "epoch": 3.5866239772323016, "grad_norm": 0.4268343022622213, "learning_rate": 1.3873297553294252e-06, "loss": 0.0266, "step": 30246 }, { "epoch": 3.586742558994427, "grad_norm": 0.5227073682562776, "learning_rate": 1.3865413651344362e-06, "loss": 0.0242, "step": 30247 }, { "epoch": 3.5868611407565516, "grad_norm": 0.36801860769607947, "learning_rate": 1.3857531926265765e-06, "loss": 0.0148, "step": 30248 }, { "epoch": 3.5869797225186764, "grad_norm": 0.5748469060298314, "learning_rate": 1.3849652378131185e-06, "loss": 0.0305, "step": 30249 }, { "epoch": 3.5870983042808016, "grad_norm": 0.5021037507456586, "learning_rate": 1.384177500701317e-06, "loss": 0.0225, "step": 30250 }, { "epoch": 3.5872168860429268, "grad_norm": 0.3829667815171854, "learning_rate": 1.3833899812984442e-06, "loss": 0.015, "step": 30251 }, { "epoch": 3.5873354678050515, "grad_norm": 0.6752813856962608, "learning_rate": 1.3826026796117526e-06, "loss": 0.0314, "step": 30252 }, { "epoch": 3.5874540495671767, "grad_norm": 0.30455119211741927, "learning_rate": 1.3818155956485058e-06, "loss": 0.0104, "step": 30253 }, { "epoch": 3.5875726313293015, "grad_norm": 0.5945483359424341, "learning_rate": 1.3810287294159507e-06, "loss": 0.0229, "step": 30254 }, { "epoch": 3.5876912130914267, "grad_norm": 0.4388049565029634, "learning_rate": 1.380242080921354e-06, "loss": 0.0203, "step": 30255 }, { "epoch": 3.5878097948535514, "grad_norm": 0.5985533555925228, "learning_rate": 1.379455650171957e-06, "loss": 0.0168, "step": 30256 }, { "epoch": 3.5879283766156767, "grad_norm": 0.5099580919125285, "learning_rate": 1.378669437175012e-06, "loss": 0.0203, "step": 30257 }, { "epoch": 3.5880469583778014, "grad_norm": 0.5466179328201661, "learning_rate": 1.3778834419377717e-06, "loss": 0.0182, "step": 30258 }, { "epoch": 3.5881655401399266, "grad_norm": 0.6060356876233227, "learning_rate": 1.3770976644674748e-06, "loss": 0.0396, "step": 30259 }, { "epoch": 3.5882841219020514, "grad_norm": 0.46264957350694935, "learning_rate": 1.3763121047713767e-06, "loss": 0.0206, "step": 30260 }, { "epoch": 3.5884027036641766, "grad_norm": 0.32126412114701736, "learning_rate": 1.375526762856702e-06, "loss": 0.0148, "step": 30261 }, { "epoch": 3.5885212854263013, "grad_norm": 0.4013040324725195, "learning_rate": 1.3747416387307089e-06, "loss": 0.0167, "step": 30262 }, { "epoch": 3.5886398671884265, "grad_norm": 0.6185740361497303, "learning_rate": 1.3739567324006219e-06, "loss": 0.0286, "step": 30263 }, { "epoch": 3.5887584489505513, "grad_norm": 0.6351834016981115, "learning_rate": 1.3731720438736856e-06, "loss": 0.0297, "step": 30264 }, { "epoch": 3.5888770307126765, "grad_norm": 0.4081973505923881, "learning_rate": 1.3723875731571218e-06, "loss": 0.0214, "step": 30265 }, { "epoch": 3.5889956124748013, "grad_norm": 0.8295961652404419, "learning_rate": 1.3716033202581802e-06, "loss": 0.042, "step": 30266 }, { "epoch": 3.5891141942369265, "grad_norm": 0.623301873230482, "learning_rate": 1.3708192851840745e-06, "loss": 0.0275, "step": 30267 }, { "epoch": 3.589232775999051, "grad_norm": 0.7107597569985098, "learning_rate": 1.3700354679420406e-06, "loss": 0.0341, "step": 30268 }, { "epoch": 3.5893513577611764, "grad_norm": 0.9026699733918335, "learning_rate": 1.3692518685393036e-06, "loss": 0.0418, "step": 30269 }, { "epoch": 3.589469939523301, "grad_norm": 0.3672656394805999, "learning_rate": 1.368468486983085e-06, "loss": 0.0178, "step": 30270 }, { "epoch": 3.5895885212854264, "grad_norm": 0.7170911884541669, "learning_rate": 1.3676853232806098e-06, "loss": 0.0324, "step": 30271 }, { "epoch": 3.589707103047551, "grad_norm": 0.850512728987101, "learning_rate": 1.3669023774390915e-06, "loss": 0.0328, "step": 30272 }, { "epoch": 3.5898256848096763, "grad_norm": 0.3512914914345569, "learning_rate": 1.3661196494657607e-06, "loss": 0.0173, "step": 30273 }, { "epoch": 3.589944266571801, "grad_norm": 0.42976427924895244, "learning_rate": 1.3653371393678198e-06, "loss": 0.0171, "step": 30274 }, { "epoch": 3.5900628483339263, "grad_norm": 0.4813012950407259, "learning_rate": 1.364554847152491e-06, "loss": 0.0297, "step": 30275 }, { "epoch": 3.590181430096051, "grad_norm": 0.6444139921377733, "learning_rate": 1.3637727728269738e-06, "loss": 0.0282, "step": 30276 }, { "epoch": 3.5903000118581763, "grad_norm": 0.4768329913330175, "learning_rate": 1.3629909163984962e-06, "loss": 0.028, "step": 30277 }, { "epoch": 3.590418593620301, "grad_norm": 0.8180344887343912, "learning_rate": 1.3622092778742546e-06, "loss": 0.0334, "step": 30278 }, { "epoch": 3.5905371753824262, "grad_norm": 0.48057423130767285, "learning_rate": 1.3614278572614547e-06, "loss": 0.0204, "step": 30279 }, { "epoch": 3.5906557571445514, "grad_norm": 0.5289233180412584, "learning_rate": 1.3606466545673047e-06, "loss": 0.0266, "step": 30280 }, { "epoch": 3.590774338906676, "grad_norm": 0.4596876924668516, "learning_rate": 1.3598656697990015e-06, "loss": 0.0151, "step": 30281 }, { "epoch": 3.590892920668801, "grad_norm": 0.7003836063276891, "learning_rate": 1.3590849029637531e-06, "loss": 0.0259, "step": 30282 }, { "epoch": 3.591011502430926, "grad_norm": 0.6483051263953187, "learning_rate": 1.3583043540687457e-06, "loss": 0.0203, "step": 30283 }, { "epoch": 3.5911300841930514, "grad_norm": 0.5417793914587848, "learning_rate": 1.3575240231211818e-06, "loss": 0.0291, "step": 30284 }, { "epoch": 3.591248665955176, "grad_norm": 0.5513085452304264, "learning_rate": 1.3567439101282553e-06, "loss": 0.0231, "step": 30285 }, { "epoch": 3.591367247717301, "grad_norm": 0.5248834160907412, "learning_rate": 1.355964015097158e-06, "loss": 0.0178, "step": 30286 }, { "epoch": 3.591485829479426, "grad_norm": 0.8564798948935965, "learning_rate": 1.3551843380350732e-06, "loss": 0.0386, "step": 30287 }, { "epoch": 3.5916044112415513, "grad_norm": 0.5975209234364856, "learning_rate": 1.3544048789492004e-06, "loss": 0.0315, "step": 30288 }, { "epoch": 3.591722993003676, "grad_norm": 0.4416000909476542, "learning_rate": 1.3536256378467144e-06, "loss": 0.0277, "step": 30289 }, { "epoch": 3.591841574765801, "grad_norm": 0.44688924719015616, "learning_rate": 1.3528466147348012e-06, "loss": 0.0184, "step": 30290 }, { "epoch": 3.591960156527926, "grad_norm": 0.33973487491199045, "learning_rate": 1.3520678096206468e-06, "loss": 0.0207, "step": 30291 }, { "epoch": 3.592078738290051, "grad_norm": 0.6316063789536117, "learning_rate": 1.351289222511426e-06, "loss": 0.0331, "step": 30292 }, { "epoch": 3.592197320052176, "grad_norm": 0.6461935401947546, "learning_rate": 1.3505108534143246e-06, "loss": 0.033, "step": 30293 }, { "epoch": 3.5923159018143007, "grad_norm": 0.6482903230276174, "learning_rate": 1.3497327023365063e-06, "loss": 0.0362, "step": 30294 }, { "epoch": 3.592434483576426, "grad_norm": 0.4549044543607977, "learning_rate": 1.3489547692851518e-06, "loss": 0.0173, "step": 30295 }, { "epoch": 3.592553065338551, "grad_norm": 0.7161529086519499, "learning_rate": 1.3481770542674327e-06, "loss": 0.0314, "step": 30296 }, { "epoch": 3.592671647100676, "grad_norm": 0.5230857802070488, "learning_rate": 1.3473995572905158e-06, "loss": 0.0172, "step": 30297 }, { "epoch": 3.5927902288628006, "grad_norm": 0.4404898678110975, "learning_rate": 1.3466222783615728e-06, "loss": 0.0223, "step": 30298 }, { "epoch": 3.592908810624926, "grad_norm": 0.4363215862809811, "learning_rate": 1.3458452174877623e-06, "loss": 0.0226, "step": 30299 }, { "epoch": 3.593027392387051, "grad_norm": 0.536731332870012, "learning_rate": 1.345068374676256e-06, "loss": 0.0236, "step": 30300 }, { "epoch": 3.593145974149176, "grad_norm": 0.4751426658631583, "learning_rate": 1.3442917499342124e-06, "loss": 0.0256, "step": 30301 }, { "epoch": 3.593264555911301, "grad_norm": 0.3615109810710253, "learning_rate": 1.3435153432687864e-06, "loss": 0.0191, "step": 30302 }, { "epoch": 3.5933831376734258, "grad_norm": 0.6328096156304654, "learning_rate": 1.3427391546871422e-06, "loss": 0.025, "step": 30303 }, { "epoch": 3.593501719435551, "grad_norm": 0.733430964987023, "learning_rate": 1.3419631841964347e-06, "loss": 0.024, "step": 30304 }, { "epoch": 3.5936203011976757, "grad_norm": 0.8292625931294112, "learning_rate": 1.3411874318038114e-06, "loss": 0.0367, "step": 30305 }, { "epoch": 3.593738882959801, "grad_norm": 0.36124375045537555, "learning_rate": 1.3404118975164277e-06, "loss": 0.0147, "step": 30306 }, { "epoch": 3.5938574647219257, "grad_norm": 0.46970033336320577, "learning_rate": 1.339636581341433e-06, "loss": 0.0122, "step": 30307 }, { "epoch": 3.593976046484051, "grad_norm": 0.5995219996728539, "learning_rate": 1.3388614832859775e-06, "loss": 0.027, "step": 30308 }, { "epoch": 3.5940946282461756, "grad_norm": 0.4369893331388282, "learning_rate": 1.3380866033572026e-06, "loss": 0.0143, "step": 30309 }, { "epoch": 3.594213210008301, "grad_norm": 0.363076299049197, "learning_rate": 1.3373119415622498e-06, "loss": 0.0146, "step": 30310 }, { "epoch": 3.5943317917704256, "grad_norm": 0.5685428110441739, "learning_rate": 1.336537497908269e-06, "loss": 0.0293, "step": 30311 }, { "epoch": 3.594450373532551, "grad_norm": 0.5258199248321362, "learning_rate": 1.3357632724023933e-06, "loss": 0.0273, "step": 30312 }, { "epoch": 3.5945689552946756, "grad_norm": 0.5685894308807649, "learning_rate": 1.3349892650517614e-06, "loss": 0.0261, "step": 30313 }, { "epoch": 3.5946875370568008, "grad_norm": 0.4644948564367096, "learning_rate": 1.334215475863504e-06, "loss": 0.0227, "step": 30314 }, { "epoch": 3.5948061188189255, "grad_norm": 0.6987853920549577, "learning_rate": 1.333441904844765e-06, "loss": 0.0329, "step": 30315 }, { "epoch": 3.5949247005810507, "grad_norm": 0.5922993707309314, "learning_rate": 1.3326685520026667e-06, "loss": 0.0219, "step": 30316 }, { "epoch": 3.5950432823431755, "grad_norm": 0.2836174895684248, "learning_rate": 1.3318954173443393e-06, "loss": 0.0156, "step": 30317 }, { "epoch": 3.5951618641053007, "grad_norm": 0.6170543911869771, "learning_rate": 1.3311225008769162e-06, "loss": 0.031, "step": 30318 }, { "epoch": 3.5952804458674255, "grad_norm": 0.5619463692533767, "learning_rate": 1.3303498026075168e-06, "loss": 0.0324, "step": 30319 }, { "epoch": 3.5953990276295507, "grad_norm": 0.604970339439302, "learning_rate": 1.329577322543271e-06, "loss": 0.0276, "step": 30320 }, { "epoch": 3.5955176093916754, "grad_norm": 0.5346071940322767, "learning_rate": 1.3288050606912878e-06, "loss": 0.0255, "step": 30321 }, { "epoch": 3.5956361911538006, "grad_norm": 0.9078898211994952, "learning_rate": 1.3280330170586996e-06, "loss": 0.0634, "step": 30322 }, { "epoch": 3.5957547729159254, "grad_norm": 0.5139273177111484, "learning_rate": 1.3272611916526178e-06, "loss": 0.0222, "step": 30323 }, { "epoch": 3.5958733546780506, "grad_norm": 0.5186386178726711, "learning_rate": 1.3264895844801616e-06, "loss": 0.0184, "step": 30324 }, { "epoch": 3.5959919364401753, "grad_norm": 0.6440691646718099, "learning_rate": 1.3257181955484338e-06, "loss": 0.0265, "step": 30325 }, { "epoch": 3.5961105182023005, "grad_norm": 0.5351225292547905, "learning_rate": 1.324947024864559e-06, "loss": 0.0324, "step": 30326 }, { "epoch": 3.5962290999644253, "grad_norm": 0.5520469391578139, "learning_rate": 1.3241760724356373e-06, "loss": 0.0249, "step": 30327 }, { "epoch": 3.5963476817265505, "grad_norm": 0.3824146972043754, "learning_rate": 1.3234053382687794e-06, "loss": 0.0277, "step": 30328 }, { "epoch": 3.5964662634886757, "grad_norm": 0.5104377800249257, "learning_rate": 1.322634822371091e-06, "loss": 0.0212, "step": 30329 }, { "epoch": 3.5965848452508005, "grad_norm": 0.5681686069516065, "learning_rate": 1.3218645247496719e-06, "loss": 0.0329, "step": 30330 }, { "epoch": 3.5967034270129252, "grad_norm": 0.5088068917585812, "learning_rate": 1.321094445411633e-06, "loss": 0.0251, "step": 30331 }, { "epoch": 3.5968220087750504, "grad_norm": 0.4443175846846231, "learning_rate": 1.3203245843640549e-06, "loss": 0.0185, "step": 30332 }, { "epoch": 3.5969405905371756, "grad_norm": 0.5819513212000545, "learning_rate": 1.3195549416140567e-06, "loss": 0.0292, "step": 30333 }, { "epoch": 3.5970591722993004, "grad_norm": 0.4894368380563326, "learning_rate": 1.318785517168719e-06, "loss": 0.0292, "step": 30334 }, { "epoch": 3.597177754061425, "grad_norm": 0.43151356400347396, "learning_rate": 1.3180163110351418e-06, "loss": 0.0194, "step": 30335 }, { "epoch": 3.5972963358235504, "grad_norm": 0.565229986310172, "learning_rate": 1.3172473232204053e-06, "loss": 0.0251, "step": 30336 }, { "epoch": 3.5974149175856756, "grad_norm": 0.64280787485463, "learning_rate": 1.3164785537316148e-06, "loss": 0.0341, "step": 30337 }, { "epoch": 3.5975334993478003, "grad_norm": 1.0610112371845717, "learning_rate": 1.3157100025758456e-06, "loss": 0.0659, "step": 30338 }, { "epoch": 3.597652081109925, "grad_norm": 0.3217830712227758, "learning_rate": 1.3149416697601892e-06, "loss": 0.0203, "step": 30339 }, { "epoch": 3.5977706628720503, "grad_norm": 0.6945476911684376, "learning_rate": 1.3141735552917228e-06, "loss": 0.048, "step": 30340 }, { "epoch": 3.5978892446341755, "grad_norm": 0.42402235416848116, "learning_rate": 1.313405659177533e-06, "loss": 0.0205, "step": 30341 }, { "epoch": 3.5980078263963002, "grad_norm": 0.6223403490971303, "learning_rate": 1.312637981424697e-06, "loss": 0.0348, "step": 30342 }, { "epoch": 3.598126408158425, "grad_norm": 0.6312144806902849, "learning_rate": 1.311870522040287e-06, "loss": 0.0282, "step": 30343 }, { "epoch": 3.59824498992055, "grad_norm": 0.39917887315445083, "learning_rate": 1.3111032810313895e-06, "loss": 0.0148, "step": 30344 }, { "epoch": 3.5983635716826754, "grad_norm": 0.43679791204966456, "learning_rate": 1.310336258405065e-06, "loss": 0.0208, "step": 30345 }, { "epoch": 3.5984821534448, "grad_norm": 0.4223785377744873, "learning_rate": 1.309569454168391e-06, "loss": 0.0183, "step": 30346 }, { "epoch": 3.598600735206925, "grad_norm": 0.5339585567369409, "learning_rate": 1.3088028683284375e-06, "loss": 0.0228, "step": 30347 }, { "epoch": 3.59871931696905, "grad_norm": 0.5349026896871063, "learning_rate": 1.308036500892268e-06, "loss": 0.0291, "step": 30348 }, { "epoch": 3.5988378987311753, "grad_norm": 0.38992376486664304, "learning_rate": 1.3072703518669517e-06, "loss": 0.0163, "step": 30349 }, { "epoch": 3.5989564804933, "grad_norm": 0.3166618827630165, "learning_rate": 1.3065044212595467e-06, "loss": 0.0103, "step": 30350 }, { "epoch": 3.599075062255425, "grad_norm": 0.5519439832807934, "learning_rate": 1.3057387090771145e-06, "loss": 0.0235, "step": 30351 }, { "epoch": 3.59919364401755, "grad_norm": 0.2754363773447477, "learning_rate": 1.3049732153267158e-06, "loss": 0.0108, "step": 30352 }, { "epoch": 3.5993122257796752, "grad_norm": 0.6629239486298609, "learning_rate": 1.3042079400154117e-06, "loss": 0.0324, "step": 30353 }, { "epoch": 3.5994308075418, "grad_norm": 0.6508274752246452, "learning_rate": 1.3034428831502494e-06, "loss": 0.0289, "step": 30354 }, { "epoch": 3.599549389303925, "grad_norm": 0.4610925372135447, "learning_rate": 1.302678044738284e-06, "loss": 0.0258, "step": 30355 }, { "epoch": 3.59966797106605, "grad_norm": 0.6121515927604324, "learning_rate": 1.301913424786569e-06, "loss": 0.0196, "step": 30356 }, { "epoch": 3.599786552828175, "grad_norm": 0.8106806691639675, "learning_rate": 1.3011490233021506e-06, "loss": 0.0451, "step": 30357 }, { "epoch": 3.5999051345903, "grad_norm": 0.5243810637975386, "learning_rate": 1.3003848402920766e-06, "loss": 0.0266, "step": 30358 }, { "epoch": 3.600023716352425, "grad_norm": 0.6394709802420081, "learning_rate": 1.299620875763391e-06, "loss": 0.0224, "step": 30359 }, { "epoch": 3.60014229811455, "grad_norm": 0.33580994533329256, "learning_rate": 1.298857129723144e-06, "loss": 0.0239, "step": 30360 }, { "epoch": 3.600260879876675, "grad_norm": 0.5589027749905028, "learning_rate": 1.2980936021783629e-06, "loss": 0.0234, "step": 30361 }, { "epoch": 3.6003794616388, "grad_norm": 0.49338318549184335, "learning_rate": 1.2973302931360953e-06, "loss": 0.0204, "step": 30362 }, { "epoch": 3.600498043400925, "grad_norm": 0.48810186423604685, "learning_rate": 1.296567202603377e-06, "loss": 0.0253, "step": 30363 }, { "epoch": 3.60061662516305, "grad_norm": 0.5350445640081319, "learning_rate": 1.2958043305872469e-06, "loss": 0.0288, "step": 30364 }, { "epoch": 3.600735206925175, "grad_norm": 0.37217784609998505, "learning_rate": 1.295041677094727e-06, "loss": 0.0115, "step": 30365 }, { "epoch": 3.6008537886872998, "grad_norm": 0.6657106046679836, "learning_rate": 1.2942792421328536e-06, "loss": 0.0403, "step": 30366 }, { "epoch": 3.600972370449425, "grad_norm": 0.321268975630263, "learning_rate": 1.293517025708657e-06, "loss": 0.014, "step": 30367 }, { "epoch": 3.6010909522115497, "grad_norm": 0.48104765141911393, "learning_rate": 1.2927550278291622e-06, "loss": 0.0286, "step": 30368 }, { "epoch": 3.601209533973675, "grad_norm": 0.4430877217711643, "learning_rate": 1.2919932485013969e-06, "loss": 0.0209, "step": 30369 }, { "epoch": 3.6013281157357997, "grad_norm": 0.8013327441623865, "learning_rate": 1.2912316877323748e-06, "loss": 0.0471, "step": 30370 }, { "epoch": 3.601446697497925, "grad_norm": 0.31631741166454796, "learning_rate": 1.2904703455291295e-06, "loss": 0.0145, "step": 30371 }, { "epoch": 3.6015652792600497, "grad_norm": 0.25634319506295367, "learning_rate": 1.2897092218986717e-06, "loss": 0.0099, "step": 30372 }, { "epoch": 3.601683861022175, "grad_norm": 0.5198574320101339, "learning_rate": 1.2889483168480182e-06, "loss": 0.0196, "step": 30373 }, { "epoch": 3.6018024427842996, "grad_norm": 0.6418413816825265, "learning_rate": 1.2881876303841828e-06, "loss": 0.0353, "step": 30374 }, { "epoch": 3.601921024546425, "grad_norm": 0.668682676236825, "learning_rate": 1.2874271625141848e-06, "loss": 0.0215, "step": 30375 }, { "epoch": 3.6020396063085496, "grad_norm": 1.1730074338915435, "learning_rate": 1.286666913245027e-06, "loss": 0.0513, "step": 30376 }, { "epoch": 3.602158188070675, "grad_norm": 0.5223525170822428, "learning_rate": 1.2859068825837235e-06, "loss": 0.0235, "step": 30377 }, { "epoch": 3.6022767698328, "grad_norm": 0.38527589625829123, "learning_rate": 1.2851470705372737e-06, "loss": 0.0146, "step": 30378 }, { "epoch": 3.6023953515949247, "grad_norm": 0.32756128211569696, "learning_rate": 1.284387477112689e-06, "loss": 0.015, "step": 30379 }, { "epoch": 3.6025139333570495, "grad_norm": 0.36848131010489127, "learning_rate": 1.283628102316975e-06, "loss": 0.0142, "step": 30380 }, { "epoch": 3.6026325151191747, "grad_norm": 0.6730801250251812, "learning_rate": 1.2828689461571175e-06, "loss": 0.0338, "step": 30381 }, { "epoch": 3.6027510968813, "grad_norm": 0.5000134317946437, "learning_rate": 1.2821100086401334e-06, "loss": 0.0282, "step": 30382 }, { "epoch": 3.6028696786434247, "grad_norm": 0.6563473946948736, "learning_rate": 1.2813512897730056e-06, "loss": 0.0325, "step": 30383 }, { "epoch": 3.6029882604055494, "grad_norm": 0.7019319815947362, "learning_rate": 1.2805927895627374e-06, "loss": 0.0292, "step": 30384 }, { "epoch": 3.6031068421676746, "grad_norm": 0.4820497721622835, "learning_rate": 1.2798345080163088e-06, "loss": 0.0227, "step": 30385 }, { "epoch": 3.6032254239298, "grad_norm": 0.725804874823848, "learning_rate": 1.2790764451407256e-06, "loss": 0.0445, "step": 30386 }, { "epoch": 3.6033440056919246, "grad_norm": 0.6120256475185081, "learning_rate": 1.2783186009429654e-06, "loss": 0.0399, "step": 30387 }, { "epoch": 3.6034625874540493, "grad_norm": 0.4866485402412959, "learning_rate": 1.27756097543002e-06, "loss": 0.016, "step": 30388 }, { "epoch": 3.6035811692161746, "grad_norm": 0.3301485330401239, "learning_rate": 1.2768035686088698e-06, "loss": 0.0128, "step": 30389 }, { "epoch": 3.6036997509782998, "grad_norm": 0.6464208810821416, "learning_rate": 1.2760463804865008e-06, "loss": 0.0313, "step": 30390 }, { "epoch": 3.6038183327404245, "grad_norm": 0.6324344825747182, "learning_rate": 1.2752894110698937e-06, "loss": 0.0339, "step": 30391 }, { "epoch": 3.6039369145025493, "grad_norm": 0.3986661118699562, "learning_rate": 1.2745326603660207e-06, "loss": 0.0139, "step": 30392 }, { "epoch": 3.6040554962646745, "grad_norm": 0.44402784142177126, "learning_rate": 1.2737761283818679e-06, "loss": 0.0205, "step": 30393 }, { "epoch": 3.6041740780267997, "grad_norm": 0.36246948879912033, "learning_rate": 1.2730198151244015e-06, "loss": 0.0159, "step": 30394 }, { "epoch": 3.6042926597889244, "grad_norm": 0.37366928246584086, "learning_rate": 1.2722637206005944e-06, "loss": 0.0157, "step": 30395 }, { "epoch": 3.604411241551049, "grad_norm": 0.4221244436643952, "learning_rate": 1.2715078448174212e-06, "loss": 0.024, "step": 30396 }, { "epoch": 3.6045298233131744, "grad_norm": 0.423537985500025, "learning_rate": 1.2707521877818485e-06, "loss": 0.0181, "step": 30397 }, { "epoch": 3.6046484050752996, "grad_norm": 0.42929062556922154, "learning_rate": 1.269996749500843e-06, "loss": 0.0179, "step": 30398 }, { "epoch": 3.6047669868374244, "grad_norm": 0.5462231168804258, "learning_rate": 1.2692415299813659e-06, "loss": 0.03, "step": 30399 }, { "epoch": 3.604885568599549, "grad_norm": 0.33812986254103033, "learning_rate": 1.268486529230381e-06, "loss": 0.0165, "step": 30400 }, { "epoch": 3.6050041503616743, "grad_norm": 0.6806013476541539, "learning_rate": 1.2677317472548466e-06, "loss": 0.0181, "step": 30401 }, { "epoch": 3.6051227321237995, "grad_norm": 0.34146144930760397, "learning_rate": 1.2669771840617296e-06, "loss": 0.0131, "step": 30402 }, { "epoch": 3.6052413138859243, "grad_norm": 0.8588688925463029, "learning_rate": 1.2662228396579712e-06, "loss": 0.0317, "step": 30403 }, { "epoch": 3.6053598956480495, "grad_norm": 0.4873909807391941, "learning_rate": 1.2654687140505412e-06, "loss": 0.0227, "step": 30404 }, { "epoch": 3.6054784774101742, "grad_norm": 0.6580435634417815, "learning_rate": 1.264714807246381e-06, "loss": 0.0192, "step": 30405 }, { "epoch": 3.6055970591722994, "grad_norm": 0.41635077913622104, "learning_rate": 1.2639611192524438e-06, "loss": 0.0159, "step": 30406 }, { "epoch": 3.605715640934424, "grad_norm": 0.7239021554809697, "learning_rate": 1.263207650075679e-06, "loss": 0.032, "step": 30407 }, { "epoch": 3.6058342226965494, "grad_norm": 0.7021823153122966, "learning_rate": 1.2624543997230314e-06, "loss": 0.0336, "step": 30408 }, { "epoch": 3.605952804458674, "grad_norm": 0.7762180045302212, "learning_rate": 1.2617013682014484e-06, "loss": 0.0447, "step": 30409 }, { "epoch": 3.6060713862207994, "grad_norm": 0.681978534315145, "learning_rate": 1.2609485555178657e-06, "loss": 0.0334, "step": 30410 }, { "epoch": 3.606189967982924, "grad_norm": 0.6179031373926082, "learning_rate": 1.2601959616792252e-06, "loss": 0.0279, "step": 30411 }, { "epoch": 3.6063085497450493, "grad_norm": 0.6049850499618598, "learning_rate": 1.2594435866924686e-06, "loss": 0.0316, "step": 30412 }, { "epoch": 3.606427131507174, "grad_norm": 0.6858849181702288, "learning_rate": 1.258691430564532e-06, "loss": 0.0299, "step": 30413 }, { "epoch": 3.6065457132692993, "grad_norm": 0.8692945097271496, "learning_rate": 1.257939493302343e-06, "loss": 0.0438, "step": 30414 }, { "epoch": 3.606664295031424, "grad_norm": 0.44130299498077163, "learning_rate": 1.257187774912838e-06, "loss": 0.0168, "step": 30415 }, { "epoch": 3.6067828767935493, "grad_norm": 0.5552812043547174, "learning_rate": 1.2564362754029447e-06, "loss": 0.0273, "step": 30416 }, { "epoch": 3.606901458555674, "grad_norm": 1.1576392147005896, "learning_rate": 1.2556849947795963e-06, "loss": 0.0365, "step": 30417 }, { "epoch": 3.607020040317799, "grad_norm": 0.6212658092453522, "learning_rate": 1.2549339330497124e-06, "loss": 0.0187, "step": 30418 }, { "epoch": 3.607138622079924, "grad_norm": 0.31304659243795074, "learning_rate": 1.2541830902202178e-06, "loss": 0.0216, "step": 30419 }, { "epoch": 3.607257203842049, "grad_norm": 0.8150061668845272, "learning_rate": 1.2534324662980435e-06, "loss": 0.0355, "step": 30420 }, { "epoch": 3.607375785604174, "grad_norm": 0.34918103365931524, "learning_rate": 1.2526820612900975e-06, "loss": 0.0123, "step": 30421 }, { "epoch": 3.607494367366299, "grad_norm": 0.38071184627512517, "learning_rate": 1.2519318752032993e-06, "loss": 0.0162, "step": 30422 }, { "epoch": 3.607612949128424, "grad_norm": 0.8002619596869618, "learning_rate": 1.2511819080445685e-06, "loss": 0.0307, "step": 30423 }, { "epoch": 3.607731530890549, "grad_norm": 0.7225420620050265, "learning_rate": 1.250432159820822e-06, "loss": 0.0268, "step": 30424 }, { "epoch": 3.607850112652674, "grad_norm": 0.33269416030619986, "learning_rate": 1.2496826305389647e-06, "loss": 0.017, "step": 30425 }, { "epoch": 3.607968694414799, "grad_norm": 0.5883511294944097, "learning_rate": 1.2489333202059084e-06, "loss": 0.0241, "step": 30426 }, { "epoch": 3.6080872761769243, "grad_norm": 0.38571949992598753, "learning_rate": 1.2481842288285612e-06, "loss": 0.0196, "step": 30427 }, { "epoch": 3.608205857939049, "grad_norm": 0.406164929376791, "learning_rate": 1.2474353564138286e-06, "loss": 0.019, "step": 30428 }, { "epoch": 3.608324439701174, "grad_norm": 0.7278512943937971, "learning_rate": 1.2466867029686164e-06, "loss": 0.0226, "step": 30429 }, { "epoch": 3.608443021463299, "grad_norm": 0.46466300722054243, "learning_rate": 1.2459382684998216e-06, "loss": 0.0316, "step": 30430 }, { "epoch": 3.608561603225424, "grad_norm": 0.42822597626595904, "learning_rate": 1.245190053014353e-06, "loss": 0.0173, "step": 30431 }, { "epoch": 3.608680184987549, "grad_norm": 0.6702021911969323, "learning_rate": 1.244442056519096e-06, "loss": 0.0299, "step": 30432 }, { "epoch": 3.6087987667496737, "grad_norm": 0.6178361036093637, "learning_rate": 1.2436942790209543e-06, "loss": 0.0229, "step": 30433 }, { "epoch": 3.608917348511799, "grad_norm": 0.6023694502327289, "learning_rate": 1.2429467205268192e-06, "loss": 0.0191, "step": 30434 }, { "epoch": 3.609035930273924, "grad_norm": 0.6732546401250891, "learning_rate": 1.242199381043585e-06, "loss": 0.0317, "step": 30435 }, { "epoch": 3.609154512036049, "grad_norm": 0.4482966253486542, "learning_rate": 1.2414522605781353e-06, "loss": 0.0186, "step": 30436 }, { "epoch": 3.6092730937981736, "grad_norm": 0.42215151229210157, "learning_rate": 1.240705359137362e-06, "loss": 0.0156, "step": 30437 }, { "epoch": 3.609391675560299, "grad_norm": 0.5008823477166998, "learning_rate": 1.2399586767281506e-06, "loss": 0.0309, "step": 30438 }, { "epoch": 3.609510257322424, "grad_norm": 0.3747924143098685, "learning_rate": 1.2392122133573825e-06, "loss": 0.0227, "step": 30439 }, { "epoch": 3.609628839084549, "grad_norm": 0.6601264566000912, "learning_rate": 1.2384659690319433e-06, "loss": 0.0279, "step": 30440 }, { "epoch": 3.6097474208466735, "grad_norm": 0.5207826689928697, "learning_rate": 1.2377199437587027e-06, "loss": 0.016, "step": 30441 }, { "epoch": 3.6098660026087988, "grad_norm": 0.6466758305028387, "learning_rate": 1.2369741375445527e-06, "loss": 0.0328, "step": 30442 }, { "epoch": 3.609984584370924, "grad_norm": 0.48134773058425706, "learning_rate": 1.2362285503963567e-06, "loss": 0.0204, "step": 30443 }, { "epoch": 3.6101031661330487, "grad_norm": 0.43335346366538474, "learning_rate": 1.2354831823209927e-06, "loss": 0.0211, "step": 30444 }, { "epoch": 3.6102217478951735, "grad_norm": 0.8168127494480738, "learning_rate": 1.2347380333253333e-06, "loss": 0.0394, "step": 30445 }, { "epoch": 3.6103403296572987, "grad_norm": 0.5054991007716786, "learning_rate": 1.233993103416245e-06, "loss": 0.0263, "step": 30446 }, { "epoch": 3.610458911419424, "grad_norm": 0.47710803778487104, "learning_rate": 1.2332483926006e-06, "loss": 0.0231, "step": 30447 }, { "epoch": 3.6105774931815486, "grad_norm": 0.6598110494398781, "learning_rate": 1.232503900885254e-06, "loss": 0.0375, "step": 30448 }, { "epoch": 3.6106960749436734, "grad_norm": 0.4427284590325418, "learning_rate": 1.2317596282770821e-06, "loss": 0.0178, "step": 30449 }, { "epoch": 3.6108146567057986, "grad_norm": 0.4876119335488241, "learning_rate": 1.2310155747829372e-06, "loss": 0.0248, "step": 30450 }, { "epoch": 3.610933238467924, "grad_norm": 0.5196729179444166, "learning_rate": 1.230271740409683e-06, "loss": 0.0284, "step": 30451 }, { "epoch": 3.6110518202300486, "grad_norm": 0.4970340344234, "learning_rate": 1.22952812516417e-06, "loss": 0.0243, "step": 30452 }, { "epoch": 3.6111704019921738, "grad_norm": 0.3638066936963504, "learning_rate": 1.2287847290532644e-06, "loss": 0.0179, "step": 30453 }, { "epoch": 3.6112889837542985, "grad_norm": 0.6469294874298182, "learning_rate": 1.2280415520838113e-06, "loss": 0.027, "step": 30454 }, { "epoch": 3.6114075655164237, "grad_norm": 0.41226554161366635, "learning_rate": 1.227298594262663e-06, "loss": 0.0282, "step": 30455 }, { "epoch": 3.6115261472785485, "grad_norm": 0.4481417158677059, "learning_rate": 1.22655585559667e-06, "loss": 0.0263, "step": 30456 }, { "epoch": 3.6116447290406737, "grad_norm": 0.46952881127907065, "learning_rate": 1.2258133360926792e-06, "loss": 0.0206, "step": 30457 }, { "epoch": 3.6117633108027984, "grad_norm": 0.4184908754745878, "learning_rate": 1.2250710357575384e-06, "loss": 0.015, "step": 30458 }, { "epoch": 3.6118818925649236, "grad_norm": 0.8234643783745917, "learning_rate": 1.2243289545980807e-06, "loss": 0.0381, "step": 30459 }, { "epoch": 3.6120004743270484, "grad_norm": 0.5166817825700368, "learning_rate": 1.2235870926211619e-06, "loss": 0.0199, "step": 30460 }, { "epoch": 3.6121190560891736, "grad_norm": 0.468711878575777, "learning_rate": 1.2228454498336096e-06, "loss": 0.0372, "step": 30461 }, { "epoch": 3.6122376378512984, "grad_norm": 0.6581615728914298, "learning_rate": 1.2221040262422685e-06, "loss": 0.0243, "step": 30462 }, { "epoch": 3.6123562196134236, "grad_norm": 0.5461102216305043, "learning_rate": 1.2213628218539636e-06, "loss": 0.0233, "step": 30463 }, { "epoch": 3.6124748013755483, "grad_norm": 0.3739209886861195, "learning_rate": 1.2206218366755395e-06, "loss": 0.0171, "step": 30464 }, { "epoch": 3.6125933831376735, "grad_norm": 0.34396371297689216, "learning_rate": 1.2198810707138215e-06, "loss": 0.0186, "step": 30465 }, { "epoch": 3.6127119648997983, "grad_norm": 0.8761287295493088, "learning_rate": 1.2191405239756372e-06, "loss": 0.0466, "step": 30466 }, { "epoch": 3.6128305466619235, "grad_norm": 0.5669898972188444, "learning_rate": 1.2184001964678143e-06, "loss": 0.0162, "step": 30467 }, { "epoch": 3.6129491284240483, "grad_norm": 0.9190986103567917, "learning_rate": 1.2176600881971783e-06, "loss": 0.0401, "step": 30468 }, { "epoch": 3.6130677101861735, "grad_norm": 0.8323548286218649, "learning_rate": 1.216920199170557e-06, "loss": 0.0491, "step": 30469 }, { "epoch": 3.613186291948298, "grad_norm": 0.5778218251033751, "learning_rate": 1.2161805293947642e-06, "loss": 0.0333, "step": 30470 }, { "epoch": 3.6133048737104234, "grad_norm": 0.8239101046903867, "learning_rate": 1.2154410788766196e-06, "loss": 0.0482, "step": 30471 }, { "epoch": 3.613423455472548, "grad_norm": 0.5494366479821569, "learning_rate": 1.2147018476229428e-06, "loss": 0.0209, "step": 30472 }, { "epoch": 3.6135420372346734, "grad_norm": 0.4911879246783786, "learning_rate": 1.2139628356405502e-06, "loss": 0.0182, "step": 30473 }, { "epoch": 3.613660618996798, "grad_norm": 0.4901819477112913, "learning_rate": 1.213224042936245e-06, "loss": 0.0159, "step": 30474 }, { "epoch": 3.6137792007589233, "grad_norm": 0.9245787352782184, "learning_rate": 1.2124854695168525e-06, "loss": 0.0218, "step": 30475 }, { "epoch": 3.613897782521048, "grad_norm": 0.3579254956170745, "learning_rate": 1.2117471153891696e-06, "loss": 0.0169, "step": 30476 }, { "epoch": 3.6140163642831733, "grad_norm": 0.6029620638038913, "learning_rate": 1.2110089805600077e-06, "loss": 0.0279, "step": 30477 }, { "epoch": 3.614134946045298, "grad_norm": 0.7405846600802909, "learning_rate": 1.2102710650361698e-06, "loss": 0.0291, "step": 30478 }, { "epoch": 3.6142535278074233, "grad_norm": 0.8555791305751681, "learning_rate": 1.2095333688244614e-06, "loss": 0.0297, "step": 30479 }, { "epoch": 3.6143721095695485, "grad_norm": 0.2967213319644539, "learning_rate": 1.2087958919316854e-06, "loss": 0.0152, "step": 30480 }, { "epoch": 3.6144906913316732, "grad_norm": 0.2822985261447789, "learning_rate": 1.2080586343646312e-06, "loss": 0.0134, "step": 30481 }, { "epoch": 3.614609273093798, "grad_norm": 0.5721537524129262, "learning_rate": 1.2073215961301037e-06, "loss": 0.0236, "step": 30482 }, { "epoch": 3.614727854855923, "grad_norm": 0.3909600738809005, "learning_rate": 1.2065847772348926e-06, "loss": 0.0203, "step": 30483 }, { "epoch": 3.6148464366180484, "grad_norm": 0.47344557321411923, "learning_rate": 1.2058481776857948e-06, "loss": 0.0177, "step": 30484 }, { "epoch": 3.614965018380173, "grad_norm": 0.6201353870455142, "learning_rate": 1.205111797489597e-06, "loss": 0.0279, "step": 30485 }, { "epoch": 3.615083600142298, "grad_norm": 0.38199758190170147, "learning_rate": 1.2043756366530878e-06, "loss": 0.0182, "step": 30486 }, { "epoch": 3.615202181904423, "grad_norm": 0.5436926653009105, "learning_rate": 1.2036396951830564e-06, "loss": 0.0296, "step": 30487 }, { "epoch": 3.6153207636665483, "grad_norm": 0.3786940107728946, "learning_rate": 1.2029039730862835e-06, "loss": 0.0184, "step": 30488 }, { "epoch": 3.615439345428673, "grad_norm": 0.37165092323899124, "learning_rate": 1.202168470369558e-06, "loss": 0.0165, "step": 30489 }, { "epoch": 3.615557927190798, "grad_norm": 0.6534510836237717, "learning_rate": 1.2014331870396522e-06, "loss": 0.0239, "step": 30490 }, { "epoch": 3.615676508952923, "grad_norm": 0.74453196904478, "learning_rate": 1.2006981231033555e-06, "loss": 0.0258, "step": 30491 }, { "epoch": 3.6157950907150482, "grad_norm": 0.3863198686620523, "learning_rate": 1.1999632785674313e-06, "loss": 0.0173, "step": 30492 }, { "epoch": 3.615913672477173, "grad_norm": 0.46563828548063463, "learning_rate": 1.199228653438661e-06, "loss": 0.0229, "step": 30493 }, { "epoch": 3.6160322542392977, "grad_norm": 0.5066160099179331, "learning_rate": 1.1984942477238164e-06, "loss": 0.0232, "step": 30494 }, { "epoch": 3.616150836001423, "grad_norm": 0.4766012794171598, "learning_rate": 1.197760061429673e-06, "loss": 0.0202, "step": 30495 }, { "epoch": 3.616269417763548, "grad_norm": 0.4353777709728239, "learning_rate": 1.197026094562989e-06, "loss": 0.0197, "step": 30496 }, { "epoch": 3.616387999525673, "grad_norm": 0.3689722599407613, "learning_rate": 1.1962923471305343e-06, "loss": 0.0146, "step": 30497 }, { "epoch": 3.6165065812877977, "grad_norm": 0.3134070954676824, "learning_rate": 1.1955588191390727e-06, "loss": 0.0133, "step": 30498 }, { "epoch": 3.616625163049923, "grad_norm": 0.686187453685838, "learning_rate": 1.194825510595371e-06, "loss": 0.0376, "step": 30499 }, { "epoch": 3.616743744812048, "grad_norm": 0.3942578055375481, "learning_rate": 1.1940924215061878e-06, "loss": 0.0193, "step": 30500 }, { "epoch": 3.616862326574173, "grad_norm": 0.7745334121454815, "learning_rate": 1.193359551878273e-06, "loss": 0.0357, "step": 30501 }, { "epoch": 3.616980908336298, "grad_norm": 0.3928216352700221, "learning_rate": 1.1926269017183967e-06, "loss": 0.0186, "step": 30502 }, { "epoch": 3.617099490098423, "grad_norm": 0.30799992766457096, "learning_rate": 1.191894471033303e-06, "loss": 0.016, "step": 30503 }, { "epoch": 3.617218071860548, "grad_norm": 0.8540695337650546, "learning_rate": 1.1911622598297451e-06, "loss": 0.0412, "step": 30504 }, { "epoch": 3.6173366536226728, "grad_norm": 0.28160149388635836, "learning_rate": 1.1904302681144758e-06, "loss": 0.0162, "step": 30505 }, { "epoch": 3.617455235384798, "grad_norm": 0.3524961367654006, "learning_rate": 1.1896984958942425e-06, "loss": 0.0151, "step": 30506 }, { "epoch": 3.6175738171469227, "grad_norm": 0.47186184047554947, "learning_rate": 1.1889669431757956e-06, "loss": 0.0278, "step": 30507 }, { "epoch": 3.617692398909048, "grad_norm": 0.40646423001298093, "learning_rate": 1.1882356099658653e-06, "loss": 0.0156, "step": 30508 }, { "epoch": 3.6178109806711727, "grad_norm": 0.4500759621618194, "learning_rate": 1.1875044962712107e-06, "loss": 0.0257, "step": 30509 }, { "epoch": 3.617929562433298, "grad_norm": 0.3859043607809819, "learning_rate": 1.1867736020985592e-06, "loss": 0.0155, "step": 30510 }, { "epoch": 3.6180481441954226, "grad_norm": 0.5792923737317358, "learning_rate": 1.1860429274546586e-06, "loss": 0.0162, "step": 30511 }, { "epoch": 3.618166725957548, "grad_norm": 0.5343420159760938, "learning_rate": 1.1853124723462311e-06, "loss": 0.017, "step": 30512 }, { "epoch": 3.6182853077196726, "grad_norm": 0.4929647469757851, "learning_rate": 1.184582236780027e-06, "loss": 0.0271, "step": 30513 }, { "epoch": 3.618403889481798, "grad_norm": 0.4977106174379826, "learning_rate": 1.1838522207627683e-06, "loss": 0.0213, "step": 30514 }, { "epoch": 3.6185224712439226, "grad_norm": 0.40538629382586716, "learning_rate": 1.1831224243011835e-06, "loss": 0.0183, "step": 30515 }, { "epoch": 3.6186410530060478, "grad_norm": 0.7841006602948055, "learning_rate": 1.1823928474020057e-06, "loss": 0.0317, "step": 30516 }, { "epoch": 3.6187596347681725, "grad_norm": 0.8172656875040033, "learning_rate": 1.1816634900719603e-06, "loss": 0.0373, "step": 30517 }, { "epoch": 3.6188782165302977, "grad_norm": 0.4735320497174433, "learning_rate": 1.1809343523177696e-06, "loss": 0.027, "step": 30518 }, { "epoch": 3.6189967982924225, "grad_norm": 0.40633979006197113, "learning_rate": 1.1802054341461504e-06, "loss": 0.0174, "step": 30519 }, { "epoch": 3.6191153800545477, "grad_norm": 0.4814182072589396, "learning_rate": 1.1794767355638337e-06, "loss": 0.0178, "step": 30520 }, { "epoch": 3.6192339618166725, "grad_norm": 0.5313535841645073, "learning_rate": 1.1787482565775276e-06, "loss": 0.0259, "step": 30521 }, { "epoch": 3.6193525435787977, "grad_norm": 0.3075287336283624, "learning_rate": 1.1780199971939549e-06, "loss": 0.0167, "step": 30522 }, { "epoch": 3.6194711253409224, "grad_norm": 0.6344612003967877, "learning_rate": 1.1772919574198182e-06, "loss": 0.0242, "step": 30523 }, { "epoch": 3.6195897071030476, "grad_norm": 0.42695676050363446, "learning_rate": 1.176564137261843e-06, "loss": 0.0201, "step": 30524 }, { "epoch": 3.6197082888651724, "grad_norm": 0.6866624136465029, "learning_rate": 1.1758365367267293e-06, "loss": 0.0412, "step": 30525 }, { "epoch": 3.6198268706272976, "grad_norm": 0.6222353886614134, "learning_rate": 1.1751091558211858e-06, "loss": 0.0295, "step": 30526 }, { "epoch": 3.6199454523894223, "grad_norm": 0.5473138302846593, "learning_rate": 1.1743819945519235e-06, "loss": 0.0254, "step": 30527 }, { "epoch": 3.6200640341515475, "grad_norm": 0.5955191737939642, "learning_rate": 1.1736550529256402e-06, "loss": 0.0276, "step": 30528 }, { "epoch": 3.6201826159136727, "grad_norm": 0.654399913155924, "learning_rate": 1.1729283309490412e-06, "loss": 0.0232, "step": 30529 }, { "epoch": 3.6203011976757975, "grad_norm": 0.3660895174996888, "learning_rate": 1.1722018286288189e-06, "loss": 0.0163, "step": 30530 }, { "epoch": 3.6204197794379223, "grad_norm": 0.7619497763211113, "learning_rate": 1.1714755459716843e-06, "loss": 0.0327, "step": 30531 }, { "epoch": 3.6205383612000475, "grad_norm": 0.49527109410450376, "learning_rate": 1.1707494829843207e-06, "loss": 0.0197, "step": 30532 }, { "epoch": 3.6206569429621727, "grad_norm": 0.5121989145062326, "learning_rate": 1.170023639673426e-06, "loss": 0.0179, "step": 30533 }, { "epoch": 3.6207755247242974, "grad_norm": 0.41768344224975723, "learning_rate": 1.169298016045689e-06, "loss": 0.0253, "step": 30534 }, { "epoch": 3.620894106486422, "grad_norm": 0.5824321739819032, "learning_rate": 1.1685726121078045e-06, "loss": 0.0295, "step": 30535 }, { "epoch": 3.6210126882485474, "grad_norm": 0.415628161291174, "learning_rate": 1.1678474278664531e-06, "loss": 0.0243, "step": 30536 }, { "epoch": 3.6211312700106726, "grad_norm": 0.30177048150454755, "learning_rate": 1.1671224633283268e-06, "loss": 0.0139, "step": 30537 }, { "epoch": 3.6212498517727973, "grad_norm": 0.5396862394321039, "learning_rate": 1.1663977185001034e-06, "loss": 0.0214, "step": 30538 }, { "epoch": 3.621368433534922, "grad_norm": 0.5969687065996008, "learning_rate": 1.165673193388464e-06, "loss": 0.0316, "step": 30539 }, { "epoch": 3.6214870152970473, "grad_norm": 0.7857542417714591, "learning_rate": 1.1649488880000976e-06, "loss": 0.0271, "step": 30540 }, { "epoch": 3.6216055970591725, "grad_norm": 0.48420929374805155, "learning_rate": 1.1642248023416679e-06, "loss": 0.0165, "step": 30541 }, { "epoch": 3.6217241788212973, "grad_norm": 0.6239722186824026, "learning_rate": 1.163500936419859e-06, "loss": 0.0327, "step": 30542 }, { "epoch": 3.621842760583422, "grad_norm": 0.4078541072499883, "learning_rate": 1.1627772902413376e-06, "loss": 0.0189, "step": 30543 }, { "epoch": 3.6219613423455472, "grad_norm": 0.6571278197321746, "learning_rate": 1.1620538638127843e-06, "loss": 0.033, "step": 30544 }, { "epoch": 3.6220799241076724, "grad_norm": 0.4424230531443711, "learning_rate": 1.161330657140855e-06, "loss": 0.0232, "step": 30545 }, { "epoch": 3.622198505869797, "grad_norm": 0.5522192249713966, "learning_rate": 1.1606076702322304e-06, "loss": 0.0237, "step": 30546 }, { "epoch": 3.622317087631922, "grad_norm": 0.6235627419261496, "learning_rate": 1.1598849030935665e-06, "loss": 0.035, "step": 30547 }, { "epoch": 3.622435669394047, "grad_norm": 0.8060443539856338, "learning_rate": 1.15916235573153e-06, "loss": 0.0316, "step": 30548 }, { "epoch": 3.6225542511561724, "grad_norm": 0.4836612956016723, "learning_rate": 1.1584400281527797e-06, "loss": 0.0251, "step": 30549 }, { "epoch": 3.622672832918297, "grad_norm": 0.6387762860602652, "learning_rate": 1.1577179203639766e-06, "loss": 0.0246, "step": 30550 }, { "epoch": 3.6227914146804223, "grad_norm": 0.4773753194285187, "learning_rate": 1.1569960323717794e-06, "loss": 0.0164, "step": 30551 }, { "epoch": 3.622909996442547, "grad_norm": 0.7917534091220493, "learning_rate": 1.1562743641828355e-06, "loss": 0.035, "step": 30552 }, { "epoch": 3.6230285782046723, "grad_norm": 0.43549290999975643, "learning_rate": 1.1555529158038065e-06, "loss": 0.0171, "step": 30553 }, { "epoch": 3.623147159966797, "grad_norm": 0.35466284041066237, "learning_rate": 1.1548316872413366e-06, "loss": 0.0128, "step": 30554 }, { "epoch": 3.6232657417289222, "grad_norm": 0.3910257982030923, "learning_rate": 1.1541106785020795e-06, "loss": 0.0248, "step": 30555 }, { "epoch": 3.623384323491047, "grad_norm": 0.8081899407835952, "learning_rate": 1.1533898895926847e-06, "loss": 0.0361, "step": 30556 }, { "epoch": 3.623502905253172, "grad_norm": 0.553763405208823, "learning_rate": 1.1526693205197835e-06, "loss": 0.0277, "step": 30557 }, { "epoch": 3.623621487015297, "grad_norm": 0.5158763135067869, "learning_rate": 1.1519489712900345e-06, "loss": 0.021, "step": 30558 }, { "epoch": 3.623740068777422, "grad_norm": 0.42385789914460464, "learning_rate": 1.151228841910068e-06, "loss": 0.0208, "step": 30559 }, { "epoch": 3.623858650539547, "grad_norm": 0.5656910138302355, "learning_rate": 1.1505089323865293e-06, "loss": 0.0241, "step": 30560 }, { "epoch": 3.623977232301672, "grad_norm": 0.41170694574023087, "learning_rate": 1.1497892427260486e-06, "loss": 0.0198, "step": 30561 }, { "epoch": 3.624095814063797, "grad_norm": 0.7074591825567287, "learning_rate": 1.1490697729352683e-06, "loss": 0.0344, "step": 30562 }, { "epoch": 3.624214395825922, "grad_norm": 0.39698514581374866, "learning_rate": 1.1483505230208136e-06, "loss": 0.017, "step": 30563 }, { "epoch": 3.624332977588047, "grad_norm": 0.5217813675787172, "learning_rate": 1.1476314929893206e-06, "loss": 0.0262, "step": 30564 }, { "epoch": 3.624451559350172, "grad_norm": 0.5945184260369211, "learning_rate": 1.146912682847412e-06, "loss": 0.0254, "step": 30565 }, { "epoch": 3.624570141112297, "grad_norm": 0.6710703820051761, "learning_rate": 1.1461940926017183e-06, "loss": 0.0271, "step": 30566 }, { "epoch": 3.624688722874422, "grad_norm": 0.475917037825512, "learning_rate": 1.145475722258868e-06, "loss": 0.0208, "step": 30567 }, { "epoch": 3.6248073046365468, "grad_norm": 0.6642161150057283, "learning_rate": 1.1447575718254721e-06, "loss": 0.0218, "step": 30568 }, { "epoch": 3.624925886398672, "grad_norm": 0.547932231773108, "learning_rate": 1.1440396413081672e-06, "loss": 0.0285, "step": 30569 }, { "epoch": 3.6250444681607967, "grad_norm": 0.31449918163465435, "learning_rate": 1.1433219307135563e-06, "loss": 0.0104, "step": 30570 }, { "epoch": 3.625163049922922, "grad_norm": 0.2861137998563407, "learning_rate": 1.1426044400482672e-06, "loss": 0.0116, "step": 30571 }, { "epoch": 3.6252816316850467, "grad_norm": 0.5537989095527303, "learning_rate": 1.1418871693189005e-06, "loss": 0.0317, "step": 30572 }, { "epoch": 3.625400213447172, "grad_norm": 0.553716280192245, "learning_rate": 1.1411701185320866e-06, "loss": 0.0308, "step": 30573 }, { "epoch": 3.6255187952092967, "grad_norm": 0.4509675912645148, "learning_rate": 1.1404532876944207e-06, "loss": 0.0205, "step": 30574 }, { "epoch": 3.625637376971422, "grad_norm": 0.7513963086508453, "learning_rate": 1.1397366768125195e-06, "loss": 0.0353, "step": 30575 }, { "epoch": 3.6257559587335466, "grad_norm": 0.4986654413304326, "learning_rate": 1.139020285892986e-06, "loss": 0.0238, "step": 30576 }, { "epoch": 3.625874540495672, "grad_norm": 0.4105601778278942, "learning_rate": 1.1383041149424233e-06, "loss": 0.0148, "step": 30577 }, { "epoch": 3.625993122257797, "grad_norm": 0.2808480891265015, "learning_rate": 1.1375881639674402e-06, "loss": 0.0125, "step": 30578 }, { "epoch": 3.6261117040199218, "grad_norm": 0.5240268171312453, "learning_rate": 1.1368724329746256e-06, "loss": 0.0212, "step": 30579 }, { "epoch": 3.6262302857820465, "grad_norm": 0.5236605028219777, "learning_rate": 1.1361569219705908e-06, "loss": 0.0302, "step": 30580 }, { "epoch": 3.6263488675441717, "grad_norm": 0.6193294769222107, "learning_rate": 1.1354416309619226e-06, "loss": 0.0317, "step": 30581 }, { "epoch": 3.626467449306297, "grad_norm": 0.5619745054588182, "learning_rate": 1.1347265599552209e-06, "loss": 0.0296, "step": 30582 }, { "epoch": 3.6265860310684217, "grad_norm": 0.4772974317596211, "learning_rate": 1.1340117089570695e-06, "loss": 0.023, "step": 30583 }, { "epoch": 3.6267046128305465, "grad_norm": 0.7798652979120075, "learning_rate": 1.1332970779740687e-06, "loss": 0.0312, "step": 30584 }, { "epoch": 3.6268231945926717, "grad_norm": 0.6002425581025375, "learning_rate": 1.1325826670127992e-06, "loss": 0.028, "step": 30585 }, { "epoch": 3.626941776354797, "grad_norm": 0.6840424862935617, "learning_rate": 1.1318684760798476e-06, "loss": 0.0311, "step": 30586 }, { "epoch": 3.6270603581169216, "grad_norm": 0.46708067781406, "learning_rate": 1.1311545051818e-06, "loss": 0.0195, "step": 30587 }, { "epoch": 3.6271789398790464, "grad_norm": 0.5467220682116494, "learning_rate": 1.1304407543252404e-06, "loss": 0.0276, "step": 30588 }, { "epoch": 3.6272975216411716, "grad_norm": 0.5979449142684701, "learning_rate": 1.1297272235167466e-06, "loss": 0.0248, "step": 30589 }, { "epoch": 3.627416103403297, "grad_norm": 0.36135620351451775, "learning_rate": 1.1290139127628912e-06, "loss": 0.0177, "step": 30590 }, { "epoch": 3.6275346851654215, "grad_norm": 0.47268123570992593, "learning_rate": 1.1283008220702606e-06, "loss": 0.0226, "step": 30591 }, { "epoch": 3.6276532669275463, "grad_norm": 0.4110461040884841, "learning_rate": 1.1275879514454218e-06, "loss": 0.02, "step": 30592 }, { "epoch": 3.6277718486896715, "grad_norm": 0.46893417988084124, "learning_rate": 1.1268753008949502e-06, "loss": 0.0211, "step": 30593 }, { "epoch": 3.6278904304517967, "grad_norm": 0.8147366839824469, "learning_rate": 1.126162870425404e-06, "loss": 0.0325, "step": 30594 }, { "epoch": 3.6280090122139215, "grad_norm": 0.5122236934921792, "learning_rate": 1.12545066004337e-06, "loss": 0.0211, "step": 30595 }, { "epoch": 3.6281275939760462, "grad_norm": 0.624299873868875, "learning_rate": 1.1247386697553986e-06, "loss": 0.0262, "step": 30596 }, { "epoch": 3.6282461757381714, "grad_norm": 0.3175810713865264, "learning_rate": 1.1240268995680593e-06, "loss": 0.0121, "step": 30597 }, { "epoch": 3.6283647575002966, "grad_norm": 0.45376718457550325, "learning_rate": 1.1233153494879134e-06, "loss": 0.0196, "step": 30598 }, { "epoch": 3.6284833392624214, "grad_norm": 0.6814902003338795, "learning_rate": 1.1226040195215226e-06, "loss": 0.0309, "step": 30599 }, { "epoch": 3.628601921024546, "grad_norm": 0.886054269676723, "learning_rate": 1.1218929096754427e-06, "loss": 0.0397, "step": 30600 }, { "epoch": 3.6287205027866714, "grad_norm": 0.7101241906598454, "learning_rate": 1.121182019956224e-06, "loss": 0.0392, "step": 30601 }, { "epoch": 3.6288390845487966, "grad_norm": 0.37783827599086106, "learning_rate": 1.1204713503704305e-06, "loss": 0.018, "step": 30602 }, { "epoch": 3.6289576663109213, "grad_norm": 0.6718328795990541, "learning_rate": 1.1197609009246045e-06, "loss": 0.0338, "step": 30603 }, { "epoch": 3.6290762480730465, "grad_norm": 0.48448002450394395, "learning_rate": 1.119050671625299e-06, "loss": 0.0205, "step": 30604 }, { "epoch": 3.6291948298351713, "grad_norm": 0.4786484100410678, "learning_rate": 1.1183406624790643e-06, "loss": 0.0283, "step": 30605 }, { "epoch": 3.6293134115972965, "grad_norm": 0.7278625257043722, "learning_rate": 1.1176308734924395e-06, "loss": 0.0196, "step": 30606 }, { "epoch": 3.6294319933594212, "grad_norm": 0.4281238009634022, "learning_rate": 1.116921304671978e-06, "loss": 0.0138, "step": 30607 }, { "epoch": 3.6295505751215464, "grad_norm": 0.4386010608780737, "learning_rate": 1.11621195602421e-06, "loss": 0.031, "step": 30608 }, { "epoch": 3.629669156883671, "grad_norm": 0.47146162720263646, "learning_rate": 1.1155028275556783e-06, "loss": 0.0198, "step": 30609 }, { "epoch": 3.6297877386457964, "grad_norm": 0.3062928532874005, "learning_rate": 1.1147939192729218e-06, "loss": 0.011, "step": 30610 }, { "epoch": 3.629906320407921, "grad_norm": 0.393814502978095, "learning_rate": 1.1140852311824794e-06, "loss": 0.015, "step": 30611 }, { "epoch": 3.6300249021700464, "grad_norm": 0.4980923676472471, "learning_rate": 1.1133767632908799e-06, "loss": 0.0178, "step": 30612 }, { "epoch": 3.630143483932171, "grad_norm": 0.4330434066733445, "learning_rate": 1.1126685156046506e-06, "loss": 0.0164, "step": 30613 }, { "epoch": 3.6302620656942963, "grad_norm": 0.49210634166012224, "learning_rate": 1.111960488130326e-06, "loss": 0.0224, "step": 30614 }, { "epoch": 3.630380647456421, "grad_norm": 0.5273597182052229, "learning_rate": 1.1112526808744333e-06, "loss": 0.0295, "step": 30615 }, { "epoch": 3.6304992292185463, "grad_norm": 0.6588991345900581, "learning_rate": 1.1105450938434957e-06, "loss": 0.0314, "step": 30616 }, { "epoch": 3.630617810980671, "grad_norm": 0.6375462421735701, "learning_rate": 1.1098377270440386e-06, "loss": 0.0362, "step": 30617 }, { "epoch": 3.6307363927427962, "grad_norm": 0.5723075993197545, "learning_rate": 1.1091305804825842e-06, "loss": 0.0174, "step": 30618 }, { "epoch": 3.630854974504921, "grad_norm": 0.7995138441445279, "learning_rate": 1.108423654165644e-06, "loss": 0.0457, "step": 30619 }, { "epoch": 3.630973556267046, "grad_norm": 0.5269905355224596, "learning_rate": 1.1077169480997407e-06, "loss": 0.0281, "step": 30620 }, { "epoch": 3.631092138029171, "grad_norm": 0.36743227401754575, "learning_rate": 1.1070104622913884e-06, "loss": 0.0147, "step": 30621 }, { "epoch": 3.631210719791296, "grad_norm": 0.3906455992659679, "learning_rate": 1.1063041967471043e-06, "loss": 0.0228, "step": 30622 }, { "epoch": 3.631329301553421, "grad_norm": 0.6787659985633776, "learning_rate": 1.1055981514733915e-06, "loss": 0.0387, "step": 30623 }, { "epoch": 3.631447883315546, "grad_norm": 0.5071885413085371, "learning_rate": 1.104892326476764e-06, "loss": 0.0294, "step": 30624 }, { "epoch": 3.631566465077671, "grad_norm": 0.5124258356595751, "learning_rate": 1.1041867217637253e-06, "loss": 0.0219, "step": 30625 }, { "epoch": 3.631685046839796, "grad_norm": 0.4984115913511895, "learning_rate": 1.103481337340781e-06, "loss": 0.0264, "step": 30626 }, { "epoch": 3.6318036286019213, "grad_norm": 0.852481167722574, "learning_rate": 1.1027761732144399e-06, "loss": 0.0455, "step": 30627 }, { "epoch": 3.631922210364046, "grad_norm": 0.3482456807001078, "learning_rate": 1.102071229391191e-06, "loss": 0.0173, "step": 30628 }, { "epoch": 3.632040792126171, "grad_norm": 0.670199702535674, "learning_rate": 1.1013665058775464e-06, "loss": 0.0366, "step": 30629 }, { "epoch": 3.632159373888296, "grad_norm": 0.5698865615371608, "learning_rate": 1.1006620026799918e-06, "loss": 0.0236, "step": 30630 }, { "epoch": 3.632277955650421, "grad_norm": 0.4412866954016474, "learning_rate": 1.099957719805028e-06, "loss": 0.0182, "step": 30631 }, { "epoch": 3.632396537412546, "grad_norm": 0.631225172224313, "learning_rate": 1.0992536572591444e-06, "loss": 0.0242, "step": 30632 }, { "epoch": 3.6325151191746707, "grad_norm": 0.39510451154442877, "learning_rate": 1.0985498150488383e-06, "loss": 0.0187, "step": 30633 }, { "epoch": 3.632633700936796, "grad_norm": 0.4601137296545177, "learning_rate": 1.0978461931805877e-06, "loss": 0.0213, "step": 30634 }, { "epoch": 3.632752282698921, "grad_norm": 0.5131007109721877, "learning_rate": 1.0971427916608851e-06, "loss": 0.0213, "step": 30635 }, { "epoch": 3.632870864461046, "grad_norm": 0.3959398099509853, "learning_rate": 1.0964396104962138e-06, "loss": 0.0157, "step": 30636 }, { "epoch": 3.6329894462231707, "grad_norm": 0.34960841471469123, "learning_rate": 1.0957366496930576e-06, "loss": 0.0118, "step": 30637 }, { "epoch": 3.633108027985296, "grad_norm": 0.40591177248575283, "learning_rate": 1.0950339092579003e-06, "loss": 0.0158, "step": 30638 }, { "epoch": 3.633226609747421, "grad_norm": 0.7222169943984436, "learning_rate": 1.094331389197209e-06, "loss": 0.0437, "step": 30639 }, { "epoch": 3.633345191509546, "grad_norm": 0.39182381363488095, "learning_rate": 1.0936290895174727e-06, "loss": 0.016, "step": 30640 }, { "epoch": 3.6334637732716706, "grad_norm": 0.8655694778086095, "learning_rate": 1.092927010225156e-06, "loss": 0.0373, "step": 30641 }, { "epoch": 3.633582355033796, "grad_norm": 0.5217586368718115, "learning_rate": 1.0922251513267423e-06, "loss": 0.0177, "step": 30642 }, { "epoch": 3.633700936795921, "grad_norm": 0.7468190457018233, "learning_rate": 1.091523512828685e-06, "loss": 0.0403, "step": 30643 }, { "epoch": 3.6338195185580457, "grad_norm": 0.39627669803961263, "learning_rate": 1.0908220947374704e-06, "loss": 0.0188, "step": 30644 }, { "epoch": 3.6339381003201705, "grad_norm": 0.690009901686504, "learning_rate": 1.0901208970595545e-06, "loss": 0.022, "step": 30645 }, { "epoch": 3.6340566820822957, "grad_norm": 0.6040736522335324, "learning_rate": 1.0894199198014016e-06, "loss": 0.0331, "step": 30646 }, { "epoch": 3.634175263844421, "grad_norm": 0.6467558430326676, "learning_rate": 1.0887191629694787e-06, "loss": 0.0306, "step": 30647 }, { "epoch": 3.6342938456065457, "grad_norm": 0.576721047794708, "learning_rate": 1.0880186265702391e-06, "loss": 0.0266, "step": 30648 }, { "epoch": 3.6344124273686704, "grad_norm": 0.38249385804760627, "learning_rate": 1.0873183106101526e-06, "loss": 0.0174, "step": 30649 }, { "epoch": 3.6345310091307956, "grad_norm": 0.3794906405903929, "learning_rate": 1.0866182150956584e-06, "loss": 0.0134, "step": 30650 }, { "epoch": 3.634649590892921, "grad_norm": 0.36877271968139186, "learning_rate": 1.0859183400332262e-06, "loss": 0.0233, "step": 30651 }, { "epoch": 3.6347681726550456, "grad_norm": 0.7898336122284042, "learning_rate": 1.0852186854292984e-06, "loss": 0.0481, "step": 30652 }, { "epoch": 3.634886754417171, "grad_norm": 0.4264282989734277, "learning_rate": 1.0845192512903307e-06, "loss": 0.0189, "step": 30653 }, { "epoch": 3.6350053361792956, "grad_norm": 0.8131114137859514, "learning_rate": 1.0838200376227654e-06, "loss": 0.0306, "step": 30654 }, { "epoch": 3.6351239179414208, "grad_norm": 0.31729420762093946, "learning_rate": 1.0831210444330525e-06, "loss": 0.0167, "step": 30655 }, { "epoch": 3.6352424997035455, "grad_norm": 0.8174195814930126, "learning_rate": 1.0824222717276373e-06, "loss": 0.0263, "step": 30656 }, { "epoch": 3.6353610814656707, "grad_norm": 0.7458543463677608, "learning_rate": 1.0817237195129559e-06, "loss": 0.0337, "step": 30657 }, { "epoch": 3.6354796632277955, "grad_norm": 0.7008790766703236, "learning_rate": 1.0810253877954535e-06, "loss": 0.0375, "step": 30658 }, { "epoch": 3.6355982449899207, "grad_norm": 0.609589981001295, "learning_rate": 1.0803272765815636e-06, "loss": 0.032, "step": 30659 }, { "epoch": 3.6357168267520454, "grad_norm": 0.38860936427966913, "learning_rate": 1.0796293858777283e-06, "loss": 0.0163, "step": 30660 }, { "epoch": 3.6358354085141706, "grad_norm": 0.7519226016784752, "learning_rate": 1.0789317156903678e-06, "loss": 0.0416, "step": 30661 }, { "epoch": 3.6359539902762954, "grad_norm": 0.31591328263438584, "learning_rate": 1.078234266025932e-06, "loss": 0.0181, "step": 30662 }, { "epoch": 3.6360725720384206, "grad_norm": 0.7234443597744468, "learning_rate": 1.0775370368908383e-06, "loss": 0.0332, "step": 30663 }, { "epoch": 3.6361911538005454, "grad_norm": 0.40058004308083744, "learning_rate": 1.0768400282915176e-06, "loss": 0.0229, "step": 30664 }, { "epoch": 3.6363097355626706, "grad_norm": 0.5088048546031009, "learning_rate": 1.0761432402343923e-06, "loss": 0.0268, "step": 30665 }, { "epoch": 3.6364283173247953, "grad_norm": 0.5366504070687661, "learning_rate": 1.0754466727258912e-06, "loss": 0.019, "step": 30666 }, { "epoch": 3.6365468990869205, "grad_norm": 0.29659187098947043, "learning_rate": 1.0747503257724362e-06, "loss": 0.0174, "step": 30667 }, { "epoch": 3.6366654808490453, "grad_norm": 0.7354955566175789, "learning_rate": 1.074054199380442e-06, "loss": 0.0424, "step": 30668 }, { "epoch": 3.6367840626111705, "grad_norm": 0.5221193521245354, "learning_rate": 1.0733582935563285e-06, "loss": 0.0269, "step": 30669 }, { "epoch": 3.6369026443732952, "grad_norm": 0.8033488362640042, "learning_rate": 1.0726626083065099e-06, "loss": 0.0305, "step": 30670 }, { "epoch": 3.6370212261354204, "grad_norm": 0.6220034798324674, "learning_rate": 1.0719671436374035e-06, "loss": 0.0244, "step": 30671 }, { "epoch": 3.637139807897545, "grad_norm": 0.7790805290016886, "learning_rate": 1.0712718995554095e-06, "loss": 0.0318, "step": 30672 }, { "epoch": 3.6372583896596704, "grad_norm": 0.5447436962992006, "learning_rate": 1.0705768760669532e-06, "loss": 0.0286, "step": 30673 }, { "epoch": 3.637376971421795, "grad_norm": 0.297441120829208, "learning_rate": 1.0698820731784326e-06, "loss": 0.0144, "step": 30674 }, { "epoch": 3.6374955531839204, "grad_norm": 0.586630596538867, "learning_rate": 1.0691874908962507e-06, "loss": 0.0334, "step": 30675 }, { "epoch": 3.6376141349460456, "grad_norm": 0.4900506786371344, "learning_rate": 1.0684931292268163e-06, "loss": 0.0165, "step": 30676 }, { "epoch": 3.6377327167081703, "grad_norm": 0.35638003840553806, "learning_rate": 1.0677989881765298e-06, "loss": 0.0171, "step": 30677 }, { "epoch": 3.637851298470295, "grad_norm": 0.625744453749271, "learning_rate": 1.0671050677517892e-06, "loss": 0.0194, "step": 30678 }, { "epoch": 3.6379698802324203, "grad_norm": 0.4531698286430672, "learning_rate": 1.0664113679589888e-06, "loss": 0.0171, "step": 30679 }, { "epoch": 3.6380884619945455, "grad_norm": 0.6334999272543679, "learning_rate": 1.0657178888045294e-06, "loss": 0.0263, "step": 30680 }, { "epoch": 3.6382070437566703, "grad_norm": 0.5177708249341996, "learning_rate": 1.0650246302947975e-06, "loss": 0.0286, "step": 30681 }, { "epoch": 3.638325625518795, "grad_norm": 0.3420156785895251, "learning_rate": 1.0643315924361908e-06, "loss": 0.0138, "step": 30682 }, { "epoch": 3.63844420728092, "grad_norm": 1.1128607514703148, "learning_rate": 1.063638775235093e-06, "loss": 0.041, "step": 30683 }, { "epoch": 3.6385627890430454, "grad_norm": 0.5650157715351332, "learning_rate": 1.0629461786978933e-06, "loss": 0.0228, "step": 30684 }, { "epoch": 3.63868137080517, "grad_norm": 0.40512484465721627, "learning_rate": 1.0622538028309758e-06, "loss": 0.0142, "step": 30685 }, { "epoch": 3.638799952567295, "grad_norm": 0.6272071028071852, "learning_rate": 1.0615616476407241e-06, "loss": 0.0204, "step": 30686 }, { "epoch": 3.63891853432942, "grad_norm": 0.3630553485298499, "learning_rate": 1.0608697131335193e-06, "loss": 0.0147, "step": 30687 }, { "epoch": 3.6390371160915453, "grad_norm": 0.42517620264925127, "learning_rate": 1.0601779993157368e-06, "loss": 0.0186, "step": 30688 }, { "epoch": 3.63915569785367, "grad_norm": 0.5980601686735284, "learning_rate": 1.0594865061937632e-06, "loss": 0.0291, "step": 30689 }, { "epoch": 3.639274279615795, "grad_norm": 0.5778785520366753, "learning_rate": 1.05879523377396e-06, "loss": 0.0247, "step": 30690 }, { "epoch": 3.63939286137792, "grad_norm": 0.40828118298805083, "learning_rate": 1.058104182062708e-06, "loss": 0.0234, "step": 30691 }, { "epoch": 3.6395114431400453, "grad_norm": 0.4791651378049535, "learning_rate": 1.0574133510663746e-06, "loss": 0.0248, "step": 30692 }, { "epoch": 3.63963002490217, "grad_norm": 0.5579434656669813, "learning_rate": 1.0567227407913328e-06, "loss": 0.0187, "step": 30693 }, { "epoch": 3.639748606664295, "grad_norm": 0.7655056683386534, "learning_rate": 1.0560323512439434e-06, "loss": 0.039, "step": 30694 }, { "epoch": 3.63986718842642, "grad_norm": 0.5058454670722392, "learning_rate": 1.0553421824305742e-06, "loss": 0.0189, "step": 30695 }, { "epoch": 3.639985770188545, "grad_norm": 0.6117841380441531, "learning_rate": 1.0546522343575866e-06, "loss": 0.028, "step": 30696 }, { "epoch": 3.64010435195067, "grad_norm": 0.4268376932353655, "learning_rate": 1.053962507031342e-06, "loss": 0.0207, "step": 30697 }, { "epoch": 3.6402229337127947, "grad_norm": 0.47860666007928626, "learning_rate": 1.053273000458202e-06, "loss": 0.0227, "step": 30698 }, { "epoch": 3.64034151547492, "grad_norm": 0.6438852151703016, "learning_rate": 1.0525837146445145e-06, "loss": 0.0288, "step": 30699 }, { "epoch": 3.640460097237045, "grad_norm": 0.8488920079996486, "learning_rate": 1.0518946495966437e-06, "loss": 0.0432, "step": 30700 }, { "epoch": 3.64057867899917, "grad_norm": 0.43925031223340694, "learning_rate": 1.0512058053209344e-06, "loss": 0.0126, "step": 30701 }, { "epoch": 3.640697260761295, "grad_norm": 0.52734470758089, "learning_rate": 1.0505171818237375e-06, "loss": 0.0206, "step": 30702 }, { "epoch": 3.64081584252342, "grad_norm": 0.47643069529588117, "learning_rate": 1.0498287791114058e-06, "loss": 0.02, "step": 30703 }, { "epoch": 3.640934424285545, "grad_norm": 0.5639808025168161, "learning_rate": 1.0491405971902817e-06, "loss": 0.0177, "step": 30704 }, { "epoch": 3.64105300604767, "grad_norm": 0.4404881872969116, "learning_rate": 1.0484526360667158e-06, "loss": 0.024, "step": 30705 }, { "epoch": 3.641171587809795, "grad_norm": 0.7921227398547116, "learning_rate": 1.047764895747036e-06, "loss": 0.0251, "step": 30706 }, { "epoch": 3.6412901695719198, "grad_norm": 0.5174579407253317, "learning_rate": 1.0470773762376012e-06, "loss": 0.0255, "step": 30707 }, { "epoch": 3.641408751334045, "grad_norm": 0.9024019145772182, "learning_rate": 1.0463900775447345e-06, "loss": 0.0402, "step": 30708 }, { "epoch": 3.6415273330961697, "grad_norm": 0.6682147988329953, "learning_rate": 1.045702999674783e-06, "loss": 0.0222, "step": 30709 }, { "epoch": 3.641645914858295, "grad_norm": 0.907505729860048, "learning_rate": 1.0450161426340671e-06, "loss": 0.0364, "step": 30710 }, { "epoch": 3.6417644966204197, "grad_norm": 0.4905456518604798, "learning_rate": 1.0443295064289315e-06, "loss": 0.0224, "step": 30711 }, { "epoch": 3.641883078382545, "grad_norm": 0.4334294047417947, "learning_rate": 1.043643091065702e-06, "loss": 0.0216, "step": 30712 }, { "epoch": 3.6420016601446696, "grad_norm": 0.5967516650356516, "learning_rate": 1.0429568965507037e-06, "loss": 0.0252, "step": 30713 }, { "epoch": 3.642120241906795, "grad_norm": 0.5267164567456263, "learning_rate": 1.042270922890265e-06, "loss": 0.0252, "step": 30714 }, { "epoch": 3.6422388236689196, "grad_norm": 0.5784802123160812, "learning_rate": 1.0415851700907087e-06, "loss": 0.0277, "step": 30715 }, { "epoch": 3.642357405431045, "grad_norm": 0.35489449320684713, "learning_rate": 1.040899638158363e-06, "loss": 0.0142, "step": 30716 }, { "epoch": 3.6424759871931696, "grad_norm": 0.45411351620407825, "learning_rate": 1.040214327099534e-06, "loss": 0.0192, "step": 30717 }, { "epoch": 3.6425945689552948, "grad_norm": 0.3925749437893302, "learning_rate": 1.0395292369205528e-06, "loss": 0.0148, "step": 30718 }, { "epoch": 3.6427131507174195, "grad_norm": 0.8319037569715956, "learning_rate": 1.038844367627728e-06, "loss": 0.0368, "step": 30719 }, { "epoch": 3.6428317324795447, "grad_norm": 0.31558837440884313, "learning_rate": 1.0381597192273773e-06, "loss": 0.0101, "step": 30720 }, { "epoch": 3.6429503142416695, "grad_norm": 0.4932000469815746, "learning_rate": 1.0374752917258063e-06, "loss": 0.0278, "step": 30721 }, { "epoch": 3.6430688960037947, "grad_norm": 0.5724717751161064, "learning_rate": 1.0367910851293321e-06, "loss": 0.0254, "step": 30722 }, { "epoch": 3.6431874777659194, "grad_norm": 0.5848934222981165, "learning_rate": 1.0361070994442556e-06, "loss": 0.0261, "step": 30723 }, { "epoch": 3.6433060595280446, "grad_norm": 0.4002857988538888, "learning_rate": 1.0354233346768854e-06, "loss": 0.0155, "step": 30724 }, { "epoch": 3.6434246412901694, "grad_norm": 0.4644293671474941, "learning_rate": 1.0347397908335248e-06, "loss": 0.0221, "step": 30725 }, { "epoch": 3.6435432230522946, "grad_norm": 0.6668469663354473, "learning_rate": 1.0340564679204772e-06, "loss": 0.0382, "step": 30726 }, { "epoch": 3.6436618048144194, "grad_norm": 0.5077603728381986, "learning_rate": 1.0333733659440404e-06, "loss": 0.015, "step": 30727 }, { "epoch": 3.6437803865765446, "grad_norm": 0.6398159348307507, "learning_rate": 1.032690484910509e-06, "loss": 0.0355, "step": 30728 }, { "epoch": 3.6438989683386698, "grad_norm": 0.48553316133926366, "learning_rate": 1.0320078248261811e-06, "loss": 0.0246, "step": 30729 }, { "epoch": 3.6440175501007945, "grad_norm": 0.5285293923422153, "learning_rate": 1.0313253856973487e-06, "loss": 0.0229, "step": 30730 }, { "epoch": 3.6441361318629193, "grad_norm": 0.4141971893368603, "learning_rate": 1.030643167530307e-06, "loss": 0.0198, "step": 30731 }, { "epoch": 3.6442547136250445, "grad_norm": 0.5023216191664538, "learning_rate": 1.0299611703313367e-06, "loss": 0.0227, "step": 30732 }, { "epoch": 3.6443732953871697, "grad_norm": 0.5467601754066086, "learning_rate": 1.0292793941067357e-06, "loss": 0.0207, "step": 30733 }, { "epoch": 3.6444918771492945, "grad_norm": 0.5487722928193859, "learning_rate": 1.0285978388627827e-06, "loss": 0.0276, "step": 30734 }, { "epoch": 3.644610458911419, "grad_norm": 0.7366583651625145, "learning_rate": 1.027916504605761e-06, "loss": 0.0352, "step": 30735 }, { "epoch": 3.6447290406735444, "grad_norm": 0.3914184791244896, "learning_rate": 1.027235391341952e-06, "loss": 0.0145, "step": 30736 }, { "epoch": 3.6448476224356696, "grad_norm": 0.6743120880902697, "learning_rate": 1.0265544990776338e-06, "loss": 0.0384, "step": 30737 }, { "epoch": 3.6449662041977944, "grad_norm": 0.5665212374043255, "learning_rate": 1.0258738278190904e-06, "loss": 0.0263, "step": 30738 }, { "epoch": 3.645084785959919, "grad_norm": 0.3818154045973056, "learning_rate": 1.0251933775725891e-06, "loss": 0.0168, "step": 30739 }, { "epoch": 3.6452033677220443, "grad_norm": 0.7412549518051832, "learning_rate": 1.0245131483444027e-06, "loss": 0.0325, "step": 30740 }, { "epoch": 3.6453219494841695, "grad_norm": 0.5501433283037643, "learning_rate": 1.0238331401408035e-06, "loss": 0.0196, "step": 30741 }, { "epoch": 3.6454405312462943, "grad_norm": 0.733553620066459, "learning_rate": 1.0231533529680676e-06, "loss": 0.0438, "step": 30742 }, { "epoch": 3.645559113008419, "grad_norm": 0.31944267037175916, "learning_rate": 1.0224737868324508e-06, "loss": 0.0092, "step": 30743 }, { "epoch": 3.6456776947705443, "grad_norm": 0.5768028188722137, "learning_rate": 1.0217944417402231e-06, "loss": 0.0252, "step": 30744 }, { "epoch": 3.6457962765326695, "grad_norm": 0.7275954053639971, "learning_rate": 1.0211153176976462e-06, "loss": 0.0299, "step": 30745 }, { "epoch": 3.6459148582947942, "grad_norm": 0.39899361306611053, "learning_rate": 1.020436414710979e-06, "loss": 0.0158, "step": 30746 }, { "epoch": 3.646033440056919, "grad_norm": 0.3575195387963944, "learning_rate": 1.019757732786486e-06, "loss": 0.0162, "step": 30747 }, { "epoch": 3.646152021819044, "grad_norm": 0.4618887751655598, "learning_rate": 1.0190792719304176e-06, "loss": 0.0164, "step": 30748 }, { "epoch": 3.6462706035811694, "grad_norm": 0.5971070362855041, "learning_rate": 1.0184010321490357e-06, "loss": 0.028, "step": 30749 }, { "epoch": 3.646389185343294, "grad_norm": 0.411180388567597, "learning_rate": 1.0177230134485848e-06, "loss": 0.0176, "step": 30750 }, { "epoch": 3.6465077671054194, "grad_norm": 0.6758945404867688, "learning_rate": 1.0170452158353161e-06, "loss": 0.0225, "step": 30751 }, { "epoch": 3.646626348867544, "grad_norm": 0.6242249614909375, "learning_rate": 1.0163676393154852e-06, "loss": 0.0347, "step": 30752 }, { "epoch": 3.6467449306296693, "grad_norm": 0.9072357586223859, "learning_rate": 1.0156902838953347e-06, "loss": 0.046, "step": 30753 }, { "epoch": 3.646863512391794, "grad_norm": 0.3553959725037362, "learning_rate": 1.0150131495811037e-06, "loss": 0.0141, "step": 30754 }, { "epoch": 3.6469820941539193, "grad_norm": 0.5580446049549092, "learning_rate": 1.0143362363790404e-06, "loss": 0.0154, "step": 30755 }, { "epoch": 3.647100675916044, "grad_norm": 0.4889402347642849, "learning_rate": 1.0136595442953838e-06, "loss": 0.0157, "step": 30756 }, { "epoch": 3.6472192576781692, "grad_norm": 1.1379669526954437, "learning_rate": 1.0129830733363737e-06, "loss": 0.0498, "step": 30757 }, { "epoch": 3.647337839440294, "grad_norm": 0.9548184195422669, "learning_rate": 1.0123068235082438e-06, "loss": 0.0454, "step": 30758 }, { "epoch": 3.647456421202419, "grad_norm": 0.7003532852684642, "learning_rate": 1.0116307948172283e-06, "loss": 0.0289, "step": 30759 }, { "epoch": 3.647575002964544, "grad_norm": 0.5951562912516722, "learning_rate": 1.0109549872695634e-06, "loss": 0.0297, "step": 30760 }, { "epoch": 3.647693584726669, "grad_norm": 0.3460190084968859, "learning_rate": 1.010279400871475e-06, "loss": 0.0216, "step": 30761 }, { "epoch": 3.647812166488794, "grad_norm": 0.8062567789201769, "learning_rate": 1.0096040356291913e-06, "loss": 0.0243, "step": 30762 }, { "epoch": 3.647930748250919, "grad_norm": 0.5724418652583871, "learning_rate": 1.008928891548941e-06, "loss": 0.0357, "step": 30763 }, { "epoch": 3.648049330013044, "grad_norm": 0.7175721716141188, "learning_rate": 1.0082539686369436e-06, "loss": 0.0377, "step": 30764 }, { "epoch": 3.648167911775169, "grad_norm": 0.44096074801936797, "learning_rate": 1.0075792668994277e-06, "loss": 0.0173, "step": 30765 }, { "epoch": 3.648286493537294, "grad_norm": 0.45816962211965484, "learning_rate": 1.0069047863426052e-06, "loss": 0.0266, "step": 30766 }, { "epoch": 3.648405075299419, "grad_norm": 0.7697897125175935, "learning_rate": 1.006230526972704e-06, "loss": 0.0367, "step": 30767 }, { "epoch": 3.648523657061544, "grad_norm": 0.3427298057451425, "learning_rate": 1.0055564887959307e-06, "loss": 0.0192, "step": 30768 }, { "epoch": 3.648642238823669, "grad_norm": 0.5469022493873739, "learning_rate": 1.0048826718185051e-06, "loss": 0.0281, "step": 30769 }, { "epoch": 3.6487608205857938, "grad_norm": 0.45272896826016035, "learning_rate": 1.0042090760466306e-06, "loss": 0.0207, "step": 30770 }, { "epoch": 3.648879402347919, "grad_norm": 0.3633449588356305, "learning_rate": 1.0035357014865298e-06, "loss": 0.0121, "step": 30771 }, { "epoch": 3.6489979841100437, "grad_norm": 0.6930461703965459, "learning_rate": 1.0028625481443982e-06, "loss": 0.032, "step": 30772 }, { "epoch": 3.649116565872169, "grad_norm": 0.4329847837111056, "learning_rate": 1.0021896160264498e-06, "loss": 0.0213, "step": 30773 }, { "epoch": 3.6492351476342937, "grad_norm": 0.5250541352728472, "learning_rate": 1.0015169051388828e-06, "loss": 0.0195, "step": 30774 }, { "epoch": 3.649353729396419, "grad_norm": 0.6354707156555486, "learning_rate": 1.0008444154879031e-06, "loss": 0.0299, "step": 30775 }, { "epoch": 3.6494723111585436, "grad_norm": 0.915663347476454, "learning_rate": 1.0001721470797088e-06, "loss": 0.0466, "step": 30776 }, { "epoch": 3.649590892920669, "grad_norm": 0.3704078158096794, "learning_rate": 9.995000999204918e-07, "loss": 0.0207, "step": 30777 }, { "epoch": 3.649709474682794, "grad_norm": 0.716134680994382, "learning_rate": 9.988282740164584e-07, "loss": 0.0272, "step": 30778 }, { "epoch": 3.649828056444919, "grad_norm": 0.47387834075884744, "learning_rate": 9.981566693737925e-07, "loss": 0.0225, "step": 30779 }, { "epoch": 3.6499466382070436, "grad_norm": 0.39377569442976534, "learning_rate": 9.97485285998695e-07, "loss": 0.0197, "step": 30780 }, { "epoch": 3.6500652199691688, "grad_norm": 0.5719952867072153, "learning_rate": 9.96814123897341e-07, "loss": 0.0215, "step": 30781 }, { "epoch": 3.650183801731294, "grad_norm": 0.4637190447627906, "learning_rate": 9.961431830759316e-07, "loss": 0.0227, "step": 30782 }, { "epoch": 3.6503023834934187, "grad_norm": 0.31235425074309975, "learning_rate": 9.954724635406448e-07, "loss": 0.0108, "step": 30783 }, { "epoch": 3.6504209652555435, "grad_norm": 0.4354200222448532, "learning_rate": 9.948019652976648e-07, "loss": 0.0207, "step": 30784 }, { "epoch": 3.6505395470176687, "grad_norm": 0.3958017460637726, "learning_rate": 9.941316883531726e-07, "loss": 0.0163, "step": 30785 }, { "epoch": 3.650658128779794, "grad_norm": 0.6783767335357475, "learning_rate": 9.934616327133521e-07, "loss": 0.0372, "step": 30786 }, { "epoch": 3.6507767105419187, "grad_norm": 0.41719118763922397, "learning_rate": 9.927917983843765e-07, "loss": 0.0156, "step": 30787 }, { "epoch": 3.6508952923040434, "grad_norm": 0.5462468032810734, "learning_rate": 9.921221853724155e-07, "loss": 0.0224, "step": 30788 }, { "epoch": 3.6510138740661686, "grad_norm": 0.40248421765697767, "learning_rate": 9.914527936836531e-07, "loss": 0.0157, "step": 30789 }, { "epoch": 3.651132455828294, "grad_norm": 0.5366933691422126, "learning_rate": 9.907836233242513e-07, "loss": 0.0218, "step": 30790 }, { "epoch": 3.6512510375904186, "grad_norm": 0.6118009876700656, "learning_rate": 9.90114674300388e-07, "loss": 0.0302, "step": 30791 }, { "epoch": 3.6513696193525433, "grad_norm": 0.37197161259626443, "learning_rate": 9.894459466182172e-07, "loss": 0.015, "step": 30792 }, { "epoch": 3.6514882011146685, "grad_norm": 0.6948446118315804, "learning_rate": 9.887774402839166e-07, "loss": 0.0313, "step": 30793 }, { "epoch": 3.6516067828767937, "grad_norm": 0.48090015200510056, "learning_rate": 9.881091553036403e-07, "loss": 0.0208, "step": 30794 }, { "epoch": 3.6517253646389185, "grad_norm": 0.5653941250367356, "learning_rate": 9.874410916835526e-07, "loss": 0.0374, "step": 30795 }, { "epoch": 3.6518439464010433, "grad_norm": 0.7305041392737872, "learning_rate": 9.867732494298121e-07, "loss": 0.0331, "step": 30796 }, { "epoch": 3.6519625281631685, "grad_norm": 0.7694055851689255, "learning_rate": 9.861056285485754e-07, "loss": 0.0361, "step": 30797 }, { "epoch": 3.6520811099252937, "grad_norm": 0.6995042053314955, "learning_rate": 9.854382290459985e-07, "loss": 0.0293, "step": 30798 }, { "epoch": 3.6521996916874184, "grad_norm": 0.39869941608356846, "learning_rate": 9.847710509282322e-07, "loss": 0.0155, "step": 30799 }, { "epoch": 3.652318273449543, "grad_norm": 0.5368727756555653, "learning_rate": 9.84104094201424e-07, "loss": 0.0236, "step": 30800 }, { "epoch": 3.6524368552116684, "grad_norm": 0.40330970282519696, "learning_rate": 9.834373588717277e-07, "loss": 0.0149, "step": 30801 }, { "epoch": 3.6525554369737936, "grad_norm": 0.447317031892217, "learning_rate": 9.82770844945291e-07, "loss": 0.018, "step": 30802 }, { "epoch": 3.6526740187359183, "grad_norm": 0.5596007059446945, "learning_rate": 9.821045524282506e-07, "loss": 0.0199, "step": 30803 }, { "epoch": 3.6527926004980436, "grad_norm": 0.4267489291614389, "learning_rate": 9.814384813267573e-07, "loss": 0.0195, "step": 30804 }, { "epoch": 3.6529111822601683, "grad_norm": 0.7069155513754719, "learning_rate": 9.80772631646945e-07, "loss": 0.0353, "step": 30805 }, { "epoch": 3.6530297640222935, "grad_norm": 0.37411700463372066, "learning_rate": 9.80107003394956e-07, "loss": 0.0136, "step": 30806 }, { "epoch": 3.6531483457844183, "grad_norm": 0.4908239377776413, "learning_rate": 9.794415965769243e-07, "loss": 0.0154, "step": 30807 }, { "epoch": 3.6532669275465435, "grad_norm": 0.4597669198402222, "learning_rate": 9.787764111989839e-07, "loss": 0.0221, "step": 30808 }, { "epoch": 3.6533855093086682, "grad_norm": 0.6493412652656324, "learning_rate": 9.781114472672742e-07, "loss": 0.0321, "step": 30809 }, { "epoch": 3.6535040910707934, "grad_norm": 0.3101810133702138, "learning_rate": 9.774467047879155e-07, "loss": 0.0111, "step": 30810 }, { "epoch": 3.653622672832918, "grad_norm": 0.525027735025754, "learning_rate": 9.767821837670387e-07, "loss": 0.0228, "step": 30811 }, { "epoch": 3.6537412545950434, "grad_norm": 0.38142228662514205, "learning_rate": 9.761178842107699e-07, "loss": 0.0209, "step": 30812 }, { "epoch": 3.653859836357168, "grad_norm": 1.0769591381145713, "learning_rate": 9.754538061252372e-07, "loss": 0.0509, "step": 30813 }, { "epoch": 3.6539784181192934, "grad_norm": 0.30923598922051543, "learning_rate": 9.747899495165607e-07, "loss": 0.0145, "step": 30814 }, { "epoch": 3.654096999881418, "grad_norm": 0.4214693051517658, "learning_rate": 9.741263143908524e-07, "loss": 0.0143, "step": 30815 }, { "epoch": 3.6542155816435433, "grad_norm": 0.80907303183163, "learning_rate": 9.734629007542462e-07, "loss": 0.0201, "step": 30816 }, { "epoch": 3.654334163405668, "grad_norm": 0.37627840766797627, "learning_rate": 9.727997086128426e-07, "loss": 0.0168, "step": 30817 }, { "epoch": 3.6544527451677933, "grad_norm": 0.33440031495537326, "learning_rate": 9.72136737972762e-07, "loss": 0.0104, "step": 30818 }, { "epoch": 3.654571326929918, "grad_norm": 0.5007304329570129, "learning_rate": 9.714739888401159e-07, "loss": 0.0276, "step": 30819 }, { "epoch": 3.6546899086920432, "grad_norm": 0.6046228305707222, "learning_rate": 9.70811461221019e-07, "loss": 0.0208, "step": 30820 }, { "epoch": 3.654808490454168, "grad_norm": 0.49369147642639954, "learning_rate": 9.70149155121569e-07, "loss": 0.0213, "step": 30821 }, { "epoch": 3.654927072216293, "grad_norm": 0.6989533715383505, "learning_rate": 9.694870705478754e-07, "loss": 0.0325, "step": 30822 }, { "epoch": 3.655045653978418, "grad_norm": 0.4268582798493749, "learning_rate": 9.68825207506044e-07, "loss": 0.0181, "step": 30823 }, { "epoch": 3.655164235740543, "grad_norm": 0.8290503493355689, "learning_rate": 9.681635660021755e-07, "loss": 0.0393, "step": 30824 }, { "epoch": 3.655282817502668, "grad_norm": 0.34679472271012074, "learning_rate": 9.675021460423733e-07, "loss": 0.0182, "step": 30825 }, { "epoch": 3.655401399264793, "grad_norm": 0.6063048616719918, "learning_rate": 9.668409476327245e-07, "loss": 0.0213, "step": 30826 }, { "epoch": 3.6555199810269183, "grad_norm": 0.565818836000189, "learning_rate": 9.66179970779335e-07, "loss": 0.0329, "step": 30827 }, { "epoch": 3.655638562789043, "grad_norm": 0.5794322045689522, "learning_rate": 9.655192154882919e-07, "loss": 0.0237, "step": 30828 }, { "epoch": 3.655757144551168, "grad_norm": 0.4388080369365939, "learning_rate": 9.648586817656873e-07, "loss": 0.03, "step": 30829 }, { "epoch": 3.655875726313293, "grad_norm": 0.5477246779636937, "learning_rate": 9.641983696176138e-07, "loss": 0.0265, "step": 30830 }, { "epoch": 3.6559943080754183, "grad_norm": 0.6887578730893199, "learning_rate": 9.63538279050158e-07, "loss": 0.0282, "step": 30831 }, { "epoch": 3.656112889837543, "grad_norm": 0.46292517563268715, "learning_rate": 9.62878410069401e-07, "loss": 0.0178, "step": 30832 }, { "epoch": 3.6562314715996678, "grad_norm": 0.4913996737944648, "learning_rate": 9.6221876268143e-07, "loss": 0.0176, "step": 30833 }, { "epoch": 3.656350053361793, "grad_norm": 0.7479795661575599, "learning_rate": 9.615593368923258e-07, "loss": 0.0322, "step": 30834 }, { "epoch": 3.656468635123918, "grad_norm": 0.4745376369249839, "learning_rate": 9.609001327081647e-07, "loss": 0.0194, "step": 30835 }, { "epoch": 3.656587216886043, "grad_norm": 0.5931324594177699, "learning_rate": 9.602411501350273e-07, "loss": 0.0305, "step": 30836 }, { "epoch": 3.6567057986481677, "grad_norm": 0.4638733242037661, "learning_rate": 9.59582389178984e-07, "loss": 0.0207, "step": 30837 }, { "epoch": 3.656824380410293, "grad_norm": 0.46698374994242825, "learning_rate": 9.58923849846116e-07, "loss": 0.0194, "step": 30838 }, { "epoch": 3.656942962172418, "grad_norm": 0.4113869771413013, "learning_rate": 9.582655321424855e-07, "loss": 0.0182, "step": 30839 }, { "epoch": 3.657061543934543, "grad_norm": 0.6606641110476835, "learning_rate": 9.576074360741677e-07, "loss": 0.0334, "step": 30840 }, { "epoch": 3.6571801256966676, "grad_norm": 0.4460010299665914, "learning_rate": 9.56949561647222e-07, "loss": 0.0201, "step": 30841 }, { "epoch": 3.657298707458793, "grad_norm": 0.34401701809269297, "learning_rate": 9.562919088677208e-07, "loss": 0.0136, "step": 30842 }, { "epoch": 3.657417289220918, "grad_norm": 0.5417242618608592, "learning_rate": 9.556344777417237e-07, "loss": 0.0249, "step": 30843 }, { "epoch": 3.657535870983043, "grad_norm": 0.9277052165607277, "learning_rate": 9.549772682752894e-07, "loss": 0.0455, "step": 30844 }, { "epoch": 3.6576544527451675, "grad_norm": 0.5366317071165856, "learning_rate": 9.5432028047448e-07, "loss": 0.0311, "step": 30845 }, { "epoch": 3.6577730345072927, "grad_norm": 0.5906026363927463, "learning_rate": 9.536635143453515e-07, "loss": 0.024, "step": 30846 }, { "epoch": 3.657891616269418, "grad_norm": 0.4783951071727468, "learning_rate": 9.530069698939604e-07, "loss": 0.022, "step": 30847 }, { "epoch": 3.6580101980315427, "grad_norm": 0.6612912746958202, "learning_rate": 9.523506471263488e-07, "loss": 0.0272, "step": 30848 }, { "epoch": 3.6581287797936675, "grad_norm": 0.4247209991608297, "learning_rate": 9.516945460485844e-07, "loss": 0.0149, "step": 30849 }, { "epoch": 3.6582473615557927, "grad_norm": 0.3515523193504108, "learning_rate": 9.51038666666701e-07, "loss": 0.0182, "step": 30850 }, { "epoch": 3.658365943317918, "grad_norm": 0.546388834651381, "learning_rate": 9.503830089867549e-07, "loss": 0.02, "step": 30851 }, { "epoch": 3.6584845250800426, "grad_norm": 0.4631248867351941, "learning_rate": 9.497275730147775e-07, "loss": 0.0209, "step": 30852 }, { "epoch": 3.658603106842168, "grad_norm": 0.414679910699122, "learning_rate": 9.490723587568279e-07, "loss": 0.0147, "step": 30853 }, { "epoch": 3.6587216886042926, "grad_norm": 0.38783154791255475, "learning_rate": 9.484173662189344e-07, "loss": 0.0172, "step": 30854 }, { "epoch": 3.658840270366418, "grad_norm": 1.0131366018902623, "learning_rate": 9.477625954071368e-07, "loss": 0.0509, "step": 30855 }, { "epoch": 3.6589588521285425, "grad_norm": 1.0200063174418568, "learning_rate": 9.471080463274746e-07, "loss": 0.0608, "step": 30856 }, { "epoch": 3.6590774338906678, "grad_norm": 0.798895868617623, "learning_rate": 9.464537189859818e-07, "loss": 0.0336, "step": 30857 }, { "epoch": 3.6591960156527925, "grad_norm": 0.661417240755807, "learning_rate": 9.457996133886899e-07, "loss": 0.023, "step": 30858 }, { "epoch": 3.6593145974149177, "grad_norm": 0.4717018135466655, "learning_rate": 9.451457295416243e-07, "loss": 0.0167, "step": 30859 }, { "epoch": 3.6594331791770425, "grad_norm": 0.38828293797291885, "learning_rate": 9.444920674508223e-07, "loss": 0.0141, "step": 30860 }, { "epoch": 3.6595517609391677, "grad_norm": 0.5686359430022389, "learning_rate": 9.43838627122301e-07, "loss": 0.028, "step": 30861 }, { "epoch": 3.6596703427012924, "grad_norm": 0.5428844482373684, "learning_rate": 9.431854085620889e-07, "loss": 0.0242, "step": 30862 }, { "epoch": 3.6597889244634176, "grad_norm": 0.2871714897187784, "learning_rate": 9.425324117762063e-07, "loss": 0.0155, "step": 30863 }, { "epoch": 3.6599075062255424, "grad_norm": 0.46075385599180424, "learning_rate": 9.418796367706762e-07, "loss": 0.0199, "step": 30864 }, { "epoch": 3.6600260879876676, "grad_norm": 0.44900248088590566, "learning_rate": 9.41227083551513e-07, "loss": 0.0206, "step": 30865 }, { "epoch": 3.6601446697497924, "grad_norm": 0.2601244636094451, "learning_rate": 9.405747521247316e-07, "loss": 0.0072, "step": 30866 }, { "epoch": 3.6602632515119176, "grad_norm": 0.46054871423506566, "learning_rate": 9.399226424963492e-07, "loss": 0.0243, "step": 30867 }, { "epoch": 3.6603818332740423, "grad_norm": 0.8279027603106353, "learning_rate": 9.39270754672375e-07, "loss": 0.0364, "step": 30868 }, { "epoch": 3.6605004150361675, "grad_norm": 0.5529850118711301, "learning_rate": 9.386190886588208e-07, "loss": 0.0259, "step": 30869 }, { "epoch": 3.6606189967982923, "grad_norm": 0.6074148993748872, "learning_rate": 9.379676444616902e-07, "loss": 0.021, "step": 30870 }, { "epoch": 3.6607375785604175, "grad_norm": 0.9837107971518343, "learning_rate": 9.373164220869895e-07, "loss": 0.0464, "step": 30871 }, { "epoch": 3.6608561603225422, "grad_norm": 0.7689524711371938, "learning_rate": 9.36665421540725e-07, "loss": 0.0313, "step": 30872 }, { "epoch": 3.6609747420846674, "grad_norm": 0.4954528423238879, "learning_rate": 9.360146428288974e-07, "loss": 0.0195, "step": 30873 }, { "epoch": 3.661093323846792, "grad_norm": 0.41261251581711345, "learning_rate": 9.353640859575075e-07, "loss": 0.0225, "step": 30874 }, { "epoch": 3.6612119056089174, "grad_norm": 0.5687398028956421, "learning_rate": 9.347137509325476e-07, "loss": 0.0251, "step": 30875 }, { "epoch": 3.6613304873710426, "grad_norm": 0.3318706722460189, "learning_rate": 9.340636377600215e-07, "loss": 0.0155, "step": 30876 }, { "epoch": 3.6614490691331674, "grad_norm": 0.4635168721839511, "learning_rate": 9.334137464459103e-07, "loss": 0.0223, "step": 30877 }, { "epoch": 3.661567650895292, "grad_norm": 0.5319375286027146, "learning_rate": 9.327640769962148e-07, "loss": 0.0256, "step": 30878 }, { "epoch": 3.6616862326574173, "grad_norm": 0.8446706374402909, "learning_rate": 9.32114629416922e-07, "loss": 0.0301, "step": 30879 }, { "epoch": 3.6618048144195425, "grad_norm": 0.678952772016791, "learning_rate": 9.314654037140213e-07, "loss": 0.0375, "step": 30880 }, { "epoch": 3.6619233961816673, "grad_norm": 0.4956575255385961, "learning_rate": 9.308163998934888e-07, "loss": 0.0296, "step": 30881 }, { "epoch": 3.662041977943792, "grad_norm": 0.6200457047672995, "learning_rate": 9.301676179613167e-07, "loss": 0.0343, "step": 30882 }, { "epoch": 3.6621605597059173, "grad_norm": 0.6296603630143992, "learning_rate": 9.295190579234808e-07, "loss": 0.0241, "step": 30883 }, { "epoch": 3.6622791414680425, "grad_norm": 0.3606554379426033, "learning_rate": 9.288707197859625e-07, "loss": 0.0167, "step": 30884 }, { "epoch": 3.662397723230167, "grad_norm": 0.6357627425965963, "learning_rate": 9.282226035547432e-07, "loss": 0.03, "step": 30885 }, { "epoch": 3.662516304992292, "grad_norm": 0.911522137551546, "learning_rate": 9.275747092357845e-07, "loss": 0.029, "step": 30886 }, { "epoch": 3.662634886754417, "grad_norm": 0.5366081079204362, "learning_rate": 9.269270368350736e-07, "loss": 0.0271, "step": 30887 }, { "epoch": 3.6627534685165424, "grad_norm": 0.7527376037539756, "learning_rate": 9.262795863585721e-07, "loss": 0.0422, "step": 30888 }, { "epoch": 3.662872050278667, "grad_norm": 0.5631382746496113, "learning_rate": 9.256323578122505e-07, "loss": 0.0293, "step": 30889 }, { "epoch": 3.662990632040792, "grad_norm": 0.39079528946302183, "learning_rate": 9.249853512020789e-07, "loss": 0.0157, "step": 30890 }, { "epoch": 3.663109213802917, "grad_norm": 0.40830515756005875, "learning_rate": 9.243385665340221e-07, "loss": 0.0172, "step": 30891 }, { "epoch": 3.6632277955650423, "grad_norm": 0.4791604822151707, "learning_rate": 9.236920038140362e-07, "loss": 0.0171, "step": 30892 }, { "epoch": 3.663346377327167, "grad_norm": 0.2798772554359452, "learning_rate": 9.23045663048086e-07, "loss": 0.0113, "step": 30893 }, { "epoch": 3.663464959089292, "grad_norm": 0.5319736097890487, "learning_rate": 9.223995442421307e-07, "loss": 0.0208, "step": 30894 }, { "epoch": 3.663583540851417, "grad_norm": 0.5617858124271375, "learning_rate": 9.217536474021266e-07, "loss": 0.0265, "step": 30895 }, { "epoch": 3.6637021226135422, "grad_norm": 0.9052159999147124, "learning_rate": 9.211079725340271e-07, "loss": 0.0541, "step": 30896 }, { "epoch": 3.663820704375667, "grad_norm": 0.6292964858703506, "learning_rate": 9.204625196437805e-07, "loss": 0.0337, "step": 30897 }, { "epoch": 3.6639392861377917, "grad_norm": 0.4911634111467709, "learning_rate": 9.198172887373458e-07, "loss": 0.0215, "step": 30898 }, { "epoch": 3.664057867899917, "grad_norm": 0.45876141442196444, "learning_rate": 9.191722798206653e-07, "loss": 0.0209, "step": 30899 }, { "epoch": 3.664176449662042, "grad_norm": 0.6695254840919819, "learning_rate": 9.185274928996901e-07, "loss": 0.0337, "step": 30900 }, { "epoch": 3.664295031424167, "grad_norm": 0.6018160975907306, "learning_rate": 9.178829279803513e-07, "loss": 0.0277, "step": 30901 }, { "epoch": 3.664413613186292, "grad_norm": 0.720707392462476, "learning_rate": 9.17238585068611e-07, "loss": 0.0274, "step": 30902 }, { "epoch": 3.664532194948417, "grad_norm": 0.30084657655885266, "learning_rate": 9.165944641703922e-07, "loss": 0.0156, "step": 30903 }, { "epoch": 3.664650776710542, "grad_norm": 0.6266154945375715, "learning_rate": 9.1595056529164e-07, "loss": 0.0287, "step": 30904 }, { "epoch": 3.664769358472667, "grad_norm": 0.3551372583815678, "learning_rate": 9.153068884382915e-07, "loss": 0.0177, "step": 30905 }, { "epoch": 3.664887940234792, "grad_norm": 0.38422280261800323, "learning_rate": 9.146634336162779e-07, "loss": 0.0177, "step": 30906 }, { "epoch": 3.665006521996917, "grad_norm": 0.5684370796587013, "learning_rate": 9.140202008315335e-07, "loss": 0.0271, "step": 30907 }, { "epoch": 3.665125103759042, "grad_norm": 0.4837785106329147, "learning_rate": 9.13377190089984e-07, "loss": 0.0227, "step": 30908 }, { "epoch": 3.6652436855211667, "grad_norm": 0.64547701515701, "learning_rate": 9.127344013975636e-07, "loss": 0.0367, "step": 30909 }, { "epoch": 3.665362267283292, "grad_norm": 0.4982831442792032, "learning_rate": 9.120918347601898e-07, "loss": 0.0204, "step": 30910 }, { "epoch": 3.6654808490454167, "grad_norm": 0.35131415105310154, "learning_rate": 9.114494901837939e-07, "loss": 0.0154, "step": 30911 }, { "epoch": 3.665599430807542, "grad_norm": 0.6735958878433023, "learning_rate": 9.108073676742935e-07, "loss": 0.0357, "step": 30912 }, { "epoch": 3.6657180125696667, "grad_norm": 0.707880976949085, "learning_rate": 9.101654672376087e-07, "loss": 0.029, "step": 30913 }, { "epoch": 3.665836594331792, "grad_norm": 0.5656303357251535, "learning_rate": 9.095237888796598e-07, "loss": 0.02, "step": 30914 }, { "epoch": 3.6659551760939166, "grad_norm": 0.6805278309362656, "learning_rate": 9.088823326063534e-07, "loss": 0.0366, "step": 30915 }, { "epoch": 3.666073757856042, "grad_norm": 0.5301975399665498, "learning_rate": 9.082410984236179e-07, "loss": 0.0232, "step": 30916 }, { "epoch": 3.6661923396181666, "grad_norm": 0.3520676776386184, "learning_rate": 9.076000863373513e-07, "loss": 0.0139, "step": 30917 }, { "epoch": 3.666310921380292, "grad_norm": 0.5756134272404034, "learning_rate": 9.069592963534712e-07, "loss": 0.0223, "step": 30918 }, { "epoch": 3.6664295031424166, "grad_norm": 0.6202913243827735, "learning_rate": 9.063187284778757e-07, "loss": 0.0332, "step": 30919 }, { "epoch": 3.6665480849045418, "grad_norm": 0.6124200768609447, "learning_rate": 9.056783827164822e-07, "loss": 0.0273, "step": 30920 }, { "epoch": 3.6666666666666665, "grad_norm": 0.4013732713576773, "learning_rate": 9.050382590751833e-07, "loss": 0.0155, "step": 30921 }, { "epoch": 3.6667852484287917, "grad_norm": 0.28996965331275276, "learning_rate": 9.043983575598852e-07, "loss": 0.0146, "step": 30922 }, { "epoch": 3.6669038301909165, "grad_norm": 0.4433668096754899, "learning_rate": 9.037586781764862e-07, "loss": 0.0211, "step": 30923 }, { "epoch": 3.6670224119530417, "grad_norm": 0.378209223576835, "learning_rate": 9.031192209308842e-07, "loss": 0.0206, "step": 30924 }, { "epoch": 3.6671409937151664, "grad_norm": 0.6405776361755516, "learning_rate": 9.024799858289745e-07, "loss": 0.0305, "step": 30925 }, { "epoch": 3.6672595754772916, "grad_norm": 0.5585057707742388, "learning_rate": 9.01840972876647e-07, "loss": 0.0387, "step": 30926 }, { "epoch": 3.6673781572394164, "grad_norm": 0.7158009707660422, "learning_rate": 9.012021820797967e-07, "loss": 0.0297, "step": 30927 }, { "epoch": 3.6674967390015416, "grad_norm": 0.5173971208680662, "learning_rate": 9.00563613444308e-07, "loss": 0.0306, "step": 30928 }, { "epoch": 3.667615320763667, "grad_norm": 0.856785639817149, "learning_rate": 8.999252669760732e-07, "loss": 0.0298, "step": 30929 }, { "epoch": 3.6677339025257916, "grad_norm": 0.47486922404220744, "learning_rate": 8.992871426809685e-07, "loss": 0.0237, "step": 30930 }, { "epoch": 3.6678524842879163, "grad_norm": 0.5124540675880215, "learning_rate": 8.986492405648889e-07, "loss": 0.0179, "step": 30931 }, { "epoch": 3.6679710660500415, "grad_norm": 0.358752551444331, "learning_rate": 8.980115606337048e-07, "loss": 0.0121, "step": 30932 }, { "epoch": 3.6680896478121667, "grad_norm": 0.6745591900432307, "learning_rate": 8.973741028932975e-07, "loss": 0.0334, "step": 30933 }, { "epoch": 3.6682082295742915, "grad_norm": 0.34787153083001304, "learning_rate": 8.96736867349543e-07, "loss": 0.0156, "step": 30934 }, { "epoch": 3.6683268113364162, "grad_norm": 0.693472835989991, "learning_rate": 8.960998540083199e-07, "loss": 0.0385, "step": 30935 }, { "epoch": 3.6684453930985415, "grad_norm": 0.5877550644877902, "learning_rate": 8.954630628755012e-07, "loss": 0.031, "step": 30936 }, { "epoch": 3.6685639748606667, "grad_norm": 0.20644002232128195, "learning_rate": 8.94826493956949e-07, "loss": 0.0066, "step": 30937 }, { "epoch": 3.6686825566227914, "grad_norm": 0.35438532797739863, "learning_rate": 8.94190147258539e-07, "loss": 0.0113, "step": 30938 }, { "epoch": 3.668801138384916, "grad_norm": 0.3876222792181888, "learning_rate": 8.935540227861333e-07, "loss": 0.0177, "step": 30939 }, { "epoch": 3.6689197201470414, "grad_norm": 0.742175586662613, "learning_rate": 8.929181205456022e-07, "loss": 0.034, "step": 30940 }, { "epoch": 3.6690383019091666, "grad_norm": 0.4740533958900053, "learning_rate": 8.922824405428021e-07, "loss": 0.0244, "step": 30941 }, { "epoch": 3.6691568836712913, "grad_norm": 0.39203447927110163, "learning_rate": 8.916469827835921e-07, "loss": 0.0161, "step": 30942 }, { "epoch": 3.669275465433416, "grad_norm": 0.5081296531294641, "learning_rate": 8.910117472738371e-07, "loss": 0.0205, "step": 30943 }, { "epoch": 3.6693940471955413, "grad_norm": 0.4690022756233995, "learning_rate": 8.903767340193853e-07, "loss": 0.0186, "step": 30944 }, { "epoch": 3.6695126289576665, "grad_norm": 0.9357953162376734, "learning_rate": 8.897419430260984e-07, "loss": 0.0394, "step": 30945 }, { "epoch": 3.6696312107197913, "grad_norm": 0.47406042166553075, "learning_rate": 8.891073742998219e-07, "loss": 0.0179, "step": 30946 }, { "epoch": 3.669749792481916, "grad_norm": 0.6840476500366959, "learning_rate": 8.884730278464149e-07, "loss": 0.0303, "step": 30947 }, { "epoch": 3.669868374244041, "grad_norm": 0.4367723782072395, "learning_rate": 8.878389036717144e-07, "loss": 0.0185, "step": 30948 }, { "epoch": 3.6699869560061664, "grad_norm": 0.2528627890750506, "learning_rate": 8.872050017815686e-07, "loss": 0.0114, "step": 30949 }, { "epoch": 3.670105537768291, "grad_norm": 0.43238102603355677, "learning_rate": 8.865713221818284e-07, "loss": 0.0188, "step": 30950 }, { "epoch": 3.6702241195304164, "grad_norm": 0.677407653991715, "learning_rate": 8.859378648783306e-07, "loss": 0.045, "step": 30951 }, { "epoch": 3.670342701292541, "grad_norm": 0.8367211666094263, "learning_rate": 8.853046298769124e-07, "loss": 0.027, "step": 30952 }, { "epoch": 3.6704612830546663, "grad_norm": 0.4553437450861238, "learning_rate": 8.846716171834135e-07, "loss": 0.0323, "step": 30953 }, { "epoch": 3.670579864816791, "grad_norm": 0.32630348618218813, "learning_rate": 8.840388268036709e-07, "loss": 0.0125, "step": 30954 }, { "epoch": 3.6706984465789163, "grad_norm": 0.5689989681857858, "learning_rate": 8.834062587435188e-07, "loss": 0.0315, "step": 30955 }, { "epoch": 3.670817028341041, "grad_norm": 0.5400152610659043, "learning_rate": 8.827739130087887e-07, "loss": 0.0248, "step": 30956 }, { "epoch": 3.6709356101031663, "grad_norm": 0.6058638383539696, "learning_rate": 8.821417896053036e-07, "loss": 0.031, "step": 30957 }, { "epoch": 3.671054191865291, "grad_norm": 0.4542127264684431, "learning_rate": 8.815098885389006e-07, "loss": 0.0168, "step": 30958 }, { "epoch": 3.6711727736274162, "grad_norm": 0.4495515802525583, "learning_rate": 8.808782098153973e-07, "loss": 0.0207, "step": 30959 }, { "epoch": 3.671291355389541, "grad_norm": 0.4688621800426598, "learning_rate": 8.802467534406222e-07, "loss": 0.0278, "step": 30960 }, { "epoch": 3.671409937151666, "grad_norm": 0.7501002129598162, "learning_rate": 8.796155194203931e-07, "loss": 0.0248, "step": 30961 }, { "epoch": 3.671528518913791, "grad_norm": 0.4199566565800323, "learning_rate": 8.789845077605302e-07, "loss": 0.0206, "step": 30962 }, { "epoch": 3.671647100675916, "grad_norm": 0.5892107545976208, "learning_rate": 8.783537184668539e-07, "loss": 0.0338, "step": 30963 }, { "epoch": 3.671765682438041, "grad_norm": 0.47722877849869033, "learning_rate": 8.777231515451706e-07, "loss": 0.0265, "step": 30964 }, { "epoch": 3.671884264200166, "grad_norm": 0.660480573001666, "learning_rate": 8.770928070013062e-07, "loss": 0.0349, "step": 30965 }, { "epoch": 3.672002845962291, "grad_norm": 0.7151338539339837, "learning_rate": 8.76462684841059e-07, "loss": 0.0399, "step": 30966 }, { "epoch": 3.672121427724416, "grad_norm": 0.37616926791604455, "learning_rate": 8.758327850702492e-07, "loss": 0.0192, "step": 30967 }, { "epoch": 3.672240009486541, "grad_norm": 0.7227406541561577, "learning_rate": 8.752031076946721e-07, "loss": 0.035, "step": 30968 }, { "epoch": 3.672358591248666, "grad_norm": 0.37029970446330834, "learning_rate": 8.745736527201425e-07, "loss": 0.0181, "step": 30969 }, { "epoch": 3.672477173010791, "grad_norm": 0.68623730173007, "learning_rate": 8.739444201524588e-07, "loss": 0.0383, "step": 30970 }, { "epoch": 3.672595754772916, "grad_norm": 0.8037575385402159, "learning_rate": 8.733154099974189e-07, "loss": 0.0515, "step": 30971 }, { "epoch": 3.6727143365350408, "grad_norm": 0.289093489947977, "learning_rate": 8.726866222608293e-07, "loss": 0.0163, "step": 30972 }, { "epoch": 3.672832918297166, "grad_norm": 0.7216050303132983, "learning_rate": 8.720580569484771e-07, "loss": 0.0257, "step": 30973 }, { "epoch": 3.6729515000592907, "grad_norm": 0.37070657417375275, "learning_rate": 8.714297140661687e-07, "loss": 0.0144, "step": 30974 }, { "epoch": 3.673070081821416, "grad_norm": 0.5348702616097799, "learning_rate": 8.708015936196828e-07, "loss": 0.0264, "step": 30975 }, { "epoch": 3.6731886635835407, "grad_norm": 0.5146877966107607, "learning_rate": 8.701736956148232e-07, "loss": 0.0242, "step": 30976 }, { "epoch": 3.673307245345666, "grad_norm": 0.6441353332639412, "learning_rate": 8.695460200573686e-07, "loss": 0.0247, "step": 30977 }, { "epoch": 3.673425827107791, "grad_norm": 0.5522964719391228, "learning_rate": 8.689185669531141e-07, "loss": 0.0248, "step": 30978 }, { "epoch": 3.673544408869916, "grad_norm": 0.5050576232727226, "learning_rate": 8.682913363078304e-07, "loss": 0.026, "step": 30979 }, { "epoch": 3.6736629906320406, "grad_norm": 0.41379690370258576, "learning_rate": 8.676643281273156e-07, "loss": 0.0235, "step": 30980 }, { "epoch": 3.673781572394166, "grad_norm": 0.6642363858363932, "learning_rate": 8.6703754241734e-07, "loss": 0.0335, "step": 30981 }, { "epoch": 3.673900154156291, "grad_norm": 0.624224495580249, "learning_rate": 8.664109791836822e-07, "loss": 0.0283, "step": 30982 }, { "epoch": 3.6740187359184158, "grad_norm": 0.6861443067786523, "learning_rate": 8.657846384321239e-07, "loss": 0.0269, "step": 30983 }, { "epoch": 3.6741373176805405, "grad_norm": 0.5040350256554098, "learning_rate": 8.651585201684326e-07, "loss": 0.0184, "step": 30984 }, { "epoch": 3.6742558994426657, "grad_norm": 0.5416621623185655, "learning_rate": 8.64532624398387e-07, "loss": 0.0308, "step": 30985 }, { "epoch": 3.674374481204791, "grad_norm": 0.43274522347537525, "learning_rate": 8.63906951127752e-07, "loss": 0.0249, "step": 30986 }, { "epoch": 3.6744930629669157, "grad_norm": 0.4017598993173844, "learning_rate": 8.632815003622979e-07, "loss": 0.0228, "step": 30987 }, { "epoch": 3.6746116447290404, "grad_norm": 0.5308564369162935, "learning_rate": 8.626562721077896e-07, "loss": 0.0239, "step": 30988 }, { "epoch": 3.6747302264911657, "grad_norm": 0.29768047631860317, "learning_rate": 8.620312663699948e-07, "loss": 0.0111, "step": 30989 }, { "epoch": 3.674848808253291, "grad_norm": 0.6228262338974714, "learning_rate": 8.614064831546642e-07, "loss": 0.0199, "step": 30990 }, { "epoch": 3.6749673900154156, "grad_norm": 0.7120921228219405, "learning_rate": 8.607819224675711e-07, "loss": 0.0372, "step": 30991 }, { "epoch": 3.6750859717775404, "grad_norm": 0.48130264006553514, "learning_rate": 8.601575843144666e-07, "loss": 0.0235, "step": 30992 }, { "epoch": 3.6752045535396656, "grad_norm": 0.6717827607697575, "learning_rate": 8.595334687011042e-07, "loss": 0.0385, "step": 30993 }, { "epoch": 3.6753231353017908, "grad_norm": 0.629659991013707, "learning_rate": 8.589095756332433e-07, "loss": 0.0317, "step": 30994 }, { "epoch": 3.6754417170639155, "grad_norm": 0.5517202716537072, "learning_rate": 8.582859051166292e-07, "loss": 0.0285, "step": 30995 }, { "epoch": 3.6755602988260403, "grad_norm": 0.42879857322283244, "learning_rate": 8.576624571570186e-07, "loss": 0.0132, "step": 30996 }, { "epoch": 3.6756788805881655, "grad_norm": 0.4767837266269312, "learning_rate": 8.570392317601511e-07, "loss": 0.0194, "step": 30997 }, { "epoch": 3.6757974623502907, "grad_norm": 0.36000646397022507, "learning_rate": 8.564162289317778e-07, "loss": 0.0182, "step": 30998 }, { "epoch": 3.6759160441124155, "grad_norm": 0.4381234092106162, "learning_rate": 8.557934486776386e-07, "loss": 0.0236, "step": 30999 }, { "epoch": 3.67603462587454, "grad_norm": 0.42100517512705765, "learning_rate": 8.551708910034789e-07, "loss": 0.0203, "step": 31000 }, { "epoch": 3.6761532076366654, "grad_norm": 0.5065920547684708, "learning_rate": 8.545485559150301e-07, "loss": 0.023, "step": 31001 }, { "epoch": 3.6762717893987906, "grad_norm": 0.5791383190331865, "learning_rate": 8.539264434180405e-07, "loss": 0.0202, "step": 31002 }, { "epoch": 3.6763903711609154, "grad_norm": 0.49688491655053607, "learning_rate": 8.53304553518236e-07, "loss": 0.025, "step": 31003 }, { "epoch": 3.6765089529230406, "grad_norm": 0.5689661727122218, "learning_rate": 8.526828862213537e-07, "loss": 0.0289, "step": 31004 }, { "epoch": 3.6766275346851653, "grad_norm": 0.5247177986549756, "learning_rate": 8.520614415331224e-07, "loss": 0.0192, "step": 31005 }, { "epoch": 3.6767461164472905, "grad_norm": 0.6161939806146902, "learning_rate": 8.514402194592736e-07, "loss": 0.036, "step": 31006 }, { "epoch": 3.6768646982094153, "grad_norm": 0.5636870147778875, "learning_rate": 8.508192200055359e-07, "loss": 0.0266, "step": 31007 }, { "epoch": 3.6769832799715405, "grad_norm": 0.5159608521991481, "learning_rate": 8.501984431776272e-07, "loss": 0.0215, "step": 31008 }, { "epoch": 3.6771018617336653, "grad_norm": 0.28251324617087775, "learning_rate": 8.49577888981276e-07, "loss": 0.0106, "step": 31009 }, { "epoch": 3.6772204434957905, "grad_norm": 0.5984082714944161, "learning_rate": 8.489575574222003e-07, "loss": 0.0257, "step": 31010 }, { "epoch": 3.6773390252579152, "grad_norm": 0.4912838141915146, "learning_rate": 8.483374485061229e-07, "loss": 0.0152, "step": 31011 }, { "epoch": 3.6774576070200404, "grad_norm": 0.8073189789271624, "learning_rate": 8.477175622387562e-07, "loss": 0.0326, "step": 31012 }, { "epoch": 3.677576188782165, "grad_norm": 0.45744195900806006, "learning_rate": 8.47097898625815e-07, "loss": 0.0207, "step": 31013 }, { "epoch": 3.6776947705442904, "grad_norm": 0.31216457758909527, "learning_rate": 8.464784576730111e-07, "loss": 0.0123, "step": 31014 }, { "epoch": 3.677813352306415, "grad_norm": 0.4312776502407389, "learning_rate": 8.458592393860598e-07, "loss": 0.0223, "step": 31015 }, { "epoch": 3.6779319340685404, "grad_norm": 1.0047102353464887, "learning_rate": 8.452402437706647e-07, "loss": 0.0413, "step": 31016 }, { "epoch": 3.678050515830665, "grad_norm": 0.5907929899243941, "learning_rate": 8.446214708325351e-07, "loss": 0.02, "step": 31017 }, { "epoch": 3.6781690975927903, "grad_norm": 0.7895947904983134, "learning_rate": 8.440029205773747e-07, "loss": 0.0338, "step": 31018 }, { "epoch": 3.678287679354915, "grad_norm": 0.5993377225567401, "learning_rate": 8.433845930108846e-07, "loss": 0.0307, "step": 31019 }, { "epoch": 3.6784062611170403, "grad_norm": 0.6315315304680627, "learning_rate": 8.427664881387659e-07, "loss": 0.0449, "step": 31020 }, { "epoch": 3.678524842879165, "grad_norm": 0.631612247490349, "learning_rate": 8.421486059667167e-07, "loss": 0.0292, "step": 31021 }, { "epoch": 3.6786434246412902, "grad_norm": 0.618848082194987, "learning_rate": 8.415309465004323e-07, "loss": 0.0281, "step": 31022 }, { "epoch": 3.678762006403415, "grad_norm": 0.4547792016610691, "learning_rate": 8.409135097456111e-07, "loss": 0.0229, "step": 31023 }, { "epoch": 3.67888058816554, "grad_norm": 0.6539784267942285, "learning_rate": 8.402962957079347e-07, "loss": 0.0353, "step": 31024 }, { "epoch": 3.678999169927665, "grad_norm": 0.5724732076765425, "learning_rate": 8.396793043931067e-07, "loss": 0.0273, "step": 31025 }, { "epoch": 3.67911775168979, "grad_norm": 0.8930016626888366, "learning_rate": 8.390625358068033e-07, "loss": 0.0488, "step": 31026 }, { "epoch": 3.6792363334519154, "grad_norm": 0.34802632108334136, "learning_rate": 8.384459899547198e-07, "loss": 0.0117, "step": 31027 }, { "epoch": 3.67935491521404, "grad_norm": 0.30916512695883025, "learning_rate": 8.378296668425295e-07, "loss": 0.0128, "step": 31028 }, { "epoch": 3.679473496976165, "grad_norm": 0.3902179935393511, "learning_rate": 8.372135664759279e-07, "loss": 0.0139, "step": 31029 }, { "epoch": 3.67959207873829, "grad_norm": 0.8029867940838362, "learning_rate": 8.365976888605798e-07, "loss": 0.0427, "step": 31030 }, { "epoch": 3.6797106605004153, "grad_norm": 0.7202261244116787, "learning_rate": 8.359820340021724e-07, "loss": 0.0284, "step": 31031 }, { "epoch": 3.67982924226254, "grad_norm": 0.592786095646171, "learning_rate": 8.353666019063789e-07, "loss": 0.0279, "step": 31032 }, { "epoch": 3.679947824024665, "grad_norm": 0.35101995542616554, "learning_rate": 8.347513925788725e-07, "loss": 0.0112, "step": 31033 }, { "epoch": 3.68006640578679, "grad_norm": 0.2999752415260466, "learning_rate": 8.341364060253265e-07, "loss": 0.0109, "step": 31034 }, { "epoch": 3.680184987548915, "grad_norm": 0.6154157561690549, "learning_rate": 8.335216422514031e-07, "loss": 0.026, "step": 31035 }, { "epoch": 3.68030356931104, "grad_norm": 0.6331598337235191, "learning_rate": 8.329071012627837e-07, "loss": 0.0236, "step": 31036 }, { "epoch": 3.6804221510731647, "grad_norm": 0.6333238992811661, "learning_rate": 8.322927830651195e-07, "loss": 0.0191, "step": 31037 }, { "epoch": 3.68054073283529, "grad_norm": 0.8261202156433725, "learning_rate": 8.316786876640837e-07, "loss": 0.0335, "step": 31038 }, { "epoch": 3.680659314597415, "grad_norm": 0.665378765508391, "learning_rate": 8.310648150653272e-07, "loss": 0.0238, "step": 31039 }, { "epoch": 3.68077789635954, "grad_norm": 0.5881760994153129, "learning_rate": 8.304511652745206e-07, "loss": 0.0227, "step": 31040 }, { "epoch": 3.6808964781216647, "grad_norm": 0.47219267218960254, "learning_rate": 8.298377382973121e-07, "loss": 0.0204, "step": 31041 }, { "epoch": 3.68101505988379, "grad_norm": 0.551064278065902, "learning_rate": 8.292245341393612e-07, "loss": 0.0248, "step": 31042 }, { "epoch": 3.681133641645915, "grad_norm": 0.7203451733942626, "learning_rate": 8.286115528063188e-07, "loss": 0.0363, "step": 31043 }, { "epoch": 3.68125222340804, "grad_norm": 0.40003720799194376, "learning_rate": 8.279987943038387e-07, "loss": 0.0216, "step": 31044 }, { "epoch": 3.6813708051701646, "grad_norm": 0.48121897748447956, "learning_rate": 8.273862586375691e-07, "loss": 0.0204, "step": 31045 }, { "epoch": 3.6814893869322898, "grad_norm": 0.39263378663840787, "learning_rate": 8.267739458131501e-07, "loss": 0.0169, "step": 31046 }, { "epoch": 3.681607968694415, "grad_norm": 0.6749475826533218, "learning_rate": 8.26161855836241e-07, "loss": 0.0463, "step": 31047 }, { "epoch": 3.6817265504565397, "grad_norm": 0.8096647888596742, "learning_rate": 8.255499887124707e-07, "loss": 0.0419, "step": 31048 }, { "epoch": 3.6818451322186645, "grad_norm": 0.5794217958417276, "learning_rate": 8.249383444474873e-07, "loss": 0.0216, "step": 31049 }, { "epoch": 3.6819637139807897, "grad_norm": 0.6992044479008122, "learning_rate": 8.243269230469197e-07, "loss": 0.0233, "step": 31050 }, { "epoch": 3.682082295742915, "grad_norm": 0.6003891476074346, "learning_rate": 8.23715724516419e-07, "loss": 0.0239, "step": 31051 }, { "epoch": 3.6822008775050397, "grad_norm": 0.4155876329533847, "learning_rate": 8.231047488616112e-07, "loss": 0.0198, "step": 31052 }, { "epoch": 3.682319459267165, "grad_norm": 0.699387247369998, "learning_rate": 8.224939960881278e-07, "loss": 0.0323, "step": 31053 }, { "epoch": 3.6824380410292896, "grad_norm": 0.7449455336270528, "learning_rate": 8.218834662016033e-07, "loss": 0.0434, "step": 31054 }, { "epoch": 3.682556622791415, "grad_norm": 0.6183202959480373, "learning_rate": 8.212731592076612e-07, "loss": 0.0176, "step": 31055 }, { "epoch": 3.6826752045535396, "grad_norm": 0.7770295856396513, "learning_rate": 8.206630751119354e-07, "loss": 0.0334, "step": 31056 }, { "epoch": 3.682793786315665, "grad_norm": 0.554456146476468, "learning_rate": 8.20053213920044e-07, "loss": 0.0322, "step": 31057 }, { "epoch": 3.6829123680777895, "grad_norm": 0.7562713711584284, "learning_rate": 8.194435756376074e-07, "loss": 0.0316, "step": 31058 }, { "epoch": 3.6830309498399147, "grad_norm": 0.5959293332778908, "learning_rate": 8.188341602702515e-07, "loss": 0.0327, "step": 31059 }, { "epoch": 3.6831495316020395, "grad_norm": 0.42422435727679964, "learning_rate": 8.182249678235915e-07, "loss": 0.0208, "step": 31060 }, { "epoch": 3.6832681133641647, "grad_norm": 0.6170293774724901, "learning_rate": 8.176159983032422e-07, "loss": 0.0294, "step": 31061 }, { "epoch": 3.6833866951262895, "grad_norm": 0.6533088547282592, "learning_rate": 8.170072517148214e-07, "loss": 0.0242, "step": 31062 }, { "epoch": 3.6835052768884147, "grad_norm": 0.7141298163907617, "learning_rate": 8.163987280639357e-07, "loss": 0.0375, "step": 31063 }, { "epoch": 3.6836238586505394, "grad_norm": 0.3136099054609796, "learning_rate": 8.157904273562e-07, "loss": 0.0155, "step": 31064 }, { "epoch": 3.6837424404126646, "grad_norm": 0.3599092439151212, "learning_rate": 8.151823495972183e-07, "loss": 0.0185, "step": 31065 }, { "epoch": 3.6838610221747894, "grad_norm": 0.31600163797290093, "learning_rate": 8.14574494792597e-07, "loss": 0.0142, "step": 31066 }, { "epoch": 3.6839796039369146, "grad_norm": 0.3418522333390079, "learning_rate": 8.139668629479457e-07, "loss": 0.0161, "step": 31067 }, { "epoch": 3.6840981856990394, "grad_norm": 0.7509296018734232, "learning_rate": 8.133594540688571e-07, "loss": 0.0288, "step": 31068 }, { "epoch": 3.6842167674611646, "grad_norm": 0.6091425455009665, "learning_rate": 8.12752268160935e-07, "loss": 0.0293, "step": 31069 }, { "epoch": 3.6843353492232893, "grad_norm": 0.6797997177598647, "learning_rate": 8.121453052297778e-07, "loss": 0.0341, "step": 31070 }, { "epoch": 3.6844539309854145, "grad_norm": 0.9569669631265709, "learning_rate": 8.115385652809782e-07, "loss": 0.0269, "step": 31071 }, { "epoch": 3.6845725127475393, "grad_norm": 0.330402364399142, "learning_rate": 8.109320483201343e-07, "loss": 0.0124, "step": 31072 }, { "epoch": 3.6846910945096645, "grad_norm": 0.7375289239634463, "learning_rate": 8.103257543528281e-07, "loss": 0.0438, "step": 31073 }, { "epoch": 3.6848096762717892, "grad_norm": 0.4513635765035643, "learning_rate": 8.097196833846632e-07, "loss": 0.0211, "step": 31074 }, { "epoch": 3.6849282580339144, "grad_norm": 0.47530939405521194, "learning_rate": 8.09113835421213e-07, "loss": 0.0198, "step": 31075 }, { "epoch": 3.6850468397960396, "grad_norm": 0.6236374306539888, "learning_rate": 8.085082104680702e-07, "loss": 0.0427, "step": 31076 }, { "epoch": 3.6851654215581644, "grad_norm": 0.4122361214565143, "learning_rate": 8.079028085308165e-07, "loss": 0.0207, "step": 31077 }, { "epoch": 3.685284003320289, "grad_norm": 0.7111291912863849, "learning_rate": 8.072976296150337e-07, "loss": 0.0388, "step": 31078 }, { "epoch": 3.6854025850824144, "grad_norm": 0.7298112645791046, "learning_rate": 8.066926737262975e-07, "loss": 0.0329, "step": 31079 }, { "epoch": 3.6855211668445396, "grad_norm": 0.3843943825015927, "learning_rate": 8.060879408701871e-07, "loss": 0.0129, "step": 31080 }, { "epoch": 3.6856397486066643, "grad_norm": 0.3351049750289751, "learning_rate": 8.054834310522785e-07, "loss": 0.0187, "step": 31081 }, { "epoch": 3.685758330368789, "grad_norm": 0.5189038476423049, "learning_rate": 8.048791442781423e-07, "loss": 0.0251, "step": 31082 }, { "epoch": 3.6858769121309143, "grad_norm": 0.5006967971833182, "learning_rate": 8.042750805533517e-07, "loss": 0.0242, "step": 31083 }, { "epoch": 3.6859954938930395, "grad_norm": 0.5563056094132002, "learning_rate": 8.036712398834717e-07, "loss": 0.029, "step": 31084 }, { "epoch": 3.6861140756551642, "grad_norm": 0.4974943751428712, "learning_rate": 8.030676222740757e-07, "loss": 0.0243, "step": 31085 }, { "epoch": 3.686232657417289, "grad_norm": 0.4499895482032899, "learning_rate": 8.024642277307204e-07, "loss": 0.0202, "step": 31086 }, { "epoch": 3.686351239179414, "grad_norm": 0.9161867630380179, "learning_rate": 8.018610562589707e-07, "loss": 0.028, "step": 31087 }, { "epoch": 3.6864698209415394, "grad_norm": 0.5329193168078035, "learning_rate": 8.012581078643915e-07, "loss": 0.0234, "step": 31088 }, { "epoch": 3.686588402703664, "grad_norm": 0.38640788504729523, "learning_rate": 8.006553825525398e-07, "loss": 0.0161, "step": 31089 }, { "epoch": 3.686706984465789, "grad_norm": 0.7101024351683864, "learning_rate": 8.000528803289665e-07, "loss": 0.0314, "step": 31090 }, { "epoch": 3.686825566227914, "grad_norm": 0.6788177666227772, "learning_rate": 7.994506011992309e-07, "loss": 0.0223, "step": 31091 }, { "epoch": 3.6869441479900393, "grad_norm": 0.38112600516643547, "learning_rate": 7.988485451688815e-07, "loss": 0.0152, "step": 31092 }, { "epoch": 3.687062729752164, "grad_norm": 0.37316223796857434, "learning_rate": 7.98246712243475e-07, "loss": 0.0159, "step": 31093 }, { "epoch": 3.687181311514289, "grad_norm": 0.5866800494143181, "learning_rate": 7.97645102428557e-07, "loss": 0.0282, "step": 31094 }, { "epoch": 3.687299893276414, "grad_norm": 0.5039922277197196, "learning_rate": 7.970437157296645e-07, "loss": 0.0279, "step": 31095 }, { "epoch": 3.6874184750385393, "grad_norm": 0.5038896425097493, "learning_rate": 7.964425521523572e-07, "loss": 0.0195, "step": 31096 }, { "epoch": 3.687537056800664, "grad_norm": 1.0636196785676488, "learning_rate": 7.958416117021666e-07, "loss": 0.038, "step": 31097 }, { "epoch": 3.6876556385627888, "grad_norm": 0.3432839669125862, "learning_rate": 7.952408943846357e-07, "loss": 0.0109, "step": 31098 }, { "epoch": 3.687774220324914, "grad_norm": 0.39178069349332206, "learning_rate": 7.946404002052988e-07, "loss": 0.0141, "step": 31099 }, { "epoch": 3.687892802087039, "grad_norm": 0.746880688328633, "learning_rate": 7.940401291696986e-07, "loss": 0.0301, "step": 31100 }, { "epoch": 3.688011383849164, "grad_norm": 0.4903706007477691, "learning_rate": 7.934400812833642e-07, "loss": 0.02, "step": 31101 }, { "epoch": 3.688129965611289, "grad_norm": 0.6196196371217465, "learning_rate": 7.928402565518272e-07, "loss": 0.0327, "step": 31102 }, { "epoch": 3.688248547373414, "grad_norm": 0.4651792272124165, "learning_rate": 7.922406549806166e-07, "loss": 0.0181, "step": 31103 }, { "epoch": 3.688367129135539, "grad_norm": 0.569524432879516, "learning_rate": 7.91641276575264e-07, "loss": 0.0261, "step": 31104 }, { "epoch": 3.688485710897664, "grad_norm": 0.4445231348498981, "learning_rate": 7.910421213412956e-07, "loss": 0.018, "step": 31105 }, { "epoch": 3.688604292659789, "grad_norm": 0.3984083986516453, "learning_rate": 7.904431892842262e-07, "loss": 0.0202, "step": 31106 }, { "epoch": 3.688722874421914, "grad_norm": 0.618962955225649, "learning_rate": 7.89844480409585e-07, "loss": 0.0311, "step": 31107 }, { "epoch": 3.688841456184039, "grad_norm": 0.7136373396152241, "learning_rate": 7.892459947228898e-07, "loss": 0.026, "step": 31108 }, { "epoch": 3.688960037946164, "grad_norm": 0.6102862763583974, "learning_rate": 7.886477322296609e-07, "loss": 0.0258, "step": 31109 }, { "epoch": 3.689078619708289, "grad_norm": 0.7410367838782559, "learning_rate": 7.880496929354026e-07, "loss": 0.0538, "step": 31110 }, { "epoch": 3.6891972014704137, "grad_norm": 0.5970455275311776, "learning_rate": 7.874518768456407e-07, "loss": 0.0262, "step": 31111 }, { "epoch": 3.689315783232539, "grad_norm": 0.3101430918328745, "learning_rate": 7.868542839658793e-07, "loss": 0.0157, "step": 31112 }, { "epoch": 3.6894343649946637, "grad_norm": 0.9408374288569628, "learning_rate": 7.86256914301628e-07, "loss": 0.0434, "step": 31113 }, { "epoch": 3.689552946756789, "grad_norm": 0.5612452136919589, "learning_rate": 7.85659767858396e-07, "loss": 0.0207, "step": 31114 }, { "epoch": 3.6896715285189137, "grad_norm": 0.4392178170506111, "learning_rate": 7.850628446416874e-07, "loss": 0.0171, "step": 31115 }, { "epoch": 3.689790110281039, "grad_norm": 0.4122150982475814, "learning_rate": 7.844661446570089e-07, "loss": 0.0186, "step": 31116 }, { "epoch": 3.6899086920431636, "grad_norm": 0.8078878929814375, "learning_rate": 7.838696679098506e-07, "loss": 0.0364, "step": 31117 }, { "epoch": 3.690027273805289, "grad_norm": 0.5118488919268804, "learning_rate": 7.832734144057246e-07, "loss": 0.0256, "step": 31118 }, { "epoch": 3.6901458555674136, "grad_norm": 1.0225946092604805, "learning_rate": 7.826773841501183e-07, "loss": 0.0321, "step": 31119 }, { "epoch": 3.690264437329539, "grad_norm": 0.8165477889522688, "learning_rate": 7.820815771485274e-07, "loss": 0.0554, "step": 31120 }, { "epoch": 3.6903830190916636, "grad_norm": 0.6329464680879681, "learning_rate": 7.814859934064472e-07, "loss": 0.0269, "step": 31121 }, { "epoch": 3.6905016008537888, "grad_norm": 0.49148743744451706, "learning_rate": 7.808906329293681e-07, "loss": 0.0226, "step": 31122 }, { "epoch": 3.6906201826159135, "grad_norm": 0.5143997966978379, "learning_rate": 7.802954957227826e-07, "loss": 0.0213, "step": 31123 }, { "epoch": 3.6907387643780387, "grad_norm": 0.4192954592418635, "learning_rate": 7.797005817921671e-07, "loss": 0.0198, "step": 31124 }, { "epoch": 3.690857346140164, "grad_norm": 0.5935368138415941, "learning_rate": 7.791058911430116e-07, "loss": 0.0309, "step": 31125 }, { "epoch": 3.6909759279022887, "grad_norm": 1.090273988736509, "learning_rate": 7.785114237807978e-07, "loss": 0.0555, "step": 31126 }, { "epoch": 3.6910945096644134, "grad_norm": 0.6411947610587434, "learning_rate": 7.779171797110102e-07, "loss": 0.0349, "step": 31127 }, { "epoch": 3.6912130914265386, "grad_norm": 0.4023042991185588, "learning_rate": 7.773231589391194e-07, "loss": 0.0169, "step": 31128 }, { "epoch": 3.691331673188664, "grad_norm": 0.42260624419506343, "learning_rate": 7.767293614706073e-07, "loss": 0.0195, "step": 31129 }, { "epoch": 3.6914502549507886, "grad_norm": 0.35146807756829457, "learning_rate": 7.761357873109443e-07, "loss": 0.0189, "step": 31130 }, { "epoch": 3.6915688367129134, "grad_norm": 0.35686718901959374, "learning_rate": 7.755424364656039e-07, "loss": 0.0145, "step": 31131 }, { "epoch": 3.6916874184750386, "grad_norm": 0.5624804956482087, "learning_rate": 7.749493089400567e-07, "loss": 0.0268, "step": 31132 }, { "epoch": 3.6918060002371638, "grad_norm": 0.8589823952346173, "learning_rate": 7.743564047397706e-07, "loss": 0.0454, "step": 31133 }, { "epoch": 3.6919245819992885, "grad_norm": 0.6323241528245358, "learning_rate": 7.737637238702133e-07, "loss": 0.0329, "step": 31134 }, { "epoch": 3.6920431637614133, "grad_norm": 0.5000001347363523, "learning_rate": 7.731712663368417e-07, "loss": 0.0331, "step": 31135 }, { "epoch": 3.6921617455235385, "grad_norm": 0.9439198216396, "learning_rate": 7.725790321451237e-07, "loss": 0.0348, "step": 31136 }, { "epoch": 3.6922803272856637, "grad_norm": 0.6102319540932453, "learning_rate": 7.719870213005187e-07, "loss": 0.0305, "step": 31137 }, { "epoch": 3.6923989090477884, "grad_norm": 0.6981386360997802, "learning_rate": 7.713952338084834e-07, "loss": 0.0321, "step": 31138 }, { "epoch": 3.692517490809913, "grad_norm": 0.5833688014957914, "learning_rate": 7.70803669674472e-07, "loss": 0.0248, "step": 31139 }, { "epoch": 3.6926360725720384, "grad_norm": 0.45319779110768615, "learning_rate": 7.702123289039381e-07, "loss": 0.0226, "step": 31140 }, { "epoch": 3.6927546543341636, "grad_norm": 0.482261397445065, "learning_rate": 7.69621211502336e-07, "loss": 0.0266, "step": 31141 }, { "epoch": 3.6928732360962884, "grad_norm": 0.5521177316627854, "learning_rate": 7.690303174751112e-07, "loss": 0.0294, "step": 31142 }, { "epoch": 3.692991817858413, "grad_norm": 0.6589551505166904, "learning_rate": 7.684396468277149e-07, "loss": 0.0295, "step": 31143 }, { "epoch": 3.6931103996205383, "grad_norm": 0.5225863010846564, "learning_rate": 7.678491995655873e-07, "loss": 0.0215, "step": 31144 }, { "epoch": 3.6932289813826635, "grad_norm": 0.2919543817321376, "learning_rate": 7.672589756941767e-07, "loss": 0.0161, "step": 31145 }, { "epoch": 3.6933475631447883, "grad_norm": 0.4952607691905356, "learning_rate": 7.666689752189232e-07, "loss": 0.0221, "step": 31146 }, { "epoch": 3.693466144906913, "grad_norm": 0.5527490474816786, "learning_rate": 7.660791981452614e-07, "loss": 0.0183, "step": 31147 }, { "epoch": 3.6935847266690383, "grad_norm": 0.7364622441443655, "learning_rate": 7.654896444786341e-07, "loss": 0.0341, "step": 31148 }, { "epoch": 3.6937033084311635, "grad_norm": 0.3527831226116415, "learning_rate": 7.64900314224476e-07, "loss": 0.0229, "step": 31149 }, { "epoch": 3.693821890193288, "grad_norm": 0.6481058475214787, "learning_rate": 7.643112073882158e-07, "loss": 0.0292, "step": 31150 }, { "epoch": 3.6939404719554134, "grad_norm": 0.7003285298342836, "learning_rate": 7.637223239752856e-07, "loss": 0.0259, "step": 31151 }, { "epoch": 3.694059053717538, "grad_norm": 0.7427542916672668, "learning_rate": 7.631336639911168e-07, "loss": 0.0332, "step": 31152 }, { "epoch": 3.6941776354796634, "grad_norm": 0.34571858247158144, "learning_rate": 7.625452274411305e-07, "loss": 0.0154, "step": 31153 }, { "epoch": 3.694296217241788, "grad_norm": 0.48902525333481706, "learning_rate": 7.619570143307609e-07, "loss": 0.0254, "step": 31154 }, { "epoch": 3.6944147990039133, "grad_norm": 0.7098040181777275, "learning_rate": 7.613690246654204e-07, "loss": 0.0355, "step": 31155 }, { "epoch": 3.694533380766038, "grad_norm": 0.3043884869177774, "learning_rate": 7.607812584505381e-07, "loss": 0.0148, "step": 31156 }, { "epoch": 3.6946519625281633, "grad_norm": 0.6957273822524048, "learning_rate": 7.601937156915262e-07, "loss": 0.0344, "step": 31157 }, { "epoch": 3.694770544290288, "grad_norm": 0.6184497375348893, "learning_rate": 7.596063963938027e-07, "loss": 0.0211, "step": 31158 }, { "epoch": 3.6948891260524133, "grad_norm": 0.395150536810827, "learning_rate": 7.590193005627828e-07, "loss": 0.0127, "step": 31159 }, { "epoch": 3.695007707814538, "grad_norm": 0.6597995607115166, "learning_rate": 7.58432428203884e-07, "loss": 0.0272, "step": 31160 }, { "epoch": 3.6951262895766632, "grad_norm": 0.46994193315916466, "learning_rate": 7.578457793225053e-07, "loss": 0.0249, "step": 31161 }, { "epoch": 3.695244871338788, "grad_norm": 0.5070288397905862, "learning_rate": 7.572593539240614e-07, "loss": 0.0226, "step": 31162 }, { "epoch": 3.695363453100913, "grad_norm": 0.3988634514898461, "learning_rate": 7.566731520139591e-07, "loss": 0.016, "step": 31163 }, { "epoch": 3.695482034863038, "grad_norm": 0.728814970380539, "learning_rate": 7.560871735976e-07, "loss": 0.034, "step": 31164 }, { "epoch": 3.695600616625163, "grad_norm": 0.32849325751158953, "learning_rate": 7.555014186803905e-07, "loss": 0.0183, "step": 31165 }, { "epoch": 3.695719198387288, "grad_norm": 0.503145838930809, "learning_rate": 7.549158872677209e-07, "loss": 0.0197, "step": 31166 }, { "epoch": 3.695837780149413, "grad_norm": 0.703501669400999, "learning_rate": 7.543305793650035e-07, "loss": 0.0389, "step": 31167 }, { "epoch": 3.695956361911538, "grad_norm": 0.3711945469393105, "learning_rate": 7.537454949776229e-07, "loss": 0.0127, "step": 31168 }, { "epoch": 3.696074943673663, "grad_norm": 0.503017588418607, "learning_rate": 7.531606341109748e-07, "loss": 0.0163, "step": 31169 }, { "epoch": 3.696193525435788, "grad_norm": 0.4741676184930481, "learning_rate": 7.525759967704521e-07, "loss": 0.0185, "step": 31170 }, { "epoch": 3.696312107197913, "grad_norm": 1.2676101393355226, "learning_rate": 7.519915829614449e-07, "loss": 0.0439, "step": 31171 }, { "epoch": 3.696430688960038, "grad_norm": 0.3582942565297387, "learning_rate": 7.514073926893433e-07, "loss": 0.0243, "step": 31172 }, { "epoch": 3.696549270722163, "grad_norm": 0.38976621475079215, "learning_rate": 7.508234259595237e-07, "loss": 0.0177, "step": 31173 }, { "epoch": 3.6966678524842878, "grad_norm": 0.3690181729188304, "learning_rate": 7.502396827773816e-07, "loss": 0.0218, "step": 31174 }, { "epoch": 3.696786434246413, "grad_norm": 0.4116851682428823, "learning_rate": 7.496561631482907e-07, "loss": 0.0128, "step": 31175 }, { "epoch": 3.6969050160085377, "grad_norm": 0.48562961549685385, "learning_rate": 7.490728670776353e-07, "loss": 0.0162, "step": 31176 }, { "epoch": 3.697023597770663, "grad_norm": 0.5653264559306838, "learning_rate": 7.484897945707836e-07, "loss": 0.0188, "step": 31177 }, { "epoch": 3.697142179532788, "grad_norm": 0.46480567808208656, "learning_rate": 7.479069456331228e-07, "loss": 0.0205, "step": 31178 }, { "epoch": 3.697260761294913, "grad_norm": 0.9197708888313745, "learning_rate": 7.473243202700153e-07, "loss": 0.0373, "step": 31179 }, { "epoch": 3.6973793430570376, "grad_norm": 0.33592309411462806, "learning_rate": 7.467419184868374e-07, "loss": 0.0141, "step": 31180 }, { "epoch": 3.697497924819163, "grad_norm": 0.4028257250459585, "learning_rate": 7.461597402889597e-07, "loss": 0.0138, "step": 31181 }, { "epoch": 3.697616506581288, "grad_norm": 0.6147043221507089, "learning_rate": 7.455777856817475e-07, "loss": 0.0336, "step": 31182 }, { "epoch": 3.697735088343413, "grad_norm": 0.3407010827875755, "learning_rate": 7.449960546705659e-07, "loss": 0.0156, "step": 31183 }, { "epoch": 3.6978536701055376, "grad_norm": 0.7376035483430387, "learning_rate": 7.444145472607744e-07, "loss": 0.0259, "step": 31184 }, { "epoch": 3.6979722518676628, "grad_norm": 0.2961110090160359, "learning_rate": 7.438332634577383e-07, "loss": 0.0124, "step": 31185 }, { "epoch": 3.698090833629788, "grad_norm": 0.7879156314437368, "learning_rate": 7.432522032668143e-07, "loss": 0.0342, "step": 31186 }, { "epoch": 3.6982094153919127, "grad_norm": 0.43645973876892646, "learning_rate": 7.426713666933621e-07, "loss": 0.0177, "step": 31187 }, { "epoch": 3.6983279971540375, "grad_norm": 0.43107436244333325, "learning_rate": 7.420907537427274e-07, "loss": 0.0237, "step": 31188 }, { "epoch": 3.6984465789161627, "grad_norm": 0.7045825691408255, "learning_rate": 7.415103644202726e-07, "loss": 0.0379, "step": 31189 }, { "epoch": 3.698565160678288, "grad_norm": 0.5311521966162452, "learning_rate": 7.409301987313461e-07, "loss": 0.0317, "step": 31190 }, { "epoch": 3.6986837424404126, "grad_norm": 0.4037779424503725, "learning_rate": 7.403502566812908e-07, "loss": 0.0195, "step": 31191 }, { "epoch": 3.6988023242025374, "grad_norm": 0.5264229706927801, "learning_rate": 7.397705382754582e-07, "loss": 0.0247, "step": 31192 }, { "epoch": 3.6989209059646626, "grad_norm": 0.5567023941438662, "learning_rate": 7.39191043519194e-07, "loss": 0.0305, "step": 31193 }, { "epoch": 3.699039487726788, "grad_norm": 0.6215443920800685, "learning_rate": 7.386117724178382e-07, "loss": 0.0273, "step": 31194 }, { "epoch": 3.6991580694889126, "grad_norm": 0.43152838153169615, "learning_rate": 7.380327249767283e-07, "loss": 0.0172, "step": 31195 }, { "epoch": 3.6992766512510373, "grad_norm": 0.33270852279157676, "learning_rate": 7.374539012012044e-07, "loss": 0.015, "step": 31196 }, { "epoch": 3.6993952330131625, "grad_norm": 0.740738177057711, "learning_rate": 7.368753010966012e-07, "loss": 0.0385, "step": 31197 }, { "epoch": 3.6995138147752877, "grad_norm": 0.8402901552615325, "learning_rate": 7.362969246682588e-07, "loss": 0.0367, "step": 31198 }, { "epoch": 3.6996323965374125, "grad_norm": 0.5594733067990859, "learning_rate": 7.357187719215037e-07, "loss": 0.0276, "step": 31199 }, { "epoch": 3.6997509782995377, "grad_norm": 0.7697460173615152, "learning_rate": 7.351408428616646e-07, "loss": 0.0348, "step": 31200 }, { "epoch": 3.6998695600616625, "grad_norm": 0.4940594688152479, "learning_rate": 7.345631374940709e-07, "loss": 0.0222, "step": 31201 }, { "epoch": 3.6999881418237877, "grad_norm": 0.44256373512659297, "learning_rate": 7.339856558240487e-07, "loss": 0.0234, "step": 31202 }, { "epoch": 3.7001067235859124, "grad_norm": 0.887498171932962, "learning_rate": 7.334083978569245e-07, "loss": 0.0325, "step": 31203 }, { "epoch": 3.7002253053480376, "grad_norm": 0.7650074508771773, "learning_rate": 7.328313635980133e-07, "loss": 0.0432, "step": 31204 }, { "epoch": 3.7003438871101624, "grad_norm": 0.5504839025504246, "learning_rate": 7.322545530526442e-07, "loss": 0.0159, "step": 31205 }, { "epoch": 3.7004624688722876, "grad_norm": 0.5095809555657996, "learning_rate": 7.316779662261269e-07, "loss": 0.0239, "step": 31206 }, { "epoch": 3.7005810506344123, "grad_norm": 0.7580630016946854, "learning_rate": 7.311016031237766e-07, "loss": 0.0314, "step": 31207 }, { "epoch": 3.7006996323965375, "grad_norm": 0.5469661457303516, "learning_rate": 7.305254637509112e-07, "loss": 0.0211, "step": 31208 }, { "epoch": 3.7008182141586623, "grad_norm": 0.6085320707329673, "learning_rate": 7.299495481128404e-07, "loss": 0.0392, "step": 31209 }, { "epoch": 3.7009367959207875, "grad_norm": 0.49648564870318745, "learning_rate": 7.293738562148711e-07, "loss": 0.0219, "step": 31210 }, { "epoch": 3.7010553776829123, "grad_norm": 0.8588135088141834, "learning_rate": 7.287983880623128e-07, "loss": 0.0442, "step": 31211 }, { "epoch": 3.7011739594450375, "grad_norm": 0.8958310212452234, "learning_rate": 7.282231436604698e-07, "loss": 0.0446, "step": 31212 }, { "epoch": 3.7012925412071622, "grad_norm": 0.5588096501255252, "learning_rate": 7.276481230146431e-07, "loss": 0.0242, "step": 31213 }, { "epoch": 3.7014111229692874, "grad_norm": 0.7469776847238184, "learning_rate": 7.270733261301427e-07, "loss": 0.0399, "step": 31214 }, { "epoch": 3.701529704731412, "grad_norm": 0.5519707565312701, "learning_rate": 7.26498753012253e-07, "loss": 0.0241, "step": 31215 }, { "epoch": 3.7016482864935374, "grad_norm": 0.4282213576547305, "learning_rate": 7.259244036662838e-07, "loss": 0.0197, "step": 31216 }, { "epoch": 3.701766868255662, "grad_norm": 0.5543226222310279, "learning_rate": 7.253502780975224e-07, "loss": 0.0244, "step": 31217 }, { "epoch": 3.7018854500177873, "grad_norm": 0.5091585638298133, "learning_rate": 7.247763763112647e-07, "loss": 0.0201, "step": 31218 }, { "epoch": 3.702004031779912, "grad_norm": 0.5482083678577713, "learning_rate": 7.242026983127981e-07, "loss": 0.0243, "step": 31219 }, { "epoch": 3.7021226135420373, "grad_norm": 0.5220228422371889, "learning_rate": 7.236292441074183e-07, "loss": 0.0268, "step": 31220 }, { "epoch": 3.702241195304162, "grad_norm": 0.8329467929565736, "learning_rate": 7.230560137004045e-07, "loss": 0.0332, "step": 31221 }, { "epoch": 3.7023597770662873, "grad_norm": 0.5444736231206689, "learning_rate": 7.22483007097044e-07, "loss": 0.0238, "step": 31222 }, { "epoch": 3.702478358828412, "grad_norm": 0.6232729369481905, "learning_rate": 7.219102243026215e-07, "loss": 0.0315, "step": 31223 }, { "epoch": 3.7025969405905372, "grad_norm": 0.3964258949376255, "learning_rate": 7.213376653224107e-07, "loss": 0.0198, "step": 31224 }, { "epoch": 3.702715522352662, "grad_norm": 0.501643088204781, "learning_rate": 7.20765330161699e-07, "loss": 0.0232, "step": 31225 }, { "epoch": 3.702834104114787, "grad_norm": 0.36638431626267415, "learning_rate": 7.201932188257515e-07, "loss": 0.0131, "step": 31226 }, { "epoch": 3.7029526858769124, "grad_norm": 0.7615783117000072, "learning_rate": 7.196213313198558e-07, "loss": 0.0367, "step": 31227 }, { "epoch": 3.703071267639037, "grad_norm": 0.4320215598444288, "learning_rate": 7.190496676492714e-07, "loss": 0.015, "step": 31228 }, { "epoch": 3.703189849401162, "grad_norm": 0.6318255257347049, "learning_rate": 7.184782278192776e-07, "loss": 0.0168, "step": 31229 }, { "epoch": 3.703308431163287, "grad_norm": 0.643502211004576, "learning_rate": 7.179070118351366e-07, "loss": 0.0329, "step": 31230 }, { "epoch": 3.7034270129254123, "grad_norm": 0.5529063681491397, "learning_rate": 7.17336019702114e-07, "loss": 0.0195, "step": 31231 }, { "epoch": 3.703545594687537, "grad_norm": 0.3580956203130037, "learning_rate": 7.167652514254802e-07, "loss": 0.0177, "step": 31232 }, { "epoch": 3.703664176449662, "grad_norm": 0.582183995391498, "learning_rate": 7.161947070104896e-07, "loss": 0.031, "step": 31233 }, { "epoch": 3.703782758211787, "grad_norm": 0.5133792900709148, "learning_rate": 7.156243864624074e-07, "loss": 0.0224, "step": 31234 }, { "epoch": 3.7039013399739122, "grad_norm": 0.4132230210654316, "learning_rate": 7.150542897864876e-07, "loss": 0.0199, "step": 31235 }, { "epoch": 3.704019921736037, "grad_norm": 0.40074047425248877, "learning_rate": 7.144844169879899e-07, "loss": 0.0182, "step": 31236 }, { "epoch": 3.7041385034981618, "grad_norm": 0.5202357664581342, "learning_rate": 7.139147680721603e-07, "loss": 0.0248, "step": 31237 }, { "epoch": 3.704257085260287, "grad_norm": 0.6184952926869999, "learning_rate": 7.133453430442583e-07, "loss": 0.0272, "step": 31238 }, { "epoch": 3.704375667022412, "grad_norm": 0.5681512960880519, "learning_rate": 7.127761419095297e-07, "loss": 0.026, "step": 31239 }, { "epoch": 3.704494248784537, "grad_norm": 0.7212329253831629, "learning_rate": 7.122071646732204e-07, "loss": 0.0266, "step": 31240 }, { "epoch": 3.7046128305466617, "grad_norm": 0.5741154510392191, "learning_rate": 7.116384113405761e-07, "loss": 0.0313, "step": 31241 }, { "epoch": 3.704731412308787, "grad_norm": 0.4738818057517235, "learning_rate": 7.110698819168426e-07, "loss": 0.0204, "step": 31242 }, { "epoch": 3.704849994070912, "grad_norm": 0.5121374985143637, "learning_rate": 7.105015764072631e-07, "loss": 0.0201, "step": 31243 }, { "epoch": 3.704968575833037, "grad_norm": 0.6484307047476258, "learning_rate": 7.099334948170694e-07, "loss": 0.035, "step": 31244 }, { "epoch": 3.7050871575951616, "grad_norm": 0.3169596529827098, "learning_rate": 7.093656371515046e-07, "loss": 0.0163, "step": 31245 }, { "epoch": 3.705205739357287, "grad_norm": 0.30761159290572004, "learning_rate": 7.087980034158004e-07, "loss": 0.0107, "step": 31246 }, { "epoch": 3.705324321119412, "grad_norm": 0.3342314771529582, "learning_rate": 7.082305936151917e-07, "loss": 0.016, "step": 31247 }, { "epoch": 3.7054429028815368, "grad_norm": 0.535063980988157, "learning_rate": 7.076634077549049e-07, "loss": 0.024, "step": 31248 }, { "epoch": 3.7055614846436615, "grad_norm": 0.600046981418442, "learning_rate": 7.070964458401774e-07, "loss": 0.0237, "step": 31249 }, { "epoch": 3.7056800664057867, "grad_norm": 0.39081868643306156, "learning_rate": 7.065297078762301e-07, "loss": 0.0152, "step": 31250 }, { "epoch": 3.705798648167912, "grad_norm": 0.4894404653385313, "learning_rate": 7.059631938682837e-07, "loss": 0.0233, "step": 31251 }, { "epoch": 3.7059172299300367, "grad_norm": 0.45404756979789324, "learning_rate": 7.053969038215674e-07, "loss": 0.0252, "step": 31252 }, { "epoch": 3.706035811692162, "grad_norm": 0.4516785901186444, "learning_rate": 7.048308377413021e-07, "loss": 0.0202, "step": 31253 }, { "epoch": 3.7061543934542867, "grad_norm": 0.4410587801264695, "learning_rate": 7.04264995632703e-07, "loss": 0.0188, "step": 31254 }, { "epoch": 3.706272975216412, "grad_norm": 0.4476595221018931, "learning_rate": 7.036993775009854e-07, "loss": 0.0209, "step": 31255 }, { "epoch": 3.7063915569785366, "grad_norm": 0.4313844545163968, "learning_rate": 7.031339833513673e-07, "loss": 0.0145, "step": 31256 }, { "epoch": 3.706510138740662, "grad_norm": 0.4363662691068536, "learning_rate": 7.025688131890584e-07, "loss": 0.0244, "step": 31257 }, { "epoch": 3.7066287205027866, "grad_norm": 0.43679825922819754, "learning_rate": 7.020038670192713e-07, "loss": 0.0172, "step": 31258 }, { "epoch": 3.706747302264912, "grad_norm": 0.36745928468702543, "learning_rate": 7.014391448472074e-07, "loss": 0.014, "step": 31259 }, { "epoch": 3.7068658840270365, "grad_norm": 0.5416563979572981, "learning_rate": 7.008746466780819e-07, "loss": 0.0268, "step": 31260 }, { "epoch": 3.7069844657891617, "grad_norm": 0.5794167594245443, "learning_rate": 7.003103725170934e-07, "loss": 0.0234, "step": 31261 }, { "epoch": 3.7071030475512865, "grad_norm": 0.5201191860598152, "learning_rate": 6.997463223694434e-07, "loss": 0.0185, "step": 31262 }, { "epoch": 3.7072216293134117, "grad_norm": 0.4557655218866438, "learning_rate": 6.991824962403359e-07, "loss": 0.0233, "step": 31263 }, { "epoch": 3.7073402110755365, "grad_norm": 0.5716154974415758, "learning_rate": 6.986188941349642e-07, "loss": 0.027, "step": 31264 }, { "epoch": 3.7074587928376617, "grad_norm": 0.32235829531181814, "learning_rate": 6.980555160585295e-07, "loss": 0.011, "step": 31265 }, { "epoch": 3.7075773745997864, "grad_norm": 1.1465701905484642, "learning_rate": 6.974923620162193e-07, "loss": 0.0403, "step": 31266 }, { "epoch": 3.7076959563619116, "grad_norm": 0.41452281957365333, "learning_rate": 6.969294320132269e-07, "loss": 0.0246, "step": 31267 }, { "epoch": 3.7078145381240364, "grad_norm": 0.3609207941034242, "learning_rate": 6.963667260547424e-07, "loss": 0.0118, "step": 31268 }, { "epoch": 3.7079331198861616, "grad_norm": 0.5312540250043283, "learning_rate": 6.958042441459589e-07, "loss": 0.023, "step": 31269 }, { "epoch": 3.7080517016482863, "grad_norm": 0.4452723573301421, "learning_rate": 6.952419862920528e-07, "loss": 0.0212, "step": 31270 }, { "epoch": 3.7081702834104115, "grad_norm": 0.4889935768862295, "learning_rate": 6.946799524982089e-07, "loss": 0.0232, "step": 31271 }, { "epoch": 3.7082888651725363, "grad_norm": 0.3445501450361918, "learning_rate": 6.941181427696119e-07, "loss": 0.0198, "step": 31272 }, { "epoch": 3.7084074469346615, "grad_norm": 0.3718847756787146, "learning_rate": 6.93556557111441e-07, "loss": 0.016, "step": 31273 }, { "epoch": 3.7085260286967863, "grad_norm": 0.43859794826077175, "learning_rate": 6.929951955288727e-07, "loss": 0.0199, "step": 31274 }, { "epoch": 3.7086446104589115, "grad_norm": 0.6416113628285168, "learning_rate": 6.924340580270778e-07, "loss": 0.0311, "step": 31275 }, { "epoch": 3.7087631922210367, "grad_norm": 0.5146111955327067, "learning_rate": 6.918731446112381e-07, "loss": 0.0286, "step": 31276 }, { "epoch": 3.7088817739831614, "grad_norm": 0.9295038361130905, "learning_rate": 6.913124552865163e-07, "loss": 0.0418, "step": 31277 }, { "epoch": 3.709000355745286, "grad_norm": 1.0843934520395029, "learning_rate": 6.907519900580861e-07, "loss": 0.0651, "step": 31278 }, { "epoch": 3.7091189375074114, "grad_norm": 0.6445416947609446, "learning_rate": 6.9019174893111e-07, "loss": 0.0347, "step": 31279 }, { "epoch": 3.7092375192695366, "grad_norm": 0.6557396179430635, "learning_rate": 6.896317319107559e-07, "loss": 0.0303, "step": 31280 }, { "epoch": 3.7093561010316614, "grad_norm": 0.7433535838935268, "learning_rate": 6.890719390021893e-07, "loss": 0.0343, "step": 31281 }, { "epoch": 3.709474682793786, "grad_norm": 0.42479985029850587, "learning_rate": 6.885123702105617e-07, "loss": 0.0245, "step": 31282 }, { "epoch": 3.7095932645559113, "grad_norm": 0.5998068039580419, "learning_rate": 6.879530255410438e-07, "loss": 0.0273, "step": 31283 }, { "epoch": 3.7097118463180365, "grad_norm": 0.4876132837258091, "learning_rate": 6.873939049987815e-07, "loss": 0.0185, "step": 31284 }, { "epoch": 3.7098304280801613, "grad_norm": 0.322403789407701, "learning_rate": 6.868350085889374e-07, "loss": 0.0169, "step": 31285 }, { "epoch": 3.709949009842286, "grad_norm": 0.6540900026723424, "learning_rate": 6.862763363166519e-07, "loss": 0.038, "step": 31286 }, { "epoch": 3.7100675916044112, "grad_norm": 0.4861243327495563, "learning_rate": 6.857178881870901e-07, "loss": 0.0242, "step": 31287 }, { "epoch": 3.7101861733665364, "grad_norm": 0.4743489534757921, "learning_rate": 6.851596642053926e-07, "loss": 0.024, "step": 31288 }, { "epoch": 3.710304755128661, "grad_norm": 0.617425257865431, "learning_rate": 6.846016643767022e-07, "loss": 0.0276, "step": 31289 }, { "epoch": 3.710423336890786, "grad_norm": 0.4295018772416999, "learning_rate": 6.840438887061706e-07, "loss": 0.0158, "step": 31290 }, { "epoch": 3.710541918652911, "grad_norm": 0.3490104239556726, "learning_rate": 6.834863371989326e-07, "loss": 0.0166, "step": 31291 }, { "epoch": 3.7106605004150364, "grad_norm": 0.4623706596654786, "learning_rate": 6.829290098601365e-07, "loss": 0.0205, "step": 31292 }, { "epoch": 3.710779082177161, "grad_norm": 0.39795077631796866, "learning_rate": 6.823719066949092e-07, "loss": 0.0209, "step": 31293 }, { "epoch": 3.710897663939286, "grad_norm": 0.4077578838591052, "learning_rate": 6.81815027708399e-07, "loss": 0.0208, "step": 31294 }, { "epoch": 3.711016245701411, "grad_norm": 0.5206976875004036, "learning_rate": 6.812583729057298e-07, "loss": 0.0297, "step": 31295 }, { "epoch": 3.7111348274635363, "grad_norm": 0.494399881860018, "learning_rate": 6.807019422920391e-07, "loss": 0.0246, "step": 31296 }, { "epoch": 3.711253409225661, "grad_norm": 0.49104046508334653, "learning_rate": 6.801457358724506e-07, "loss": 0.0222, "step": 31297 }, { "epoch": 3.711371990987786, "grad_norm": 0.4558840444684764, "learning_rate": 6.79589753652099e-07, "loss": 0.019, "step": 31298 }, { "epoch": 3.711490572749911, "grad_norm": 0.6512033849394941, "learning_rate": 6.790339956361053e-07, "loss": 0.0351, "step": 31299 }, { "epoch": 3.711609154512036, "grad_norm": 0.5294645907499599, "learning_rate": 6.784784618295931e-07, "loss": 0.0196, "step": 31300 }, { "epoch": 3.711727736274161, "grad_norm": 0.5277695702468178, "learning_rate": 6.779231522376833e-07, "loss": 0.0295, "step": 31301 }, { "epoch": 3.711846318036286, "grad_norm": 0.40024928928296, "learning_rate": 6.77368066865497e-07, "loss": 0.0157, "step": 31302 }, { "epoch": 3.711964899798411, "grad_norm": 0.5631502341789902, "learning_rate": 6.76813205718152e-07, "loss": 0.0294, "step": 31303 }, { "epoch": 3.712083481560536, "grad_norm": 0.6504477904419412, "learning_rate": 6.762585688007611e-07, "loss": 0.0364, "step": 31304 }, { "epoch": 3.712202063322661, "grad_norm": 0.6040609692887471, "learning_rate": 6.757041561184396e-07, "loss": 0.024, "step": 31305 }, { "epoch": 3.712320645084786, "grad_norm": 0.5655548290009161, "learning_rate": 6.751499676762974e-07, "loss": 0.0201, "step": 31306 }, { "epoch": 3.712439226846911, "grad_norm": 0.2922779205859103, "learning_rate": 6.74596003479444e-07, "loss": 0.0116, "step": 31307 }, { "epoch": 3.712557808609036, "grad_norm": 0.6628778483309246, "learning_rate": 6.740422635329813e-07, "loss": 0.0271, "step": 31308 }, { "epoch": 3.712676390371161, "grad_norm": 0.7177799862334735, "learning_rate": 6.734887478420243e-07, "loss": 0.0376, "step": 31309 }, { "epoch": 3.712794972133286, "grad_norm": 0.4094201062609272, "learning_rate": 6.729354564116663e-07, "loss": 0.0179, "step": 31310 }, { "epoch": 3.712913553895411, "grad_norm": 0.6022478578932542, "learning_rate": 6.723823892470116e-07, "loss": 0.0325, "step": 31311 }, { "epoch": 3.713032135657536, "grad_norm": 0.6981141783957656, "learning_rate": 6.718295463531587e-07, "loss": 0.0342, "step": 31312 }, { "epoch": 3.7131507174196607, "grad_norm": 0.550364380168794, "learning_rate": 6.712769277352038e-07, "loss": 0.0262, "step": 31313 }, { "epoch": 3.713269299181786, "grad_norm": 0.49941772727470024, "learning_rate": 6.707245333982454e-07, "loss": 0.0248, "step": 31314 }, { "epoch": 3.7133878809439107, "grad_norm": 0.46839219842527424, "learning_rate": 6.701723633473711e-07, "loss": 0.0173, "step": 31315 }, { "epoch": 3.713506462706036, "grad_norm": 0.498312809990686, "learning_rate": 6.696204175876686e-07, "loss": 0.0251, "step": 31316 }, { "epoch": 3.7136250444681607, "grad_norm": 0.42102410297743065, "learning_rate": 6.690686961242337e-07, "loss": 0.0191, "step": 31317 }, { "epoch": 3.713743626230286, "grad_norm": 0.5828399473704055, "learning_rate": 6.685171989621486e-07, "loss": 0.0251, "step": 31318 }, { "epoch": 3.7138622079924106, "grad_norm": 0.526886823584554, "learning_rate": 6.679659261064952e-07, "loss": 0.0259, "step": 31319 }, { "epoch": 3.713980789754536, "grad_norm": 0.4301795265383838, "learning_rate": 6.674148775623612e-07, "loss": 0.0228, "step": 31320 }, { "epoch": 3.7140993715166606, "grad_norm": 0.3765719653285971, "learning_rate": 6.668640533348202e-07, "loss": 0.0197, "step": 31321 }, { "epoch": 3.714217953278786, "grad_norm": 0.5357365350365233, "learning_rate": 6.663134534289545e-07, "loss": 0.0256, "step": 31322 }, { "epoch": 3.7143365350409105, "grad_norm": 0.4488290686515934, "learning_rate": 6.657630778498403e-07, "loss": 0.0191, "step": 31323 }, { "epoch": 3.7144551168030358, "grad_norm": 0.5834645786476494, "learning_rate": 6.652129266025486e-07, "loss": 0.0297, "step": 31324 }, { "epoch": 3.714573698565161, "grad_norm": 0.4133307284988312, "learning_rate": 6.646629996921533e-07, "loss": 0.0198, "step": 31325 }, { "epoch": 3.7146922803272857, "grad_norm": 0.42784144194181023, "learning_rate": 6.641132971237224e-07, "loss": 0.0178, "step": 31326 }, { "epoch": 3.7148108620894105, "grad_norm": 0.6771020556153522, "learning_rate": 6.635638189023241e-07, "loss": 0.0348, "step": 31327 }, { "epoch": 3.7149294438515357, "grad_norm": 0.5197000221710092, "learning_rate": 6.630145650330239e-07, "loss": 0.0274, "step": 31328 }, { "epoch": 3.715048025613661, "grad_norm": 0.4881719939585504, "learning_rate": 6.62465535520887e-07, "loss": 0.0189, "step": 31329 }, { "epoch": 3.7151666073757856, "grad_norm": 0.8144398426255635, "learning_rate": 6.619167303709706e-07, "loss": 0.0311, "step": 31330 }, { "epoch": 3.7152851891379104, "grad_norm": 0.5440610200640724, "learning_rate": 6.6136814958834e-07, "loss": 0.0261, "step": 31331 }, { "epoch": 3.7154037709000356, "grad_norm": 0.5776969844736991, "learning_rate": 6.608197931780496e-07, "loss": 0.0225, "step": 31332 }, { "epoch": 3.715522352662161, "grad_norm": 0.5234362431736586, "learning_rate": 6.602716611451509e-07, "loss": 0.0217, "step": 31333 }, { "epoch": 3.7156409344242856, "grad_norm": 0.6023180199824486, "learning_rate": 6.597237534947009e-07, "loss": 0.0246, "step": 31334 }, { "epoch": 3.7157595161864103, "grad_norm": 0.5298404128275658, "learning_rate": 6.591760702317484e-07, "loss": 0.0237, "step": 31335 }, { "epoch": 3.7158780979485355, "grad_norm": 0.5430989781671565, "learning_rate": 6.586286113613477e-07, "loss": 0.0203, "step": 31336 }, { "epoch": 3.7159966797106607, "grad_norm": 0.6211084649653716, "learning_rate": 6.580813768885391e-07, "loss": 0.0318, "step": 31337 }, { "epoch": 3.7161152614727855, "grad_norm": 0.6301015669418718, "learning_rate": 6.575343668183687e-07, "loss": 0.0195, "step": 31338 }, { "epoch": 3.7162338432349102, "grad_norm": 0.3028899693869545, "learning_rate": 6.569875811558823e-07, "loss": 0.0124, "step": 31339 }, { "epoch": 3.7163524249970354, "grad_norm": 0.41236998816816633, "learning_rate": 6.564410199061205e-07, "loss": 0.0165, "step": 31340 }, { "epoch": 3.7164710067591606, "grad_norm": 0.5891114815407545, "learning_rate": 6.558946830741208e-07, "loss": 0.0223, "step": 31341 }, { "epoch": 3.7165895885212854, "grad_norm": 0.5369237889239968, "learning_rate": 6.553485706649154e-07, "loss": 0.0294, "step": 31342 }, { "epoch": 3.71670817028341, "grad_norm": 0.581004699226553, "learning_rate": 6.548026826835474e-07, "loss": 0.0291, "step": 31343 }, { "epoch": 3.7168267520455354, "grad_norm": 0.48114192951163853, "learning_rate": 6.542570191350434e-07, "loss": 0.018, "step": 31344 }, { "epoch": 3.7169453338076606, "grad_norm": 0.45449022335269273, "learning_rate": 6.537115800244325e-07, "loss": 0.0228, "step": 31345 }, { "epoch": 3.7170639155697853, "grad_norm": 0.2450118833978659, "learning_rate": 6.531663653567471e-07, "loss": 0.0093, "step": 31346 }, { "epoch": 3.71718249733191, "grad_norm": 0.6010754207005617, "learning_rate": 6.526213751370135e-07, "loss": 0.0276, "step": 31347 }, { "epoch": 3.7173010790940353, "grad_norm": 0.46500710041705606, "learning_rate": 6.52076609370253e-07, "loss": 0.0224, "step": 31348 }, { "epoch": 3.7174196608561605, "grad_norm": 0.6298496828003539, "learning_rate": 6.515320680614861e-07, "loss": 0.0284, "step": 31349 }, { "epoch": 3.7175382426182852, "grad_norm": 0.6055532029719441, "learning_rate": 6.50987751215737e-07, "loss": 0.0304, "step": 31350 }, { "epoch": 3.7176568243804105, "grad_norm": 0.6893239920430956, "learning_rate": 6.504436588380236e-07, "loss": 0.0377, "step": 31351 }, { "epoch": 3.717775406142535, "grad_norm": 0.5030996753946907, "learning_rate": 6.498997909333615e-07, "loss": 0.0202, "step": 31352 }, { "epoch": 3.7178939879046604, "grad_norm": 0.8951321014205403, "learning_rate": 6.493561475067577e-07, "loss": 0.0506, "step": 31353 }, { "epoch": 3.718012569666785, "grad_norm": 0.6691634877965267, "learning_rate": 6.488127285632333e-07, "loss": 0.0193, "step": 31354 }, { "epoch": 3.7181311514289104, "grad_norm": 0.6087716548547942, "learning_rate": 6.482695341077927e-07, "loss": 0.0225, "step": 31355 }, { "epoch": 3.718249733191035, "grad_norm": 0.5721073271800746, "learning_rate": 6.477265641454483e-07, "loss": 0.0352, "step": 31356 }, { "epoch": 3.7183683149531603, "grad_norm": 0.6115339671884462, "learning_rate": 6.471838186811963e-07, "loss": 0.0259, "step": 31357 }, { "epoch": 3.718486896715285, "grad_norm": 0.37851158815023356, "learning_rate": 6.46641297720052e-07, "loss": 0.0179, "step": 31358 }, { "epoch": 3.7186054784774103, "grad_norm": 0.6400551659168957, "learning_rate": 6.460990012670087e-07, "loss": 0.0291, "step": 31359 }, { "epoch": 3.718724060239535, "grad_norm": 0.850358580604002, "learning_rate": 6.455569293270681e-07, "loss": 0.0405, "step": 31360 }, { "epoch": 3.7188426420016603, "grad_norm": 0.4344119081603087, "learning_rate": 6.450150819052258e-07, "loss": 0.0181, "step": 31361 }, { "epoch": 3.718961223763785, "grad_norm": 0.5726534482721657, "learning_rate": 6.444734590064782e-07, "loss": 0.0242, "step": 31362 }, { "epoch": 3.71907980552591, "grad_norm": 0.37878239367882655, "learning_rate": 6.439320606358212e-07, "loss": 0.0121, "step": 31363 }, { "epoch": 3.719198387288035, "grad_norm": 0.5516078270057642, "learning_rate": 6.433908867982396e-07, "loss": 0.0234, "step": 31364 }, { "epoch": 3.71931696905016, "grad_norm": 0.42646111119723484, "learning_rate": 6.428499374987296e-07, "loss": 0.0178, "step": 31365 }, { "epoch": 3.719435550812285, "grad_norm": 0.46586631059521266, "learning_rate": 6.423092127422731e-07, "loss": 0.0274, "step": 31366 }, { "epoch": 3.71955413257441, "grad_norm": 0.6011538222429266, "learning_rate": 6.417687125338578e-07, "loss": 0.0228, "step": 31367 }, { "epoch": 3.719672714336535, "grad_norm": 0.37067498546757177, "learning_rate": 6.412284368784604e-07, "loss": 0.0164, "step": 31368 }, { "epoch": 3.71979129609866, "grad_norm": 0.3671073355746183, "learning_rate": 6.406883857810714e-07, "loss": 0.0151, "step": 31369 }, { "epoch": 3.719909877860785, "grad_norm": 0.5133899147894238, "learning_rate": 6.401485592466616e-07, "loss": 0.017, "step": 31370 }, { "epoch": 3.72002845962291, "grad_norm": 0.5347291322213187, "learning_rate": 6.396089572802105e-07, "loss": 0.0267, "step": 31371 }, { "epoch": 3.720147041385035, "grad_norm": 0.3814375392548133, "learning_rate": 6.390695798866919e-07, "loss": 0.0171, "step": 31372 }, { "epoch": 3.72026562314716, "grad_norm": 0.4781739302964642, "learning_rate": 6.385304270710768e-07, "loss": 0.0191, "step": 31373 }, { "epoch": 3.720384204909285, "grad_norm": 0.5105097275048281, "learning_rate": 6.379914988383418e-07, "loss": 0.0233, "step": 31374 }, { "epoch": 3.72050278667141, "grad_norm": 0.4899655802326429, "learning_rate": 6.374527951934439e-07, "loss": 0.0236, "step": 31375 }, { "epoch": 3.7206213684335347, "grad_norm": 0.2991300218966744, "learning_rate": 6.369143161413626e-07, "loss": 0.0116, "step": 31376 }, { "epoch": 3.72073995019566, "grad_norm": 0.4793432644045596, "learning_rate": 6.363760616870495e-07, "loss": 0.0203, "step": 31377 }, { "epoch": 3.720858531957785, "grad_norm": 0.5401187130967229, "learning_rate": 6.358380318354756e-07, "loss": 0.0217, "step": 31378 }, { "epoch": 3.72097711371991, "grad_norm": 0.29279173781475853, "learning_rate": 6.35300226591598e-07, "loss": 0.012, "step": 31379 }, { "epoch": 3.7210956954820347, "grad_norm": 0.7251777113500958, "learning_rate": 6.347626459603712e-07, "loss": 0.035, "step": 31380 }, { "epoch": 3.72121427724416, "grad_norm": 0.854766551909778, "learning_rate": 6.342252899467604e-07, "loss": 0.0296, "step": 31381 }, { "epoch": 3.721332859006285, "grad_norm": 0.5205416717842121, "learning_rate": 6.336881585557092e-07, "loss": 0.0324, "step": 31382 }, { "epoch": 3.72145144076841, "grad_norm": 0.5572041942615217, "learning_rate": 6.331512517921717e-07, "loss": 0.0273, "step": 31383 }, { "epoch": 3.7215700225305346, "grad_norm": 0.5731953354132416, "learning_rate": 6.326145696610997e-07, "loss": 0.0232, "step": 31384 }, { "epoch": 3.72168860429266, "grad_norm": 1.0610501899611209, "learning_rate": 6.320781121674446e-07, "loss": 0.0529, "step": 31385 }, { "epoch": 3.721807186054785, "grad_norm": 0.5524156712054321, "learning_rate": 6.315418793161415e-07, "loss": 0.0273, "step": 31386 }, { "epoch": 3.7219257678169098, "grad_norm": 0.949151499823319, "learning_rate": 6.31005871112142e-07, "loss": 0.0413, "step": 31387 }, { "epoch": 3.7220443495790345, "grad_norm": 0.41184729100584533, "learning_rate": 6.304700875603864e-07, "loss": 0.0181, "step": 31388 }, { "epoch": 3.7221629313411597, "grad_norm": 0.5862299272363212, "learning_rate": 6.299345286658098e-07, "loss": 0.0295, "step": 31389 }, { "epoch": 3.722281513103285, "grad_norm": 0.3310999030167397, "learning_rate": 6.293991944333527e-07, "loss": 0.0166, "step": 31390 }, { "epoch": 3.7224000948654097, "grad_norm": 0.4508415574492145, "learning_rate": 6.288640848679528e-07, "loss": 0.0206, "step": 31391 }, { "epoch": 3.7225186766275344, "grad_norm": 0.44461685518151634, "learning_rate": 6.283291999745394e-07, "loss": 0.0212, "step": 31392 }, { "epoch": 3.7226372583896596, "grad_norm": 0.4567594192242598, "learning_rate": 6.27794539758042e-07, "loss": 0.0223, "step": 31393 }, { "epoch": 3.722755840151785, "grad_norm": 0.6837596429753174, "learning_rate": 6.272601042233928e-07, "loss": 0.0325, "step": 31394 }, { "epoch": 3.7228744219139096, "grad_norm": 0.5333662400899111, "learning_rate": 6.267258933755183e-07, "loss": 0.027, "step": 31395 }, { "epoch": 3.7229930036760344, "grad_norm": 0.8843911135296453, "learning_rate": 6.26191907219345e-07, "loss": 0.0459, "step": 31396 }, { "epoch": 3.7231115854381596, "grad_norm": 0.7630283564912982, "learning_rate": 6.256581457597888e-07, "loss": 0.0279, "step": 31397 }, { "epoch": 3.7232301672002848, "grad_norm": 0.635391567563164, "learning_rate": 6.25124609001776e-07, "loss": 0.0422, "step": 31398 }, { "epoch": 3.7233487489624095, "grad_norm": 0.6852837379913299, "learning_rate": 6.245912969502249e-07, "loss": 0.0255, "step": 31399 }, { "epoch": 3.7234673307245347, "grad_norm": 0.4301615565412388, "learning_rate": 6.240582096100484e-07, "loss": 0.0245, "step": 31400 }, { "epoch": 3.7235859124866595, "grad_norm": 0.6694338551763499, "learning_rate": 6.235253469861647e-07, "loss": 0.0301, "step": 31401 }, { "epoch": 3.7237044942487847, "grad_norm": 0.309036317281725, "learning_rate": 6.229927090834863e-07, "loss": 0.0129, "step": 31402 }, { "epoch": 3.7238230760109094, "grad_norm": 0.39048245110929425, "learning_rate": 6.224602959069237e-07, "loss": 0.0164, "step": 31403 }, { "epoch": 3.7239416577730347, "grad_norm": 0.7859851889514737, "learning_rate": 6.219281074613809e-07, "loss": 0.0424, "step": 31404 }, { "epoch": 3.7240602395351594, "grad_norm": 0.6385332949325545, "learning_rate": 6.213961437517651e-07, "loss": 0.0355, "step": 31405 }, { "epoch": 3.7241788212972846, "grad_norm": 0.6041970605596065, "learning_rate": 6.208644047829837e-07, "loss": 0.0218, "step": 31406 }, { "epoch": 3.7242974030594094, "grad_norm": 0.44863041896716305, "learning_rate": 6.203328905599381e-07, "loss": 0.0169, "step": 31407 }, { "epoch": 3.7244159848215346, "grad_norm": 0.23242413725890373, "learning_rate": 6.198016010875246e-07, "loss": 0.0081, "step": 31408 }, { "epoch": 3.7245345665836593, "grad_norm": 0.719922776147399, "learning_rate": 6.192705363706447e-07, "loss": 0.0274, "step": 31409 }, { "epoch": 3.7246531483457845, "grad_norm": 0.4461987672380783, "learning_rate": 6.187396964141917e-07, "loss": 0.0248, "step": 31410 }, { "epoch": 3.7247717301079093, "grad_norm": 0.7016852635517967, "learning_rate": 6.18209081223059e-07, "loss": 0.0319, "step": 31411 }, { "epoch": 3.7248903118700345, "grad_norm": 0.5172169628086125, "learning_rate": 6.176786908021453e-07, "loss": 0.0268, "step": 31412 }, { "epoch": 3.7250088936321593, "grad_norm": 0.7138909025593738, "learning_rate": 6.171485251563275e-07, "loss": 0.0232, "step": 31413 }, { "epoch": 3.7251274753942845, "grad_norm": 0.6817262997088542, "learning_rate": 6.166185842905043e-07, "loss": 0.0283, "step": 31414 }, { "epoch": 3.725246057156409, "grad_norm": 0.4571313605845929, "learning_rate": 6.16088868209555e-07, "loss": 0.0217, "step": 31415 }, { "epoch": 3.7253646389185344, "grad_norm": 0.4559350405182197, "learning_rate": 6.155593769183676e-07, "loss": 0.0154, "step": 31416 }, { "epoch": 3.725483220680659, "grad_norm": 0.49418800488005843, "learning_rate": 6.150301104218187e-07, "loss": 0.0212, "step": 31417 }, { "epoch": 3.7256018024427844, "grad_norm": 0.48815364113940213, "learning_rate": 6.145010687247904e-07, "loss": 0.025, "step": 31418 }, { "epoch": 3.725720384204909, "grad_norm": 0.28933899863447654, "learning_rate": 6.139722518321567e-07, "loss": 0.0175, "step": 31419 }, { "epoch": 3.7258389659670343, "grad_norm": 0.5044762431122067, "learning_rate": 6.134436597487969e-07, "loss": 0.0296, "step": 31420 }, { "epoch": 3.725957547729159, "grad_norm": 0.4696265709235607, "learning_rate": 6.129152924795794e-07, "loss": 0.016, "step": 31421 }, { "epoch": 3.7260761294912843, "grad_norm": 0.39048026185577067, "learning_rate": 6.123871500293782e-07, "loss": 0.0154, "step": 31422 }, { "epoch": 3.726194711253409, "grad_norm": 0.7919728661899215, "learning_rate": 6.118592324030642e-07, "loss": 0.041, "step": 31423 }, { "epoch": 3.7263132930155343, "grad_norm": 0.7211488229087859, "learning_rate": 6.113315396054948e-07, "loss": 0.0346, "step": 31424 }, { "epoch": 3.726431874777659, "grad_norm": 0.7259068586121512, "learning_rate": 6.108040716415492e-07, "loss": 0.0307, "step": 31425 }, { "epoch": 3.7265504565397842, "grad_norm": 0.5236783249316528, "learning_rate": 6.102768285160764e-07, "loss": 0.0247, "step": 31426 }, { "epoch": 3.7266690383019094, "grad_norm": 0.5816078880888574, "learning_rate": 6.097498102339449e-07, "loss": 0.0348, "step": 31427 }, { "epoch": 3.726787620064034, "grad_norm": 0.8581068302581635, "learning_rate": 6.09223016800009e-07, "loss": 0.046, "step": 31428 }, { "epoch": 3.726906201826159, "grad_norm": 0.42337734359445073, "learning_rate": 6.086964482191287e-07, "loss": 0.0202, "step": 31429 }, { "epoch": 3.727024783588284, "grad_norm": 0.4847422077567859, "learning_rate": 6.081701044961585e-07, "loss": 0.0204, "step": 31430 }, { "epoch": 3.7271433653504094, "grad_norm": 0.7406760079031143, "learning_rate": 6.076439856359445e-07, "loss": 0.0351, "step": 31431 }, { "epoch": 3.727261947112534, "grad_norm": 0.6331416833031045, "learning_rate": 6.071180916433439e-07, "loss": 0.0354, "step": 31432 }, { "epoch": 3.727380528874659, "grad_norm": 0.26979544875949774, "learning_rate": 6.065924225232028e-07, "loss": 0.0118, "step": 31433 }, { "epoch": 3.727499110636784, "grad_norm": 0.41965160393139705, "learning_rate": 6.060669782803646e-07, "loss": 0.01, "step": 31434 }, { "epoch": 3.7276176923989093, "grad_norm": 0.47854535581367214, "learning_rate": 6.055417589196727e-07, "loss": 0.0216, "step": 31435 }, { "epoch": 3.727736274161034, "grad_norm": 0.22045720930832644, "learning_rate": 6.050167644459758e-07, "loss": 0.0117, "step": 31436 }, { "epoch": 3.727854855923159, "grad_norm": 0.3614185464159956, "learning_rate": 6.044919948641064e-07, "loss": 0.0196, "step": 31437 }, { "epoch": 3.727973437685284, "grad_norm": 0.43597212440234795, "learning_rate": 6.039674501789078e-07, "loss": 0.0208, "step": 31438 }, { "epoch": 3.728092019447409, "grad_norm": 0.497110646592538, "learning_rate": 6.034431303952092e-07, "loss": 0.0239, "step": 31439 }, { "epoch": 3.728210601209534, "grad_norm": 0.8585854730096437, "learning_rate": 6.029190355178488e-07, "loss": 0.0364, "step": 31440 }, { "epoch": 3.7283291829716587, "grad_norm": 0.6849722370448874, "learning_rate": 6.023951655516613e-07, "loss": 0.0312, "step": 31441 }, { "epoch": 3.728447764733784, "grad_norm": 0.5589287010895808, "learning_rate": 6.01871520501468e-07, "loss": 0.0323, "step": 31442 }, { "epoch": 3.728566346495909, "grad_norm": 0.536722028292015, "learning_rate": 6.013481003721011e-07, "loss": 0.0273, "step": 31443 }, { "epoch": 3.728684928258034, "grad_norm": 0.42959275159833354, "learning_rate": 6.008249051683845e-07, "loss": 0.0242, "step": 31444 }, { "epoch": 3.7288035100201586, "grad_norm": 0.4960768241895897, "learning_rate": 6.00301934895145e-07, "loss": 0.0216, "step": 31445 }, { "epoch": 3.728922091782284, "grad_norm": 0.5007720737002487, "learning_rate": 5.997791895571953e-07, "loss": 0.0269, "step": 31446 }, { "epoch": 3.729040673544409, "grad_norm": 0.3013241271665834, "learning_rate": 5.992566691593649e-07, "loss": 0.0122, "step": 31447 }, { "epoch": 3.729159255306534, "grad_norm": 0.34579706087108675, "learning_rate": 5.987343737064637e-07, "loss": 0.0148, "step": 31448 }, { "epoch": 3.7292778370686586, "grad_norm": 0.5977513774873052, "learning_rate": 5.982123032033077e-07, "loss": 0.0284, "step": 31449 }, { "epoch": 3.7293964188307838, "grad_norm": 0.516296902625843, "learning_rate": 5.976904576547121e-07, "loss": 0.0217, "step": 31450 }, { "epoch": 3.729515000592909, "grad_norm": 0.30606002770747226, "learning_rate": 5.971688370654871e-07, "loss": 0.0139, "step": 31451 }, { "epoch": 3.7296335823550337, "grad_norm": 0.4207631534662485, "learning_rate": 5.966474414404427e-07, "loss": 0.0172, "step": 31452 }, { "epoch": 3.729752164117159, "grad_norm": 0.3459479877251247, "learning_rate": 5.961262707843807e-07, "loss": 0.0205, "step": 31453 }, { "epoch": 3.7298707458792837, "grad_norm": 0.5761065366959414, "learning_rate": 5.956053251021082e-07, "loss": 0.0294, "step": 31454 }, { "epoch": 3.729989327641409, "grad_norm": 0.6023278437147181, "learning_rate": 5.95084604398427e-07, "loss": 0.0249, "step": 31455 }, { "epoch": 3.7301079094035337, "grad_norm": 0.5336093342866962, "learning_rate": 5.945641086781417e-07, "loss": 0.0154, "step": 31456 }, { "epoch": 3.730226491165659, "grad_norm": 0.47192674303915694, "learning_rate": 5.940438379460455e-07, "loss": 0.0239, "step": 31457 }, { "epoch": 3.7303450729277836, "grad_norm": 0.4894764535089744, "learning_rate": 5.935237922069376e-07, "loss": 0.0284, "step": 31458 }, { "epoch": 3.730463654689909, "grad_norm": 0.35834282310854104, "learning_rate": 5.930039714656083e-07, "loss": 0.0168, "step": 31459 }, { "epoch": 3.7305822364520336, "grad_norm": 0.4640675569926395, "learning_rate": 5.924843757268539e-07, "loss": 0.0204, "step": 31460 }, { "epoch": 3.7307008182141588, "grad_norm": 0.6070660389321221, "learning_rate": 5.91965004995465e-07, "loss": 0.0415, "step": 31461 }, { "epoch": 3.7308193999762835, "grad_norm": 0.8688516482562573, "learning_rate": 5.914458592762267e-07, "loss": 0.0502, "step": 31462 }, { "epoch": 3.7309379817384087, "grad_norm": 0.8914412314096773, "learning_rate": 5.909269385739269e-07, "loss": 0.0454, "step": 31463 }, { "epoch": 3.7310565635005335, "grad_norm": 0.42286283905362515, "learning_rate": 5.904082428933449e-07, "loss": 0.0204, "step": 31464 }, { "epoch": 3.7311751452626587, "grad_norm": 0.6338956126417437, "learning_rate": 5.898897722392688e-07, "loss": 0.0281, "step": 31465 }, { "epoch": 3.7312937270247835, "grad_norm": 0.5526830704022726, "learning_rate": 5.893715266164751e-07, "loss": 0.0259, "step": 31466 }, { "epoch": 3.7314123087869087, "grad_norm": 0.6311611317554382, "learning_rate": 5.888535060297407e-07, "loss": 0.015, "step": 31467 }, { "epoch": 3.7315308905490334, "grad_norm": 0.8327148140131049, "learning_rate": 5.883357104838421e-07, "loss": 0.0383, "step": 31468 }, { "epoch": 3.7316494723111586, "grad_norm": 0.5268788923428607, "learning_rate": 5.878181399835536e-07, "loss": 0.0206, "step": 31469 }, { "epoch": 3.7317680540732834, "grad_norm": 0.5077923292321295, "learning_rate": 5.873007945336433e-07, "loss": 0.0225, "step": 31470 }, { "epoch": 3.7318866358354086, "grad_norm": 1.0298646092415453, "learning_rate": 5.867836741388855e-07, "loss": 0.032, "step": 31471 }, { "epoch": 3.7320052175975333, "grad_norm": 0.6096806292414664, "learning_rate": 5.862667788040455e-07, "loss": 0.0254, "step": 31472 }, { "epoch": 3.7321237993596585, "grad_norm": 0.4696604652012172, "learning_rate": 5.857501085338834e-07, "loss": 0.0204, "step": 31473 }, { "epoch": 3.7322423811217833, "grad_norm": 0.39248808214395603, "learning_rate": 5.852336633331706e-07, "loss": 0.0169, "step": 31474 }, { "epoch": 3.7323609628839085, "grad_norm": 0.8215840780245833, "learning_rate": 5.847174432066616e-07, "loss": 0.0443, "step": 31475 }, { "epoch": 3.7324795446460337, "grad_norm": 0.6073539371029008, "learning_rate": 5.842014481591191e-07, "loss": 0.0332, "step": 31476 }, { "epoch": 3.7325981264081585, "grad_norm": 0.3426013115458381, "learning_rate": 5.836856781952976e-07, "loss": 0.0152, "step": 31477 }, { "epoch": 3.7327167081702832, "grad_norm": 0.7746657960468412, "learning_rate": 5.831701333199574e-07, "loss": 0.0342, "step": 31478 }, { "epoch": 3.7328352899324084, "grad_norm": 0.7104021609688193, "learning_rate": 5.826548135378418e-07, "loss": 0.0217, "step": 31479 }, { "epoch": 3.7329538716945336, "grad_norm": 0.7967910067788115, "learning_rate": 5.821397188537053e-07, "loss": 0.0301, "step": 31480 }, { "epoch": 3.7330724534566584, "grad_norm": 0.570837678490679, "learning_rate": 5.816248492722998e-07, "loss": 0.0225, "step": 31481 }, { "epoch": 3.733191035218783, "grad_norm": 0.3397118236136237, "learning_rate": 5.811102047983686e-07, "loss": 0.0184, "step": 31482 }, { "epoch": 3.7333096169809084, "grad_norm": 0.7507934857609583, "learning_rate": 5.805957854366606e-07, "loss": 0.0315, "step": 31483 }, { "epoch": 3.7334281987430336, "grad_norm": 0.2890883767510263, "learning_rate": 5.800815911919083e-07, "loss": 0.0107, "step": 31484 }, { "epoch": 3.7335467805051583, "grad_norm": 0.5835256283058244, "learning_rate": 5.795676220688634e-07, "loss": 0.0221, "step": 31485 }, { "epoch": 3.733665362267283, "grad_norm": 0.5763885149624606, "learning_rate": 5.790538780722554e-07, "loss": 0.0227, "step": 31486 }, { "epoch": 3.7337839440294083, "grad_norm": 0.7138293291891239, "learning_rate": 5.785403592068223e-07, "loss": 0.0329, "step": 31487 }, { "epoch": 3.7339025257915335, "grad_norm": 0.7378542947472001, "learning_rate": 5.780270654773018e-07, "loss": 0.0429, "step": 31488 }, { "epoch": 3.7340211075536582, "grad_norm": 0.5580674052540348, "learning_rate": 5.775139968884236e-07, "loss": 0.0235, "step": 31489 }, { "epoch": 3.734139689315783, "grad_norm": 0.7748729777984324, "learning_rate": 5.770011534449199e-07, "loss": 0.0296, "step": 31490 }, { "epoch": 3.734258271077908, "grad_norm": 0.45729090254259935, "learning_rate": 5.764885351515092e-07, "loss": 0.0251, "step": 31491 }, { "epoch": 3.7343768528400334, "grad_norm": 0.6207981888617391, "learning_rate": 5.759761420129322e-07, "loss": 0.0338, "step": 31492 }, { "epoch": 3.734495434602158, "grad_norm": 0.5146157873188513, "learning_rate": 5.754639740338991e-07, "loss": 0.0207, "step": 31493 }, { "epoch": 3.734614016364283, "grad_norm": 0.27830462933701045, "learning_rate": 5.74952031219142e-07, "loss": 0.0114, "step": 31494 }, { "epoch": 3.734732598126408, "grad_norm": 0.7748754125224554, "learning_rate": 5.744403135733683e-07, "loss": 0.0374, "step": 31495 }, { "epoch": 3.7348511798885333, "grad_norm": 0.4310123175666704, "learning_rate": 5.739288211013078e-07, "loss": 0.025, "step": 31496 }, { "epoch": 3.734969761650658, "grad_norm": 0.5243212560672253, "learning_rate": 5.734175538076674e-07, "loss": 0.0204, "step": 31497 }, { "epoch": 3.735088343412783, "grad_norm": 0.618937236501607, "learning_rate": 5.729065116971632e-07, "loss": 0.0356, "step": 31498 }, { "epoch": 3.735206925174908, "grad_norm": 0.6604166760159736, "learning_rate": 5.723956947745079e-07, "loss": 0.025, "step": 31499 }, { "epoch": 3.7353255069370332, "grad_norm": 0.5619970930627122, "learning_rate": 5.718851030444089e-07, "loss": 0.0265, "step": 31500 }, { "epoch": 3.735444088699158, "grad_norm": 0.44098187680912293, "learning_rate": 5.713747365115762e-07, "loss": 0.0175, "step": 31501 }, { "epoch": 3.735562670461283, "grad_norm": 0.6555850436624612, "learning_rate": 5.708645951807062e-07, "loss": 0.0288, "step": 31502 }, { "epoch": 3.735681252223408, "grad_norm": 0.36825361584651944, "learning_rate": 5.703546790565117e-07, "loss": 0.0153, "step": 31503 }, { "epoch": 3.735799833985533, "grad_norm": 0.36321891532488887, "learning_rate": 5.69844988143689e-07, "loss": 0.0187, "step": 31504 }, { "epoch": 3.735918415747658, "grad_norm": 0.4329621107298743, "learning_rate": 5.693355224469371e-07, "loss": 0.02, "step": 31505 }, { "epoch": 3.736036997509783, "grad_norm": 0.5591615756566501, "learning_rate": 5.688262819709494e-07, "loss": 0.026, "step": 31506 }, { "epoch": 3.736155579271908, "grad_norm": 0.40689796027593433, "learning_rate": 5.683172667204306e-07, "loss": 0.0166, "step": 31507 }, { "epoch": 3.736274161034033, "grad_norm": 0.45304236148754695, "learning_rate": 5.678084767000602e-07, "loss": 0.0141, "step": 31508 }, { "epoch": 3.736392742796158, "grad_norm": 0.6346176958630031, "learning_rate": 5.672999119145373e-07, "loss": 0.0287, "step": 31509 }, { "epoch": 3.736511324558283, "grad_norm": 0.4390888766204307, "learning_rate": 5.667915723685496e-07, "loss": 0.0183, "step": 31510 }, { "epoch": 3.736629906320408, "grad_norm": 0.41632082488531663, "learning_rate": 5.662834580667798e-07, "loss": 0.0114, "step": 31511 }, { "epoch": 3.736748488082533, "grad_norm": 0.38291472906687224, "learning_rate": 5.657755690139155e-07, "loss": 0.0168, "step": 31512 }, { "epoch": 3.7368670698446578, "grad_norm": 0.740850647627758, "learning_rate": 5.652679052146365e-07, "loss": 0.0401, "step": 31513 }, { "epoch": 3.736985651606783, "grad_norm": 0.577052064806342, "learning_rate": 5.647604666736223e-07, "loss": 0.0199, "step": 31514 }, { "epoch": 3.7371042333689077, "grad_norm": 0.3860605884489728, "learning_rate": 5.642532533955525e-07, "loss": 0.0174, "step": 31515 }, { "epoch": 3.737222815131033, "grad_norm": 0.5737617062784284, "learning_rate": 5.637462653851039e-07, "loss": 0.0189, "step": 31516 }, { "epoch": 3.7373413968931577, "grad_norm": 0.8299250246168696, "learning_rate": 5.63239502646945e-07, "loss": 0.0426, "step": 31517 }, { "epoch": 3.737459978655283, "grad_norm": 0.5031544166199313, "learning_rate": 5.627329651857582e-07, "loss": 0.0193, "step": 31518 }, { "epoch": 3.7375785604174077, "grad_norm": 0.4917967379131274, "learning_rate": 5.622266530062009e-07, "loss": 0.0189, "step": 31519 }, { "epoch": 3.737697142179533, "grad_norm": 0.6811120184039263, "learning_rate": 5.617205661129471e-07, "loss": 0.0418, "step": 31520 }, { "epoch": 3.7378157239416576, "grad_norm": 0.3960129895445664, "learning_rate": 5.612147045106625e-07, "loss": 0.0149, "step": 31521 }, { "epoch": 3.737934305703783, "grad_norm": 0.4838510591292708, "learning_rate": 5.607090682040101e-07, "loss": 0.0215, "step": 31522 }, { "epoch": 3.7380528874659076, "grad_norm": 0.3324611371208113, "learning_rate": 5.602036571976527e-07, "loss": 0.0143, "step": 31523 }, { "epoch": 3.738171469228033, "grad_norm": 0.5959332027270696, "learning_rate": 5.59698471496245e-07, "loss": 0.0294, "step": 31524 }, { "epoch": 3.738290050990158, "grad_norm": 0.5713800928779742, "learning_rate": 5.591935111044472e-07, "loss": 0.0208, "step": 31525 }, { "epoch": 3.7384086327522827, "grad_norm": 0.6184715950082924, "learning_rate": 5.586887760269111e-07, "loss": 0.0267, "step": 31526 }, { "epoch": 3.7385272145144075, "grad_norm": 0.419264645703571, "learning_rate": 5.581842662682967e-07, "loss": 0.0181, "step": 31527 }, { "epoch": 3.7386457962765327, "grad_norm": 0.5106595437404775, "learning_rate": 5.576799818332506e-07, "loss": 0.0301, "step": 31528 }, { "epoch": 3.738764378038658, "grad_norm": 0.5676361068728127, "learning_rate": 5.571759227264189e-07, "loss": 0.0313, "step": 31529 }, { "epoch": 3.7388829598007827, "grad_norm": 0.5946622841980573, "learning_rate": 5.566720889524535e-07, "loss": 0.0301, "step": 31530 }, { "epoch": 3.7390015415629074, "grad_norm": 0.7778878705348743, "learning_rate": 5.56168480515995e-07, "loss": 0.0391, "step": 31531 }, { "epoch": 3.7391201233250326, "grad_norm": 0.5988012104987436, "learning_rate": 5.556650974216898e-07, "loss": 0.0259, "step": 31532 }, { "epoch": 3.739238705087158, "grad_norm": 0.6353808171334769, "learning_rate": 5.551619396741759e-07, "loss": 0.0258, "step": 31533 }, { "epoch": 3.7393572868492826, "grad_norm": 0.606604320119029, "learning_rate": 5.546590072780966e-07, "loss": 0.023, "step": 31534 }, { "epoch": 3.7394758686114073, "grad_norm": 0.5227736151287294, "learning_rate": 5.541563002380789e-07, "loss": 0.0206, "step": 31535 }, { "epoch": 3.7395944503735326, "grad_norm": 0.27605836590731536, "learning_rate": 5.536538185587664e-07, "loss": 0.0149, "step": 31536 }, { "epoch": 3.7397130321356578, "grad_norm": 0.8805123845450928, "learning_rate": 5.53151562244783e-07, "loss": 0.0265, "step": 31537 }, { "epoch": 3.7398316138977825, "grad_norm": 0.6737649158559315, "learning_rate": 5.526495313007668e-07, "loss": 0.0295, "step": 31538 }, { "epoch": 3.7399501956599073, "grad_norm": 0.2378982837716851, "learning_rate": 5.521477257313445e-07, "loss": 0.0145, "step": 31539 }, { "epoch": 3.7400687774220325, "grad_norm": 0.7313377753511603, "learning_rate": 5.516461455411348e-07, "loss": 0.0339, "step": 31540 }, { "epoch": 3.7401873591841577, "grad_norm": 0.4814135573035987, "learning_rate": 5.511447907347728e-07, "loss": 0.0261, "step": 31541 }, { "epoch": 3.7403059409462824, "grad_norm": 0.5515139010441078, "learning_rate": 5.506436613168714e-07, "loss": 0.0191, "step": 31542 }, { "epoch": 3.740424522708407, "grad_norm": 0.5751153899181652, "learning_rate": 5.501427572920575e-07, "loss": 0.0195, "step": 31543 }, { "epoch": 3.7405431044705324, "grad_norm": 0.30632791826887107, "learning_rate": 5.496420786649387e-07, "loss": 0.0135, "step": 31544 }, { "epoch": 3.7406616862326576, "grad_norm": 0.46903229196870166, "learning_rate": 5.491416254401416e-07, "loss": 0.0131, "step": 31545 }, { "epoch": 3.7407802679947824, "grad_norm": 0.5054930357110051, "learning_rate": 5.486413976222738e-07, "loss": 0.0205, "step": 31546 }, { "epoch": 3.740898849756907, "grad_norm": 0.7120651556973215, "learning_rate": 5.481413952159481e-07, "loss": 0.0231, "step": 31547 }, { "epoch": 3.7410174315190323, "grad_norm": 0.615101566179651, "learning_rate": 5.476416182257749e-07, "loss": 0.0206, "step": 31548 }, { "epoch": 3.7411360132811575, "grad_norm": 0.5430645051299532, "learning_rate": 5.471420666563587e-07, "loss": 0.0203, "step": 31549 }, { "epoch": 3.7412545950432823, "grad_norm": 0.4245241569564131, "learning_rate": 5.46642740512307e-07, "loss": 0.0218, "step": 31550 }, { "epoch": 3.7413731768054075, "grad_norm": 0.7586551139498764, "learning_rate": 5.461436397982217e-07, "loss": 0.0321, "step": 31551 }, { "epoch": 3.7414917585675322, "grad_norm": 0.42813806602266763, "learning_rate": 5.456447645187074e-07, "loss": 0.0172, "step": 31552 }, { "epoch": 3.7416103403296574, "grad_norm": 0.7195115403140351, "learning_rate": 5.451461146783576e-07, "loss": 0.0365, "step": 31553 }, { "epoch": 3.741728922091782, "grad_norm": 0.49997526136269177, "learning_rate": 5.446476902817771e-07, "loss": 0.0253, "step": 31554 }, { "epoch": 3.7418475038539074, "grad_norm": 0.5000791684606295, "learning_rate": 5.441494913335482e-07, "loss": 0.0254, "step": 31555 }, { "epoch": 3.741966085616032, "grad_norm": 0.3653947890100805, "learning_rate": 5.436515178382784e-07, "loss": 0.0175, "step": 31556 }, { "epoch": 3.7420846673781574, "grad_norm": 0.4324961306526892, "learning_rate": 5.431537698005473e-07, "loss": 0.0183, "step": 31557 }, { "epoch": 3.742203249140282, "grad_norm": 0.4105758297836235, "learning_rate": 5.426562472249486e-07, "loss": 0.0115, "step": 31558 }, { "epoch": 3.7423218309024073, "grad_norm": 0.32209809282732366, "learning_rate": 5.421589501160645e-07, "loss": 0.0162, "step": 31559 }, { "epoch": 3.742440412664532, "grad_norm": 0.6315835473029403, "learning_rate": 5.416618784784861e-07, "loss": 0.0289, "step": 31560 }, { "epoch": 3.7425589944266573, "grad_norm": 0.813416570379004, "learning_rate": 5.411650323167928e-07, "loss": 0.0345, "step": 31561 }, { "epoch": 3.742677576188782, "grad_norm": 0.8070267303684362, "learning_rate": 5.406684116355615e-07, "loss": 0.032, "step": 31562 }, { "epoch": 3.7427961579509073, "grad_norm": 0.3807986555955308, "learning_rate": 5.401720164393775e-07, "loss": 0.0158, "step": 31563 }, { "epoch": 3.742914739713032, "grad_norm": 0.5372525208034847, "learning_rate": 5.396758467328094e-07, "loss": 0.0243, "step": 31564 }, { "epoch": 3.743033321475157, "grad_norm": 0.3039659168999697, "learning_rate": 5.391799025204397e-07, "loss": 0.0127, "step": 31565 }, { "epoch": 3.743151903237282, "grad_norm": 0.5534494158649287, "learning_rate": 5.386841838068285e-07, "loss": 0.0231, "step": 31566 }, { "epoch": 3.743270484999407, "grad_norm": 0.46830001360041285, "learning_rate": 5.381886905965583e-07, "loss": 0.0228, "step": 31567 }, { "epoch": 3.743389066761532, "grad_norm": 0.4362841467128925, "learning_rate": 5.376934228941893e-07, "loss": 0.0175, "step": 31568 }, { "epoch": 3.743507648523657, "grad_norm": 0.6780097097616146, "learning_rate": 5.371983807042874e-07, "loss": 0.0291, "step": 31569 }, { "epoch": 3.743626230285782, "grad_norm": 0.3785742190949289, "learning_rate": 5.367035640314184e-07, "loss": 0.0156, "step": 31570 }, { "epoch": 3.743744812047907, "grad_norm": 0.6405253937163138, "learning_rate": 5.36208972880145e-07, "loss": 0.0263, "step": 31571 }, { "epoch": 3.743863393810032, "grad_norm": 0.5867151462146843, "learning_rate": 5.357146072550279e-07, "loss": 0.0252, "step": 31572 }, { "epoch": 3.743981975572157, "grad_norm": 0.385426635669653, "learning_rate": 5.352204671606187e-07, "loss": 0.0181, "step": 31573 }, { "epoch": 3.7441005573342823, "grad_norm": 0.2564962723553276, "learning_rate": 5.347265526014778e-07, "loss": 0.009, "step": 31574 }, { "epoch": 3.744219139096407, "grad_norm": 0.48813682582222784, "learning_rate": 5.342328635821542e-07, "loss": 0.024, "step": 31575 }, { "epoch": 3.744337720858532, "grad_norm": 0.404414167348229, "learning_rate": 5.337394001072054e-07, "loss": 0.022, "step": 31576 }, { "epoch": 3.744456302620657, "grad_norm": 0.8571883505744096, "learning_rate": 5.332461621811724e-07, "loss": 0.0412, "step": 31577 }, { "epoch": 3.744574884382782, "grad_norm": 0.658895478299704, "learning_rate": 5.327531498086125e-07, "loss": 0.0297, "step": 31578 }, { "epoch": 3.744693466144907, "grad_norm": 0.5153029544681773, "learning_rate": 5.322603629940609e-07, "loss": 0.0192, "step": 31579 }, { "epoch": 3.7448120479070317, "grad_norm": 0.7047118290632046, "learning_rate": 5.317678017420641e-07, "loss": 0.031, "step": 31580 }, { "epoch": 3.744930629669157, "grad_norm": 0.6006722439848856, "learning_rate": 5.312754660571628e-07, "loss": 0.0344, "step": 31581 }, { "epoch": 3.745049211431282, "grad_norm": 0.6503915420269802, "learning_rate": 5.30783355943898e-07, "loss": 0.032, "step": 31582 }, { "epoch": 3.745167793193407, "grad_norm": 0.5000719867171144, "learning_rate": 5.302914714068075e-07, "loss": 0.0161, "step": 31583 }, { "epoch": 3.7452863749555316, "grad_norm": 0.5848523436902527, "learning_rate": 5.297998124504183e-07, "loss": 0.0367, "step": 31584 }, { "epoch": 3.745404956717657, "grad_norm": 0.4223089900075269, "learning_rate": 5.293083790792686e-07, "loss": 0.0186, "step": 31585 }, { "epoch": 3.745523538479782, "grad_norm": 0.2010980504276291, "learning_rate": 5.288171712978879e-07, "loss": 0.008, "step": 31586 }, { "epoch": 3.745642120241907, "grad_norm": 0.38891899395278595, "learning_rate": 5.283261891108033e-07, "loss": 0.0161, "step": 31587 }, { "epoch": 3.7457607020040316, "grad_norm": 0.6965095341724485, "learning_rate": 5.278354325225416e-07, "loss": 0.0377, "step": 31588 }, { "epoch": 3.7458792837661568, "grad_norm": 0.5431714863680627, "learning_rate": 5.273449015376298e-07, "loss": 0.0263, "step": 31589 }, { "epoch": 3.745997865528282, "grad_norm": 0.6112742294254316, "learning_rate": 5.268545961605864e-07, "loss": 0.0294, "step": 31590 }, { "epoch": 3.7461164472904067, "grad_norm": 0.3263440128765745, "learning_rate": 5.263645163959329e-07, "loss": 0.0156, "step": 31591 }, { "epoch": 3.7462350290525315, "grad_norm": 0.4605713778257016, "learning_rate": 5.258746622481881e-07, "loss": 0.0272, "step": 31592 }, { "epoch": 3.7463536108146567, "grad_norm": 0.45786377737166073, "learning_rate": 5.253850337218647e-07, "loss": 0.0242, "step": 31593 }, { "epoch": 3.746472192576782, "grad_norm": 0.7252559250655347, "learning_rate": 5.248956308214814e-07, "loss": 0.0222, "step": 31594 }, { "epoch": 3.7465907743389066, "grad_norm": 0.43223946564400245, "learning_rate": 5.244064535515458e-07, "loss": 0.0215, "step": 31595 }, { "epoch": 3.7467093561010314, "grad_norm": 0.9664198126796915, "learning_rate": 5.239175019165681e-07, "loss": 0.0455, "step": 31596 }, { "epoch": 3.7468279378631566, "grad_norm": 0.4192504511057313, "learning_rate": 5.23428775921056e-07, "loss": 0.0226, "step": 31597 }, { "epoch": 3.746946519625282, "grad_norm": 0.6997482638035774, "learning_rate": 5.229402755695139e-07, "loss": 0.0275, "step": 31598 }, { "epoch": 3.7470651013874066, "grad_norm": 0.40792202055139887, "learning_rate": 5.224520008664524e-07, "loss": 0.0175, "step": 31599 }, { "epoch": 3.7471836831495318, "grad_norm": 0.6815667211971167, "learning_rate": 5.219639518163594e-07, "loss": 0.0329, "step": 31600 }, { "epoch": 3.7473022649116565, "grad_norm": 0.7000309647538855, "learning_rate": 5.214761284237479e-07, "loss": 0.0412, "step": 31601 }, { "epoch": 3.7474208466737817, "grad_norm": 0.6446560738421567, "learning_rate": 5.209885306931062e-07, "loss": 0.0366, "step": 31602 }, { "epoch": 3.7475394284359065, "grad_norm": 0.4550384891370782, "learning_rate": 5.205011586289332e-07, "loss": 0.0195, "step": 31603 }, { "epoch": 3.7476580101980317, "grad_norm": 0.4658579941979995, "learning_rate": 5.2001401223572e-07, "loss": 0.0255, "step": 31604 }, { "epoch": 3.7477765919601564, "grad_norm": 0.5352945541680737, "learning_rate": 5.195270915179601e-07, "loss": 0.0217, "step": 31605 }, { "epoch": 3.7478951737222816, "grad_norm": 0.47047377213197084, "learning_rate": 5.190403964801389e-07, "loss": 0.0215, "step": 31606 }, { "epoch": 3.7480137554844064, "grad_norm": 0.816826380375169, "learning_rate": 5.185539271267442e-07, "loss": 0.0384, "step": 31607 }, { "epoch": 3.7481323372465316, "grad_norm": 0.337734194411855, "learning_rate": 5.180676834622616e-07, "loss": 0.0129, "step": 31608 }, { "epoch": 3.7482509190086564, "grad_norm": 0.5704262927450912, "learning_rate": 5.175816654911736e-07, "loss": 0.037, "step": 31609 }, { "epoch": 3.7483695007707816, "grad_norm": 0.5838178053849454, "learning_rate": 5.170958732179626e-07, "loss": 0.0335, "step": 31610 }, { "epoch": 3.7484880825329063, "grad_norm": 0.6343378297830077, "learning_rate": 5.166103066470973e-07, "loss": 0.0356, "step": 31611 }, { "epoch": 3.7486066642950315, "grad_norm": 0.8170403446994265, "learning_rate": 5.161249657830686e-07, "loss": 0.0447, "step": 31612 }, { "epoch": 3.7487252460571563, "grad_norm": 0.491156401824117, "learning_rate": 5.156398506303422e-07, "loss": 0.032, "step": 31613 }, { "epoch": 3.7488438278192815, "grad_norm": 0.5235610701052105, "learning_rate": 5.151549611933926e-07, "loss": 0.0198, "step": 31614 }, { "epoch": 3.7489624095814063, "grad_norm": 0.43881425568317795, "learning_rate": 5.146702974766854e-07, "loss": 0.0203, "step": 31615 }, { "epoch": 3.7490809913435315, "grad_norm": 0.6023757020952663, "learning_rate": 5.141858594846948e-07, "loss": 0.0319, "step": 31616 }, { "epoch": 3.749199573105656, "grad_norm": 0.34023296115412227, "learning_rate": 5.137016472218842e-07, "loss": 0.0165, "step": 31617 }, { "epoch": 3.7493181548677814, "grad_norm": 0.5631174764405107, "learning_rate": 5.132176606927136e-07, "loss": 0.0247, "step": 31618 }, { "epoch": 3.749436736629906, "grad_norm": 0.6656734066167209, "learning_rate": 5.127338999016518e-07, "loss": 0.0182, "step": 31619 }, { "epoch": 3.7495553183920314, "grad_norm": 0.9295112650777091, "learning_rate": 5.122503648531535e-07, "loss": 0.0402, "step": 31620 }, { "epoch": 3.749673900154156, "grad_norm": 0.6492115131825976, "learning_rate": 5.117670555516818e-07, "loss": 0.0315, "step": 31621 }, { "epoch": 3.7497924819162813, "grad_norm": 0.4351318477102644, "learning_rate": 5.112839720016832e-07, "loss": 0.0179, "step": 31622 }, { "epoch": 3.749911063678406, "grad_norm": 0.8505811501593521, "learning_rate": 5.108011142076208e-07, "loss": 0.0379, "step": 31623 }, { "epoch": 3.7500296454405313, "grad_norm": 0.657629799720036, "learning_rate": 5.103184821739381e-07, "loss": 0.0257, "step": 31624 }, { "epoch": 3.750148227202656, "grad_norm": 0.49311246735277825, "learning_rate": 5.09836075905093e-07, "loss": 0.0168, "step": 31625 }, { "epoch": 3.7502668089647813, "grad_norm": 0.5639652320863031, "learning_rate": 5.093538954055205e-07, "loss": 0.0216, "step": 31626 }, { "epoch": 3.7503853907269065, "grad_norm": 0.6814221086712895, "learning_rate": 5.088719406796783e-07, "loss": 0.0337, "step": 31627 }, { "epoch": 3.7505039724890312, "grad_norm": 0.4737679406000582, "learning_rate": 5.083902117320017e-07, "loss": 0.0198, "step": 31628 }, { "epoch": 3.750622554251156, "grad_norm": 0.8968102630776643, "learning_rate": 5.079087085669315e-07, "loss": 0.0499, "step": 31629 }, { "epoch": 3.750741136013281, "grad_norm": 0.41362299315434276, "learning_rate": 5.074274311889115e-07, "loss": 0.0217, "step": 31630 }, { "epoch": 3.7508597177754064, "grad_norm": 0.29809951598557405, "learning_rate": 5.069463796023743e-07, "loss": 0.0129, "step": 31631 }, { "epoch": 3.750978299537531, "grad_norm": 0.5345693668673022, "learning_rate": 5.06465553811758e-07, "loss": 0.0222, "step": 31632 }, { "epoch": 3.751096881299656, "grad_norm": 0.5178120472174312, "learning_rate": 5.059849538214895e-07, "loss": 0.0261, "step": 31633 }, { "epoch": 3.751215463061781, "grad_norm": 0.5203481818980756, "learning_rate": 5.055045796360069e-07, "loss": 0.0253, "step": 31634 }, { "epoch": 3.7513340448239063, "grad_norm": 0.627497294896029, "learning_rate": 5.050244312597346e-07, "loss": 0.0216, "step": 31635 }, { "epoch": 3.751452626586031, "grad_norm": 0.6223270208390973, "learning_rate": 5.045445086970995e-07, "loss": 0.0336, "step": 31636 }, { "epoch": 3.751571208348156, "grad_norm": 0.5976397902203945, "learning_rate": 5.040648119525232e-07, "loss": 0.0266, "step": 31637 }, { "epoch": 3.751689790110281, "grad_norm": 0.5398806043435811, "learning_rate": 5.035853410304325e-07, "loss": 0.0198, "step": 31638 }, { "epoch": 3.7518083718724062, "grad_norm": 0.5438211441015067, "learning_rate": 5.031060959352462e-07, "loss": 0.0213, "step": 31639 }, { "epoch": 3.751926953634531, "grad_norm": 0.49379382535375443, "learning_rate": 5.026270766713804e-07, "loss": 0.0208, "step": 31640 }, { "epoch": 3.7520455353966558, "grad_norm": 0.5720836681487873, "learning_rate": 5.021482832432505e-07, "loss": 0.0362, "step": 31641 }, { "epoch": 3.752164117158781, "grad_norm": 0.4071669848784149, "learning_rate": 5.016697156552757e-07, "loss": 0.0228, "step": 31642 }, { "epoch": 3.752282698920906, "grad_norm": 0.6087033928299371, "learning_rate": 5.011913739118635e-07, "loss": 0.0308, "step": 31643 }, { "epoch": 3.752401280683031, "grad_norm": 0.43082347167549945, "learning_rate": 5.007132580174239e-07, "loss": 0.022, "step": 31644 }, { "epoch": 3.7525198624451557, "grad_norm": 0.247068632318401, "learning_rate": 5.00235367976365e-07, "loss": 0.0099, "step": 31645 }, { "epoch": 3.752638444207281, "grad_norm": 0.4031183768382651, "learning_rate": 4.997577037930912e-07, "loss": 0.0139, "step": 31646 }, { "epoch": 3.752757025969406, "grad_norm": 0.6295244318593425, "learning_rate": 4.992802654720074e-07, "loss": 0.0323, "step": 31647 }, { "epoch": 3.752875607731531, "grad_norm": 0.4699184234835197, "learning_rate": 4.988030530175159e-07, "loss": 0.0174, "step": 31648 }, { "epoch": 3.752994189493656, "grad_norm": 0.5133220652081618, "learning_rate": 4.983260664340128e-07, "loss": 0.0237, "step": 31649 }, { "epoch": 3.753112771255781, "grad_norm": 0.4954228658356705, "learning_rate": 4.978493057259004e-07, "loss": 0.0229, "step": 31650 }, { "epoch": 3.753231353017906, "grad_norm": 0.425949085280888, "learning_rate": 4.973727708975695e-07, "loss": 0.0217, "step": 31651 }, { "epoch": 3.7533499347800308, "grad_norm": 0.5098465531062517, "learning_rate": 4.968964619534139e-07, "loss": 0.0212, "step": 31652 }, { "epoch": 3.753468516542156, "grad_norm": 0.2898805038588874, "learning_rate": 4.964203788978272e-07, "loss": 0.0074, "step": 31653 }, { "epoch": 3.7535870983042807, "grad_norm": 0.7674230791362295, "learning_rate": 4.959445217351949e-07, "loss": 0.0357, "step": 31654 }, { "epoch": 3.753705680066406, "grad_norm": 0.3991747707165267, "learning_rate": 4.95468890469905e-07, "loss": 0.0186, "step": 31655 }, { "epoch": 3.7538242618285307, "grad_norm": 0.362746996605073, "learning_rate": 4.949934851063431e-07, "loss": 0.0164, "step": 31656 }, { "epoch": 3.753942843590656, "grad_norm": 0.4820966015090015, "learning_rate": 4.945183056488917e-07, "loss": 0.0216, "step": 31657 }, { "epoch": 3.7540614253527806, "grad_norm": 0.5811691624615548, "learning_rate": 4.940433521019305e-07, "loss": 0.0191, "step": 31658 }, { "epoch": 3.754180007114906, "grad_norm": 0.5648749066909599, "learning_rate": 4.935686244698367e-07, "loss": 0.0166, "step": 31659 }, { "epoch": 3.7542985888770306, "grad_norm": 0.748800102950552, "learning_rate": 4.930941227569902e-07, "loss": 0.0277, "step": 31660 }, { "epoch": 3.754417170639156, "grad_norm": 0.5715263341470392, "learning_rate": 4.92619846967765e-07, "loss": 0.03, "step": 31661 }, { "epoch": 3.7545357524012806, "grad_norm": 0.4735076851060042, "learning_rate": 4.921457971065302e-07, "loss": 0.0201, "step": 31662 }, { "epoch": 3.7546543341634058, "grad_norm": 0.41374661943983015, "learning_rate": 4.91671973177657e-07, "loss": 0.0141, "step": 31663 }, { "epoch": 3.7547729159255305, "grad_norm": 0.7128261390528026, "learning_rate": 4.911983751855143e-07, "loss": 0.0364, "step": 31664 }, { "epoch": 3.7548914976876557, "grad_norm": 0.49757135202558206, "learning_rate": 4.907250031344707e-07, "loss": 0.0169, "step": 31665 }, { "epoch": 3.7550100794497805, "grad_norm": 0.5985853022178877, "learning_rate": 4.902518570288839e-07, "loss": 0.0359, "step": 31666 }, { "epoch": 3.7551286612119057, "grad_norm": 0.5164674210834934, "learning_rate": 4.897789368731199e-07, "loss": 0.0242, "step": 31667 }, { "epoch": 3.7552472429740305, "grad_norm": 0.5253722052445128, "learning_rate": 4.893062426715361e-07, "loss": 0.0246, "step": 31668 }, { "epoch": 3.7553658247361557, "grad_norm": 0.41766448348379204, "learning_rate": 4.888337744284932e-07, "loss": 0.0175, "step": 31669 }, { "epoch": 3.7554844064982804, "grad_norm": 0.6071709492279571, "learning_rate": 4.883615321483487e-07, "loss": 0.0323, "step": 31670 }, { "epoch": 3.7556029882604056, "grad_norm": 0.5718305082208559, "learning_rate": 4.878895158354463e-07, "loss": 0.0308, "step": 31671 }, { "epoch": 3.7557215700225304, "grad_norm": 0.46359543537997167, "learning_rate": 4.874177254941492e-07, "loss": 0.0179, "step": 31672 }, { "epoch": 3.7558401517846556, "grad_norm": 0.5160184672072928, "learning_rate": 4.869461611287985e-07, "loss": 0.0223, "step": 31673 }, { "epoch": 3.7559587335467803, "grad_norm": 0.4532109761076522, "learning_rate": 4.86474822743746e-07, "loss": 0.0168, "step": 31674 }, { "epoch": 3.7560773153089055, "grad_norm": 0.38467198157194377, "learning_rate": 4.860037103433329e-07, "loss": 0.0165, "step": 31675 }, { "epoch": 3.7561958970710307, "grad_norm": 0.7628817996843611, "learning_rate": 4.855328239319085e-07, "loss": 0.0364, "step": 31676 }, { "epoch": 3.7563144788331555, "grad_norm": 0.3603198477936126, "learning_rate": 4.850621635138081e-07, "loss": 0.0145, "step": 31677 }, { "epoch": 3.7564330605952803, "grad_norm": 0.3124553507275398, "learning_rate": 4.845917290933699e-07, "loss": 0.0119, "step": 31678 }, { "epoch": 3.7565516423574055, "grad_norm": 0.5992694769311568, "learning_rate": 4.84121520674935e-07, "loss": 0.0236, "step": 31679 }, { "epoch": 3.7566702241195307, "grad_norm": 0.3751517696959551, "learning_rate": 4.836515382628359e-07, "loss": 0.0186, "step": 31680 }, { "epoch": 3.7567888058816554, "grad_norm": 0.4174851157238731, "learning_rate": 4.831817818614081e-07, "loss": 0.0161, "step": 31681 }, { "epoch": 3.75690738764378, "grad_norm": 0.7145930073077518, "learning_rate": 4.827122514749732e-07, "loss": 0.0357, "step": 31682 }, { "epoch": 3.7570259694059054, "grad_norm": 0.7607260458390087, "learning_rate": 4.822429471078721e-07, "loss": 0.0353, "step": 31683 }, { "epoch": 3.7571445511680306, "grad_norm": 0.5267217249680649, "learning_rate": 4.817738687644236e-07, "loss": 0.0208, "step": 31684 }, { "epoch": 3.7572631329301553, "grad_norm": 0.923470964478167, "learning_rate": 4.813050164489575e-07, "loss": 0.0397, "step": 31685 }, { "epoch": 3.75738171469228, "grad_norm": 0.6923044963601438, "learning_rate": 4.80836390165787e-07, "loss": 0.0403, "step": 31686 }, { "epoch": 3.7575002964544053, "grad_norm": 0.34583184633730063, "learning_rate": 4.803679899192392e-07, "loss": 0.0143, "step": 31687 }, { "epoch": 3.7576188782165305, "grad_norm": 0.4638666536990943, "learning_rate": 4.798998157136331e-07, "loss": 0.0185, "step": 31688 }, { "epoch": 3.7577374599786553, "grad_norm": 0.6533598737965659, "learning_rate": 4.794318675532761e-07, "loss": 0.0381, "step": 31689 }, { "epoch": 3.75785604174078, "grad_norm": 0.5781051323726416, "learning_rate": 4.789641454424954e-07, "loss": 0.0211, "step": 31690 }, { "epoch": 3.7579746235029052, "grad_norm": 0.43891021289064874, "learning_rate": 4.784966493855902e-07, "loss": 0.0121, "step": 31691 }, { "epoch": 3.7580932052650304, "grad_norm": 0.5755951252759012, "learning_rate": 4.780293793868795e-07, "loss": 0.0338, "step": 31692 }, { "epoch": 3.758211787027155, "grad_norm": 0.5524917223800868, "learning_rate": 4.775623354506626e-07, "loss": 0.0236, "step": 31693 }, { "epoch": 3.75833036878928, "grad_norm": 0.6501844471996531, "learning_rate": 4.770955175812553e-07, "loss": 0.033, "step": 31694 }, { "epoch": 3.758448950551405, "grad_norm": 0.4572037182567546, "learning_rate": 4.766289257829515e-07, "loss": 0.0194, "step": 31695 }, { "epoch": 3.7585675323135304, "grad_norm": 0.5882891701314099, "learning_rate": 4.761625600600561e-07, "loss": 0.0273, "step": 31696 }, { "epoch": 3.758686114075655, "grad_norm": 0.40278108548778035, "learning_rate": 4.756964204168712e-07, "loss": 0.021, "step": 31697 }, { "epoch": 3.75880469583778, "grad_norm": 0.4832926545791205, "learning_rate": 4.7523050685768786e-07, "loss": 0.0164, "step": 31698 }, { "epoch": 3.758923277599905, "grad_norm": 0.5763307328013652, "learning_rate": 4.7476481938681094e-07, "loss": 0.0252, "step": 31699 }, { "epoch": 3.7590418593620303, "grad_norm": 0.656694808138985, "learning_rate": 4.742993580085231e-07, "loss": 0.0293, "step": 31700 }, { "epoch": 3.759160441124155, "grad_norm": 0.3570249461405894, "learning_rate": 4.738341227271209e-07, "loss": 0.0179, "step": 31701 }, { "epoch": 3.7592790228862802, "grad_norm": 0.42851664527572686, "learning_rate": 4.733691135468926e-07, "loss": 0.0261, "step": 31702 }, { "epoch": 3.759397604648405, "grad_norm": 0.3638129190634693, "learning_rate": 4.7290433047212366e-07, "loss": 0.0166, "step": 31703 }, { "epoch": 3.75951618641053, "grad_norm": 0.48192283263413804, "learning_rate": 4.7243977350709955e-07, "loss": 0.0276, "step": 31704 }, { "epoch": 3.759634768172655, "grad_norm": 0.5560326329793466, "learning_rate": 4.7197544265610294e-07, "loss": 0.023, "step": 31705 }, { "epoch": 3.75975334993478, "grad_norm": 0.38097324132247495, "learning_rate": 4.715113379234165e-07, "loss": 0.0132, "step": 31706 }, { "epoch": 3.759871931696905, "grad_norm": 0.5786819758974097, "learning_rate": 4.710474593133146e-07, "loss": 0.02, "step": 31707 }, { "epoch": 3.75999051345903, "grad_norm": 0.6577828628050721, "learning_rate": 4.705838068300744e-07, "loss": 0.0276, "step": 31708 }, { "epoch": 3.760109095221155, "grad_norm": 0.794349275704206, "learning_rate": 4.70120380477973e-07, "loss": 0.0338, "step": 31709 }, { "epoch": 3.76022767698328, "grad_norm": 0.5090169776294505, "learning_rate": 4.696571802612848e-07, "loss": 0.0212, "step": 31710 }, { "epoch": 3.760346258745405, "grad_norm": 0.2922569342826977, "learning_rate": 4.69194206184273e-07, "loss": 0.0116, "step": 31711 }, { "epoch": 3.76046484050753, "grad_norm": 0.5623016279545806, "learning_rate": 4.6873145825120926e-07, "loss": 0.0235, "step": 31712 }, { "epoch": 3.760583422269655, "grad_norm": 0.5304940956463632, "learning_rate": 4.682689364663595e-07, "loss": 0.0271, "step": 31713 }, { "epoch": 3.76070200403178, "grad_norm": 0.6340391393314827, "learning_rate": 4.6780664083398704e-07, "loss": 0.0244, "step": 31714 }, { "epoch": 3.7608205857939048, "grad_norm": 0.4098639454390098, "learning_rate": 4.673445713583552e-07, "loss": 0.0184, "step": 31715 }, { "epoch": 3.76093916755603, "grad_norm": 0.48887728025240096, "learning_rate": 4.668827280437188e-07, "loss": 0.0173, "step": 31716 }, { "epoch": 3.7610577493181547, "grad_norm": 0.5961169912180226, "learning_rate": 4.6642111089434113e-07, "loss": 0.0241, "step": 31717 }, { "epoch": 3.76117633108028, "grad_norm": 0.6587060453471448, "learning_rate": 4.6595971991447716e-07, "loss": 0.0314, "step": 31718 }, { "epoch": 3.7612949128424047, "grad_norm": 0.5779973004791293, "learning_rate": 4.6549855510837626e-07, "loss": 0.0226, "step": 31719 }, { "epoch": 3.76141349460453, "grad_norm": 0.6465273781126086, "learning_rate": 4.650376164802961e-07, "loss": 0.0294, "step": 31720 }, { "epoch": 3.7615320763666547, "grad_norm": 0.6539334250653994, "learning_rate": 4.6457690403448053e-07, "loss": 0.0325, "step": 31721 }, { "epoch": 3.76165065812878, "grad_norm": 0.41125736771878285, "learning_rate": 4.641164177751789e-07, "loss": 0.0127, "step": 31722 }, { "epoch": 3.7617692398909046, "grad_norm": 0.7648432528273937, "learning_rate": 4.6365615770663506e-07, "loss": 0.0367, "step": 31723 }, { "epoch": 3.76188782165303, "grad_norm": 0.5204144823816625, "learning_rate": 4.6319612383309283e-07, "loss": 0.0217, "step": 31724 }, { "epoch": 3.762006403415155, "grad_norm": 0.6170044832814776, "learning_rate": 4.627363161587961e-07, "loss": 0.0247, "step": 31725 }, { "epoch": 3.76212498517728, "grad_norm": 0.5550215155327914, "learning_rate": 4.622767346879775e-07, "loss": 0.0268, "step": 31726 }, { "epoch": 3.7622435669394045, "grad_norm": 0.7536287879322616, "learning_rate": 4.6181737942487814e-07, "loss": 0.0414, "step": 31727 }, { "epoch": 3.7623621487015297, "grad_norm": 0.4629213768393277, "learning_rate": 4.613582503737335e-07, "loss": 0.018, "step": 31728 }, { "epoch": 3.762480730463655, "grad_norm": 0.43368927792392226, "learning_rate": 4.608993475387735e-07, "loss": 0.0184, "step": 31729 }, { "epoch": 3.7625993122257797, "grad_norm": 0.7766191621648451, "learning_rate": 4.6044067092422815e-07, "loss": 0.028, "step": 31730 }, { "epoch": 3.7627178939879045, "grad_norm": 0.4043969458506702, "learning_rate": 4.5998222053432736e-07, "loss": 0.0151, "step": 31731 }, { "epoch": 3.7628364757500297, "grad_norm": 0.3722174436481523, "learning_rate": 4.5952399637330115e-07, "loss": 0.0213, "step": 31732 }, { "epoch": 3.762955057512155, "grad_norm": 0.5912093195099211, "learning_rate": 4.5906599844536826e-07, "loss": 0.0253, "step": 31733 }, { "epoch": 3.7630736392742796, "grad_norm": 0.4267301343678796, "learning_rate": 4.5860822675475313e-07, "loss": 0.0202, "step": 31734 }, { "epoch": 3.7631922210364044, "grad_norm": 0.7030925567846759, "learning_rate": 4.5815068130567465e-07, "loss": 0.0319, "step": 31735 }, { "epoch": 3.7633108027985296, "grad_norm": 0.3922444714075633, "learning_rate": 4.576933621023544e-07, "loss": 0.0182, "step": 31736 }, { "epoch": 3.763429384560655, "grad_norm": 0.7009894702551888, "learning_rate": 4.572362691490001e-07, "loss": 0.0298, "step": 31737 }, { "epoch": 3.7635479663227795, "grad_norm": 0.34995024648947753, "learning_rate": 4.567794024498362e-07, "loss": 0.0154, "step": 31738 }, { "epoch": 3.7636665480849043, "grad_norm": 0.5197882809733071, "learning_rate": 4.5632276200906485e-07, "loss": 0.0244, "step": 31739 }, { "epoch": 3.7637851298470295, "grad_norm": 0.7841635027288899, "learning_rate": 4.558663478309022e-07, "loss": 0.0383, "step": 31740 }, { "epoch": 3.7639037116091547, "grad_norm": 0.48279133662024504, "learning_rate": 4.5541015991955593e-07, "loss": 0.0259, "step": 31741 }, { "epoch": 3.7640222933712795, "grad_norm": 0.8138372953100675, "learning_rate": 4.549541982792255e-07, "loss": 0.0252, "step": 31742 }, { "epoch": 3.7641408751334042, "grad_norm": 0.6544112959747511, "learning_rate": 4.5449846291412413e-07, "loss": 0.0242, "step": 31743 }, { "epoch": 3.7642594568955294, "grad_norm": 0.7371360190171548, "learning_rate": 4.54042953828443e-07, "loss": 0.037, "step": 31744 }, { "epoch": 3.7643780386576546, "grad_norm": 0.4282798965605088, "learning_rate": 4.535876710263842e-07, "loss": 0.0158, "step": 31745 }, { "epoch": 3.7644966204197794, "grad_norm": 0.857502524008476, "learning_rate": 4.531326145121473e-07, "loss": 0.0531, "step": 31746 }, { "epoch": 3.764615202181904, "grad_norm": 0.5797641708886534, "learning_rate": 4.526777842899288e-07, "loss": 0.025, "step": 31747 }, { "epoch": 3.7647337839440294, "grad_norm": 0.7999159499287557, "learning_rate": 4.522231803639199e-07, "loss": 0.022, "step": 31748 }, { "epoch": 3.7648523657061546, "grad_norm": 0.5620852184116854, "learning_rate": 4.51768802738306e-07, "loss": 0.0223, "step": 31749 }, { "epoch": 3.7649709474682793, "grad_norm": 0.5143098458162462, "learning_rate": 4.513146514172839e-07, "loss": 0.0288, "step": 31750 }, { "epoch": 3.7650895292304045, "grad_norm": 0.3903275431737434, "learning_rate": 4.5086072640503627e-07, "loss": 0.0126, "step": 31751 }, { "epoch": 3.7652081109925293, "grad_norm": 0.5873251853615715, "learning_rate": 4.504070277057515e-07, "loss": 0.0248, "step": 31752 }, { "epoch": 3.7653266927546545, "grad_norm": 0.48240248816693876, "learning_rate": 4.4995355532360397e-07, "loss": 0.0204, "step": 31753 }, { "epoch": 3.7654452745167792, "grad_norm": 0.3884835278829907, "learning_rate": 4.4950030926278197e-07, "loss": 0.0194, "step": 31754 }, { "epoch": 3.7655638562789044, "grad_norm": 0.4412156752214478, "learning_rate": 4.490472895274628e-07, "loss": 0.0224, "step": 31755 }, { "epoch": 3.765682438041029, "grad_norm": 0.649769915398782, "learning_rate": 4.4859449612181803e-07, "loss": 0.0296, "step": 31756 }, { "epoch": 3.7658010198031544, "grad_norm": 0.40844913093707536, "learning_rate": 4.481419290500277e-07, "loss": 0.0154, "step": 31757 }, { "epoch": 3.765919601565279, "grad_norm": 0.7215429466057867, "learning_rate": 4.476895883162607e-07, "loss": 0.0406, "step": 31758 }, { "epoch": 3.7660381833274044, "grad_norm": 0.7917076798286585, "learning_rate": 4.4723747392468873e-07, "loss": 0.0425, "step": 31759 }, { "epoch": 3.766156765089529, "grad_norm": 0.3391299180623231, "learning_rate": 4.4678558587947507e-07, "loss": 0.0104, "step": 31760 }, { "epoch": 3.7662753468516543, "grad_norm": 0.3723808811041767, "learning_rate": 4.4633392418479135e-07, "loss": 0.0131, "step": 31761 }, { "epoch": 3.766393928613779, "grad_norm": 0.724428281956527, "learning_rate": 4.45882488844801e-07, "loss": 0.037, "step": 31762 }, { "epoch": 3.7665125103759043, "grad_norm": 0.5347745748513711, "learning_rate": 4.4543127986366176e-07, "loss": 0.0285, "step": 31763 }, { "epoch": 3.766631092138029, "grad_norm": 0.24501362050655365, "learning_rate": 4.449802972455341e-07, "loss": 0.0124, "step": 31764 }, { "epoch": 3.7667496739001542, "grad_norm": 0.8451058308977665, "learning_rate": 4.4452954099458153e-07, "loss": 0.041, "step": 31765 }, { "epoch": 3.766868255662279, "grad_norm": 0.7720947698645696, "learning_rate": 4.4407901111495063e-07, "loss": 0.0339, "step": 31766 }, { "epoch": 3.766986837424404, "grad_norm": 0.5245239105510062, "learning_rate": 4.4362870761079925e-07, "loss": 0.0277, "step": 31767 }, { "epoch": 3.767105419186529, "grad_norm": 0.5616126644244221, "learning_rate": 4.4317863048627673e-07, "loss": 0.0283, "step": 31768 }, { "epoch": 3.767224000948654, "grad_norm": 0.5317551079863586, "learning_rate": 4.427287797455354e-07, "loss": 0.0204, "step": 31769 }, { "epoch": 3.767342582710779, "grad_norm": 0.5409493102311419, "learning_rate": 4.422791553927219e-07, "loss": 0.02, "step": 31770 }, { "epoch": 3.767461164472904, "grad_norm": 0.38830877668112745, "learning_rate": 4.418297574319802e-07, "loss": 0.02, "step": 31771 }, { "epoch": 3.767579746235029, "grad_norm": 0.4936937788887453, "learning_rate": 4.4138058586745137e-07, "loss": 0.0177, "step": 31772 }, { "epoch": 3.767698327997154, "grad_norm": 0.36294249559263503, "learning_rate": 4.409316407032765e-07, "loss": 0.0159, "step": 31773 }, { "epoch": 3.7678169097592793, "grad_norm": 0.48081505188763324, "learning_rate": 4.404829219435996e-07, "loss": 0.0203, "step": 31774 }, { "epoch": 3.767935491521404, "grad_norm": 0.7532621344478417, "learning_rate": 4.400344295925479e-07, "loss": 0.027, "step": 31775 }, { "epoch": 3.768054073283529, "grad_norm": 0.7244622857307539, "learning_rate": 4.395861636542653e-07, "loss": 0.0355, "step": 31776 }, { "epoch": 3.768172655045654, "grad_norm": 0.5432909558525993, "learning_rate": 4.39138124132879e-07, "loss": 0.0263, "step": 31777 }, { "epoch": 3.768291236807779, "grad_norm": 0.5727641975661882, "learning_rate": 4.386903110325191e-07, "loss": 0.0268, "step": 31778 }, { "epoch": 3.768409818569904, "grad_norm": 0.7169338667462131, "learning_rate": 4.382427243573184e-07, "loss": 0.03, "step": 31779 }, { "epoch": 3.7685284003320287, "grad_norm": 0.6160224286070087, "learning_rate": 4.377953641113985e-07, "loss": 0.032, "step": 31780 }, { "epoch": 3.768646982094154, "grad_norm": 0.5893733729080451, "learning_rate": 4.373482302988868e-07, "loss": 0.0281, "step": 31781 }, { "epoch": 3.768765563856279, "grad_norm": 0.5312608024696982, "learning_rate": 4.3690132292390217e-07, "loss": 0.0197, "step": 31782 }, { "epoch": 3.768884145618404, "grad_norm": 0.571874028587005, "learning_rate": 4.364546419905663e-07, "loss": 0.026, "step": 31783 }, { "epoch": 3.7690027273805287, "grad_norm": 0.5328020421484203, "learning_rate": 4.360081875029953e-07, "loss": 0.0229, "step": 31784 }, { "epoch": 3.769121309142654, "grad_norm": 0.3913122091985591, "learning_rate": 4.3556195946531096e-07, "loss": 0.0155, "step": 31785 }, { "epoch": 3.769239890904779, "grad_norm": 0.32276314552478025, "learning_rate": 4.351159578816183e-07, "loss": 0.0153, "step": 31786 }, { "epoch": 3.769358472666904, "grad_norm": 0.541001481885907, "learning_rate": 4.3467018275603343e-07, "loss": 0.0203, "step": 31787 }, { "epoch": 3.7694770544290286, "grad_norm": 0.519665282948342, "learning_rate": 4.34224634092667e-07, "loss": 0.0244, "step": 31788 }, { "epoch": 3.769595636191154, "grad_norm": 0.3335703297655153, "learning_rate": 4.3377931189562405e-07, "loss": 0.011, "step": 31789 }, { "epoch": 3.769714217953279, "grad_norm": 0.4096155159626794, "learning_rate": 4.333342161690096e-07, "loss": 0.0118, "step": 31790 }, { "epoch": 3.7698327997154037, "grad_norm": 0.8090820401139942, "learning_rate": 4.3288934691692875e-07, "loss": 0.0372, "step": 31791 }, { "epoch": 3.7699513814775285, "grad_norm": 0.2598894632673458, "learning_rate": 4.324447041434837e-07, "loss": 0.0113, "step": 31792 }, { "epoch": 3.7700699632396537, "grad_norm": 0.35226842576038286, "learning_rate": 4.3200028785277126e-07, "loss": 0.0104, "step": 31793 }, { "epoch": 3.770188545001779, "grad_norm": 0.3154062886157476, "learning_rate": 4.3155609804888806e-07, "loss": 0.0119, "step": 31794 }, { "epoch": 3.7703071267639037, "grad_norm": 0.8253540940775388, "learning_rate": 4.3111213473593093e-07, "loss": 0.0417, "step": 31795 }, { "epoch": 3.7704257085260284, "grad_norm": 0.44325220016336825, "learning_rate": 4.306683979179882e-07, "loss": 0.0185, "step": 31796 }, { "epoch": 3.7705442902881536, "grad_norm": 0.44595156728003543, "learning_rate": 4.3022488759915933e-07, "loss": 0.0295, "step": 31797 }, { "epoch": 3.770662872050279, "grad_norm": 0.46759484113436844, "learning_rate": 4.2978160378352175e-07, "loss": 0.0228, "step": 31798 }, { "epoch": 3.7707814538124036, "grad_norm": 0.47268983409050036, "learning_rate": 4.29338546475172e-07, "loss": 0.0156, "step": 31799 }, { "epoch": 3.770900035574529, "grad_norm": 0.3121148278249606, "learning_rate": 4.288957156781903e-07, "loss": 0.0154, "step": 31800 }, { "epoch": 3.7710186173366536, "grad_norm": 0.7648270676159207, "learning_rate": 4.2845311139665945e-07, "loss": 0.0336, "step": 31801 }, { "epoch": 3.7711371990987788, "grad_norm": 0.4124428804558289, "learning_rate": 4.2801073363465393e-07, "loss": 0.0214, "step": 31802 }, { "epoch": 3.7712557808609035, "grad_norm": 0.4395198591888608, "learning_rate": 4.275685823962622e-07, "loss": 0.0204, "step": 31803 }, { "epoch": 3.7713743626230287, "grad_norm": 0.6768861817390999, "learning_rate": 4.2712665768555314e-07, "loss": 0.0312, "step": 31804 }, { "epoch": 3.7714929443851535, "grad_norm": 0.47851287161695905, "learning_rate": 4.2668495950660415e-07, "loss": 0.0131, "step": 31805 }, { "epoch": 3.7716115261472787, "grad_norm": 0.3537161489651981, "learning_rate": 4.262434878634869e-07, "loss": 0.0141, "step": 31806 }, { "epoch": 3.7717301079094034, "grad_norm": 0.5598918328580023, "learning_rate": 4.258022427602676e-07, "loss": 0.0356, "step": 31807 }, { "epoch": 3.7718486896715286, "grad_norm": 0.458284740627091, "learning_rate": 4.2536122420102076e-07, "loss": 0.0186, "step": 31808 }, { "epoch": 3.7719672714336534, "grad_norm": 0.6530313995773684, "learning_rate": 4.2492043218980426e-07, "loss": 0.0271, "step": 31809 }, { "epoch": 3.7720858531957786, "grad_norm": 0.6882645229704578, "learning_rate": 4.244798667306871e-07, "loss": 0.0326, "step": 31810 }, { "epoch": 3.7722044349579034, "grad_norm": 0.4542598669655658, "learning_rate": 4.240395278277298e-07, "loss": 0.0136, "step": 31811 }, { "epoch": 3.7723230167200286, "grad_norm": 0.6248827626656878, "learning_rate": 4.2359941548499037e-07, "loss": 0.0267, "step": 31812 }, { "epoch": 3.7724415984821533, "grad_norm": 0.5965080080426812, "learning_rate": 4.2315952970652386e-07, "loss": 0.0295, "step": 31813 }, { "epoch": 3.7725601802442785, "grad_norm": 0.75245982120147, "learning_rate": 4.227198704963936e-07, "loss": 0.0437, "step": 31814 }, { "epoch": 3.7726787620064033, "grad_norm": 0.4195395412804941, "learning_rate": 4.222804378586437e-07, "loss": 0.0237, "step": 31815 }, { "epoch": 3.7727973437685285, "grad_norm": 0.4070031264057588, "learning_rate": 4.218412317973319e-07, "loss": 0.0212, "step": 31816 }, { "epoch": 3.7729159255306532, "grad_norm": 0.6843047220121112, "learning_rate": 4.214022523165023e-07, "loss": 0.024, "step": 31817 }, { "epoch": 3.7730345072927784, "grad_norm": 0.3508557841503246, "learning_rate": 4.2096349942020163e-07, "loss": 0.0167, "step": 31818 }, { "epoch": 3.773153089054903, "grad_norm": 0.5546589031857581, "learning_rate": 4.2052497311248216e-07, "loss": 0.025, "step": 31819 }, { "epoch": 3.7732716708170284, "grad_norm": 0.3833384210881732, "learning_rate": 4.2008667339737407e-07, "loss": 0.0183, "step": 31820 }, { "epoch": 3.773390252579153, "grad_norm": 0.39296483503400786, "learning_rate": 4.196486002789296e-07, "loss": 0.0189, "step": 31821 }, { "epoch": 3.7735088343412784, "grad_norm": 0.43146356530993984, "learning_rate": 4.192107537611817e-07, "loss": 0.0211, "step": 31822 }, { "epoch": 3.773627416103403, "grad_norm": 0.5166066669129281, "learning_rate": 4.1877313384817154e-07, "loss": 0.0223, "step": 31823 }, { "epoch": 3.7737459978655283, "grad_norm": 0.31062322499923817, "learning_rate": 4.1833574054392367e-07, "loss": 0.0108, "step": 31824 }, { "epoch": 3.773864579627653, "grad_norm": 0.3362367507943895, "learning_rate": 4.1789857385247933e-07, "loss": 0.0102, "step": 31825 }, { "epoch": 3.7739831613897783, "grad_norm": 0.370544352691442, "learning_rate": 4.1746163377786305e-07, "loss": 0.0153, "step": 31826 }, { "epoch": 3.7741017431519035, "grad_norm": 0.5807189626949868, "learning_rate": 4.170249203241078e-07, "loss": 0.0222, "step": 31827 }, { "epoch": 3.7742203249140283, "grad_norm": 0.49541192592648775, "learning_rate": 4.1658843349523526e-07, "loss": 0.0194, "step": 31828 }, { "epoch": 3.774338906676153, "grad_norm": 0.3975524443897461, "learning_rate": 4.161521732952728e-07, "loss": 0.0164, "step": 31829 }, { "epoch": 3.774457488438278, "grad_norm": 0.8371928320391157, "learning_rate": 4.157161397282422e-07, "loss": 0.0424, "step": 31830 }, { "epoch": 3.7745760702004034, "grad_norm": 0.49436708290436937, "learning_rate": 4.152803327981569e-07, "loss": 0.0197, "step": 31831 }, { "epoch": 3.774694651962528, "grad_norm": 0.4882580305869405, "learning_rate": 4.1484475250904156e-07, "loss": 0.0227, "step": 31832 }, { "epoch": 3.774813233724653, "grad_norm": 0.8220902643819877, "learning_rate": 4.144093988649095e-07, "loss": 0.0318, "step": 31833 }, { "epoch": 3.774931815486778, "grad_norm": 0.4821572511419582, "learning_rate": 4.13974271869777e-07, "loss": 0.0152, "step": 31834 }, { "epoch": 3.7750503972489033, "grad_norm": 0.5749685032121692, "learning_rate": 4.1353937152764645e-07, "loss": 0.0266, "step": 31835 }, { "epoch": 3.775168979011028, "grad_norm": 0.6427109617670901, "learning_rate": 4.131046978425368e-07, "loss": 0.0305, "step": 31836 }, { "epoch": 3.775287560773153, "grad_norm": 0.8353272112312367, "learning_rate": 4.1267025081845046e-07, "loss": 0.0304, "step": 31837 }, { "epoch": 3.775406142535278, "grad_norm": 0.7600899388366446, "learning_rate": 4.1223603045939254e-07, "loss": 0.0377, "step": 31838 }, { "epoch": 3.7755247242974033, "grad_norm": 0.7797335240118476, "learning_rate": 4.118020367693681e-07, "loss": 0.06, "step": 31839 }, { "epoch": 3.775643306059528, "grad_norm": 0.7689651875683298, "learning_rate": 4.113682697523741e-07, "loss": 0.0386, "step": 31840 }, { "epoch": 3.775761887821653, "grad_norm": 0.5490319434344557, "learning_rate": 4.1093472941241827e-07, "loss": 0.022, "step": 31841 }, { "epoch": 3.775880469583778, "grad_norm": 0.36292279159787116, "learning_rate": 4.1050141575348644e-07, "loss": 0.0115, "step": 31842 }, { "epoch": 3.775999051345903, "grad_norm": 0.7399061139931095, "learning_rate": 4.1006832877957814e-07, "loss": 0.0341, "step": 31843 }, { "epoch": 3.776117633108028, "grad_norm": 0.4729695889735751, "learning_rate": 4.096354684946846e-07, "loss": 0.0221, "step": 31844 }, { "epoch": 3.7762362148701527, "grad_norm": 0.31355914762016357, "learning_rate": 4.0920283490279984e-07, "loss": 0.0164, "step": 31845 }, { "epoch": 3.776354796632278, "grad_norm": 0.4827132319964702, "learning_rate": 4.0877042800790964e-07, "loss": 0.0238, "step": 31846 }, { "epoch": 3.776473378394403, "grad_norm": 0.47805523861129806, "learning_rate": 4.0833824781399687e-07, "loss": 0.0243, "step": 31847 }, { "epoch": 3.776591960156528, "grad_norm": 0.8643425384320679, "learning_rate": 4.0790629432505557e-07, "loss": 0.0349, "step": 31848 }, { "epoch": 3.776710541918653, "grad_norm": 0.4226496229544819, "learning_rate": 4.0747456754505753e-07, "loss": 0.0196, "step": 31849 }, { "epoch": 3.776829123680778, "grad_norm": 0.5913537335060952, "learning_rate": 4.070430674779857e-07, "loss": 0.0254, "step": 31850 }, { "epoch": 3.776947705442903, "grad_norm": 0.4192179364547982, "learning_rate": 4.0661179412782025e-07, "loss": 0.023, "step": 31851 }, { "epoch": 3.777066287205028, "grad_norm": 0.3677656240047313, "learning_rate": 4.0618074749853576e-07, "loss": 0.0177, "step": 31852 }, { "epoch": 3.777184868967153, "grad_norm": 0.3757826554871263, "learning_rate": 4.057499275941068e-07, "loss": 0.0179, "step": 31853 }, { "epoch": 3.7773034507292778, "grad_norm": 0.6583405707951343, "learning_rate": 4.0531933441850246e-07, "loss": 0.0267, "step": 31854 }, { "epoch": 3.777422032491403, "grad_norm": 0.617787289908163, "learning_rate": 4.048889679756918e-07, "loss": 0.0254, "step": 31855 }, { "epoch": 3.7775406142535277, "grad_norm": 0.7308365697164131, "learning_rate": 4.0445882826964653e-07, "loss": 0.0289, "step": 31856 }, { "epoch": 3.777659196015653, "grad_norm": 0.4614767378921938, "learning_rate": 4.0402891530433304e-07, "loss": 0.0227, "step": 31857 }, { "epoch": 3.7777777777777777, "grad_norm": 0.4120895743343994, "learning_rate": 4.035992290837065e-07, "loss": 0.0164, "step": 31858 }, { "epoch": 3.777896359539903, "grad_norm": 0.753556157499614, "learning_rate": 4.031697696117359e-07, "loss": 0.0247, "step": 31859 }, { "epoch": 3.7780149413020276, "grad_norm": 0.7227416612480351, "learning_rate": 4.027405368923765e-07, "loss": 0.027, "step": 31860 }, { "epoch": 3.778133523064153, "grad_norm": 0.5287363008921468, "learning_rate": 4.023115309295833e-07, "loss": 0.0191, "step": 31861 }, { "epoch": 3.7782521048262776, "grad_norm": 0.3970565742346015, "learning_rate": 4.0188275172731714e-07, "loss": 0.0183, "step": 31862 }, { "epoch": 3.778370686588403, "grad_norm": 0.6936002889299994, "learning_rate": 4.0145419928953044e-07, "loss": 0.0281, "step": 31863 }, { "epoch": 3.7784892683505276, "grad_norm": 0.5986737288472799, "learning_rate": 4.010258736201672e-07, "loss": 0.0246, "step": 31864 }, { "epoch": 3.7786078501126528, "grad_norm": 0.3490793560701695, "learning_rate": 4.0059777472317984e-07, "loss": 0.0185, "step": 31865 }, { "epoch": 3.7787264318747775, "grad_norm": 0.31181752288030923, "learning_rate": 4.0016990260251797e-07, "loss": 0.0147, "step": 31866 }, { "epoch": 3.7788450136369027, "grad_norm": 0.7278908359700335, "learning_rate": 3.997422572621201e-07, "loss": 0.0411, "step": 31867 }, { "epoch": 3.7789635953990275, "grad_norm": 0.5487880114987683, "learning_rate": 3.993148387059359e-07, "loss": 0.024, "step": 31868 }, { "epoch": 3.7790821771611527, "grad_norm": 0.48722041499231605, "learning_rate": 3.988876469378983e-07, "loss": 0.0239, "step": 31869 }, { "epoch": 3.7792007589232774, "grad_norm": 0.4397279656736417, "learning_rate": 3.984606819619513e-07, "loss": 0.0196, "step": 31870 }, { "epoch": 3.7793193406854027, "grad_norm": 0.4983560524909072, "learning_rate": 3.9803394378202516e-07, "loss": 0.0245, "step": 31871 }, { "epoch": 3.7794379224475274, "grad_norm": 0.376635536549917, "learning_rate": 3.976074324020612e-07, "loss": 0.0148, "step": 31872 }, { "epoch": 3.7795565042096526, "grad_norm": 0.9387925659287376, "learning_rate": 3.9718114782598126e-07, "loss": 0.0365, "step": 31873 }, { "epoch": 3.7796750859717774, "grad_norm": 0.3869884059293986, "learning_rate": 3.967550900577266e-07, "loss": 0.0199, "step": 31874 }, { "epoch": 3.7797936677339026, "grad_norm": 0.6933075682159763, "learning_rate": 3.963292591012191e-07, "loss": 0.0471, "step": 31875 }, { "epoch": 3.7799122494960278, "grad_norm": 0.7244974394856587, "learning_rate": 3.959036549603834e-07, "loss": 0.0356, "step": 31876 }, { "epoch": 3.7800308312581525, "grad_norm": 0.4894324583702637, "learning_rate": 3.9547827763914416e-07, "loss": 0.0219, "step": 31877 }, { "epoch": 3.7801494130202773, "grad_norm": 0.39266006833975764, "learning_rate": 3.9505312714142316e-07, "loss": 0.0157, "step": 31878 }, { "epoch": 3.7802679947824025, "grad_norm": 0.7121298000560101, "learning_rate": 3.946282034711396e-07, "loss": 0.0389, "step": 31879 }, { "epoch": 3.7803865765445277, "grad_norm": 0.627352188764344, "learning_rate": 3.942035066322097e-07, "loss": 0.0363, "step": 31880 }, { "epoch": 3.7805051583066525, "grad_norm": 0.49527411855144843, "learning_rate": 3.937790366285526e-07, "loss": 0.021, "step": 31881 }, { "epoch": 3.780623740068777, "grad_norm": 0.9090845724947956, "learning_rate": 3.933547934640791e-07, "loss": 0.0476, "step": 31882 }, { "epoch": 3.7807423218309024, "grad_norm": 0.5280205868027062, "learning_rate": 3.929307771426999e-07, "loss": 0.0194, "step": 31883 }, { "epoch": 3.7808609035930276, "grad_norm": 0.5342588008455651, "learning_rate": 3.925069876683174e-07, "loss": 0.0268, "step": 31884 }, { "epoch": 3.7809794853551524, "grad_norm": 0.5714573034811801, "learning_rate": 3.920834250448535e-07, "loss": 0.0227, "step": 31885 }, { "epoch": 3.781098067117277, "grad_norm": 0.4685837423936282, "learning_rate": 3.9166008927619956e-07, "loss": 0.0179, "step": 31886 }, { "epoch": 3.7812166488794023, "grad_norm": 0.7406951398757605, "learning_rate": 3.9123698036626076e-07, "loss": 0.0265, "step": 31887 }, { "epoch": 3.7813352306415275, "grad_norm": 0.5425488817061083, "learning_rate": 3.9081409831894235e-07, "loss": 0.021, "step": 31888 }, { "epoch": 3.7814538124036523, "grad_norm": 0.5215613977881464, "learning_rate": 3.9039144313814116e-07, "loss": 0.0248, "step": 31889 }, { "epoch": 3.781572394165777, "grad_norm": 0.5904298375692939, "learning_rate": 3.8996901482775407e-07, "loss": 0.0233, "step": 31890 }, { "epoch": 3.7816909759279023, "grad_norm": 0.5685630541773063, "learning_rate": 3.895468133916669e-07, "loss": 0.0249, "step": 31891 }, { "epoch": 3.7818095576900275, "grad_norm": 0.32179594138781825, "learning_rate": 3.8912483883378483e-07, "loss": 0.0142, "step": 31892 }, { "epoch": 3.7819281394521522, "grad_norm": 0.5203834164907836, "learning_rate": 3.887030911579909e-07, "loss": 0.0309, "step": 31893 }, { "epoch": 3.782046721214277, "grad_norm": 0.5521898636971335, "learning_rate": 3.882815703681736e-07, "loss": 0.0234, "step": 31894 }, { "epoch": 3.782165302976402, "grad_norm": 0.5294662474398418, "learning_rate": 3.8786027646821877e-07, "loss": 0.028, "step": 31895 }, { "epoch": 3.7822838847385274, "grad_norm": 0.5969241991561479, "learning_rate": 3.874392094620122e-07, "loss": 0.0236, "step": 31896 }, { "epoch": 3.782402466500652, "grad_norm": 0.6335807233750141, "learning_rate": 3.8701836935343407e-07, "loss": 0.024, "step": 31897 }, { "epoch": 3.782521048262777, "grad_norm": 0.4981690234184708, "learning_rate": 3.8659775614636183e-07, "loss": 0.023, "step": 31898 }, { "epoch": 3.782639630024902, "grad_norm": 0.3770953984917607, "learning_rate": 3.861773698446786e-07, "loss": 0.0183, "step": 31899 }, { "epoch": 3.7827582117870273, "grad_norm": 0.4427524920254872, "learning_rate": 3.857572104522533e-07, "loss": 0.0161, "step": 31900 }, { "epoch": 3.782876793549152, "grad_norm": 0.6161563397962272, "learning_rate": 3.8533727797296637e-07, "loss": 0.023, "step": 31901 }, { "epoch": 3.7829953753112773, "grad_norm": 0.4588123636372751, "learning_rate": 3.849175724106813e-07, "loss": 0.0378, "step": 31902 }, { "epoch": 3.783113957073402, "grad_norm": 0.4634858743970792, "learning_rate": 3.8449809376927273e-07, "loss": 0.0252, "step": 31903 }, { "epoch": 3.7832325388355272, "grad_norm": 0.5334546529397126, "learning_rate": 3.84078842052607e-07, "loss": 0.0142, "step": 31904 }, { "epoch": 3.783351120597652, "grad_norm": 0.5566784421100952, "learning_rate": 3.836598172645478e-07, "loss": 0.0217, "step": 31905 }, { "epoch": 3.783469702359777, "grad_norm": 0.6126611954215508, "learning_rate": 3.832410194089586e-07, "loss": 0.0324, "step": 31906 }, { "epoch": 3.783588284121902, "grad_norm": 0.4674974927581612, "learning_rate": 3.828224484896975e-07, "loss": 0.0252, "step": 31907 }, { "epoch": 3.783706865884027, "grad_norm": 0.6139853644976944, "learning_rate": 3.824041045106308e-07, "loss": 0.0292, "step": 31908 }, { "epoch": 3.783825447646152, "grad_norm": 0.5398607124047583, "learning_rate": 3.819859874756082e-07, "loss": 0.0254, "step": 31909 }, { "epoch": 3.783944029408277, "grad_norm": 0.5522137500632561, "learning_rate": 3.8156809738848496e-07, "loss": 0.0253, "step": 31910 }, { "epoch": 3.784062611170402, "grad_norm": 0.3900193660562421, "learning_rate": 3.811504342531136e-07, "loss": 0.0146, "step": 31911 }, { "epoch": 3.784181192932527, "grad_norm": 0.6920867652280905, "learning_rate": 3.8073299807334926e-07, "loss": 0.0321, "step": 31912 }, { "epoch": 3.784299774694652, "grad_norm": 0.4721311082814587, "learning_rate": 3.803157888530362e-07, "loss": 0.0166, "step": 31913 }, { "epoch": 3.784418356456777, "grad_norm": 0.4485947088175132, "learning_rate": 3.798988065960185e-07, "loss": 0.0189, "step": 31914 }, { "epoch": 3.784536938218902, "grad_norm": 0.5216555008985573, "learning_rate": 3.794820513061431e-07, "loss": 0.0253, "step": 31915 }, { "epoch": 3.784655519981027, "grad_norm": 0.43004237031962766, "learning_rate": 3.790655229872514e-07, "loss": 0.0186, "step": 31916 }, { "epoch": 3.7847741017431518, "grad_norm": 0.3153867820475274, "learning_rate": 3.7864922164318473e-07, "loss": 0.0154, "step": 31917 }, { "epoch": 3.784892683505277, "grad_norm": 0.32077494431229864, "learning_rate": 3.7823314727777614e-07, "loss": 0.0166, "step": 31918 }, { "epoch": 3.7850112652674017, "grad_norm": 0.5592501521723984, "learning_rate": 3.778172998948698e-07, "loss": 0.0241, "step": 31919 }, { "epoch": 3.785129847029527, "grad_norm": 0.45766110759451023, "learning_rate": 3.774016794982904e-07, "loss": 0.0198, "step": 31920 }, { "epoch": 3.7852484287916517, "grad_norm": 0.7117537536790278, "learning_rate": 3.7698628609187657e-07, "loss": 0.0294, "step": 31921 }, { "epoch": 3.785367010553777, "grad_norm": 0.5194982338478071, "learning_rate": 3.7657111967945026e-07, "loss": 0.0178, "step": 31922 }, { "epoch": 3.7854855923159016, "grad_norm": 0.40813630426047054, "learning_rate": 3.761561802648472e-07, "loss": 0.0231, "step": 31923 }, { "epoch": 3.785604174078027, "grad_norm": 0.6481257833360387, "learning_rate": 3.757414678518867e-07, "loss": 0.0279, "step": 31924 }, { "epoch": 3.785722755840152, "grad_norm": 0.4506117689470364, "learning_rate": 3.7532698244439337e-07, "loss": 0.0173, "step": 31925 }, { "epoch": 3.785841337602277, "grad_norm": 0.6508368435692178, "learning_rate": 3.749127240461892e-07, "loss": 0.0345, "step": 31926 }, { "epoch": 3.7859599193644016, "grad_norm": 0.437369453555126, "learning_rate": 3.744986926610905e-07, "loss": 0.0122, "step": 31927 }, { "epoch": 3.7860785011265268, "grad_norm": 0.5716715980245407, "learning_rate": 3.740848882929193e-07, "loss": 0.0282, "step": 31928 }, { "epoch": 3.786197082888652, "grad_norm": 0.529094340318738, "learning_rate": 3.7367131094548366e-07, "loss": 0.0221, "step": 31929 }, { "epoch": 3.7863156646507767, "grad_norm": 0.3995481691707605, "learning_rate": 3.732579606226028e-07, "loss": 0.0169, "step": 31930 }, { "epoch": 3.7864342464129015, "grad_norm": 0.39531271991078576, "learning_rate": 3.728448373280818e-07, "loss": 0.0136, "step": 31931 }, { "epoch": 3.7865528281750267, "grad_norm": 0.4824115135795889, "learning_rate": 3.724319410657318e-07, "loss": 0.017, "step": 31932 }, { "epoch": 3.786671409937152, "grad_norm": 0.6604058318245357, "learning_rate": 3.720192718393578e-07, "loss": 0.037, "step": 31933 }, { "epoch": 3.7867899916992767, "grad_norm": 0.7241963492985408, "learning_rate": 3.716068296527708e-07, "loss": 0.032, "step": 31934 }, { "epoch": 3.7869085734614014, "grad_norm": 0.3273884936506529, "learning_rate": 3.7119461450976225e-07, "loss": 0.0103, "step": 31935 }, { "epoch": 3.7870271552235266, "grad_norm": 0.3246141724687696, "learning_rate": 3.7078262641414007e-07, "loss": 0.0158, "step": 31936 }, { "epoch": 3.787145736985652, "grad_norm": 0.5196106242495413, "learning_rate": 3.7037086536969856e-07, "loss": 0.0228, "step": 31937 }, { "epoch": 3.7872643187477766, "grad_norm": 0.4542673971911658, "learning_rate": 3.6995933138023465e-07, "loss": 0.0188, "step": 31938 }, { "epoch": 3.7873829005099013, "grad_norm": 0.6408129719994126, "learning_rate": 3.695480244495425e-07, "loss": 0.0239, "step": 31939 }, { "epoch": 3.7875014822720265, "grad_norm": 0.2756190895843875, "learning_rate": 3.691369445814136e-07, "loss": 0.0089, "step": 31940 }, { "epoch": 3.7876200640341517, "grad_norm": 1.0251813646344898, "learning_rate": 3.6872609177963925e-07, "loss": 0.0626, "step": 31941 }, { "epoch": 3.7877386457962765, "grad_norm": 0.4467478322422121, "learning_rate": 3.683154660480026e-07, "loss": 0.02, "step": 31942 }, { "epoch": 3.7878572275584013, "grad_norm": 0.469438720438782, "learning_rate": 3.6790506739029506e-07, "loss": 0.0247, "step": 31943 }, { "epoch": 3.7879758093205265, "grad_norm": 0.7211261383494686, "learning_rate": 3.6749489581029415e-07, "loss": 0.0421, "step": 31944 }, { "epoch": 3.7880943910826517, "grad_norm": 0.4249242166248013, "learning_rate": 3.670849513117858e-07, "loss": 0.0187, "step": 31945 }, { "epoch": 3.7882129728447764, "grad_norm": 0.5777901199685538, "learning_rate": 3.666752338985474e-07, "loss": 0.0211, "step": 31946 }, { "epoch": 3.788331554606901, "grad_norm": 0.7365847035640102, "learning_rate": 3.662657435743566e-07, "loss": 0.0377, "step": 31947 }, { "epoch": 3.7884501363690264, "grad_norm": 0.6003416197718658, "learning_rate": 3.6585648034298536e-07, "loss": 0.0282, "step": 31948 }, { "epoch": 3.7885687181311516, "grad_norm": 0.6077705731763446, "learning_rate": 3.654474442082084e-07, "loss": 0.0333, "step": 31949 }, { "epoch": 3.7886872998932764, "grad_norm": 0.568682327052585, "learning_rate": 3.6503863517380054e-07, "loss": 0.0198, "step": 31950 }, { "epoch": 3.7888058816554016, "grad_norm": 0.6308824785396013, "learning_rate": 3.646300532435254e-07, "loss": 0.0291, "step": 31951 }, { "epoch": 3.7889244634175263, "grad_norm": 0.4468702828821192, "learning_rate": 3.6422169842115217e-07, "loss": 0.0212, "step": 31952 }, { "epoch": 3.7890430451796515, "grad_norm": 0.6111869380772533, "learning_rate": 3.6381357071044455e-07, "loss": 0.025, "step": 31953 }, { "epoch": 3.7891616269417763, "grad_norm": 0.5617844267530999, "learning_rate": 3.6340567011516335e-07, "loss": 0.0294, "step": 31954 }, { "epoch": 3.7892802087039015, "grad_norm": 0.3988322929750128, "learning_rate": 3.629979966390695e-07, "loss": 0.02, "step": 31955 }, { "epoch": 3.7893987904660262, "grad_norm": 0.6329884751580458, "learning_rate": 3.625905502859239e-07, "loss": 0.0313, "step": 31956 }, { "epoch": 3.7895173722281514, "grad_norm": 0.508061234788832, "learning_rate": 3.621833310594819e-07, "loss": 0.0186, "step": 31957 }, { "epoch": 3.789635953990276, "grad_norm": 0.638010164855185, "learning_rate": 3.6177633896349596e-07, "loss": 0.0353, "step": 31958 }, { "epoch": 3.7897545357524014, "grad_norm": 0.4998226018753507, "learning_rate": 3.613695740017187e-07, "loss": 0.0202, "step": 31959 }, { "epoch": 3.789873117514526, "grad_norm": 0.5726592432517222, "learning_rate": 3.609630361778998e-07, "loss": 0.0253, "step": 31960 }, { "epoch": 3.7899916992766514, "grad_norm": 0.5311231629193192, "learning_rate": 3.605567254957892e-07, "loss": 0.0285, "step": 31961 }, { "epoch": 3.790110281038776, "grad_norm": 0.5997300727514464, "learning_rate": 3.6015064195912553e-07, "loss": 0.0341, "step": 31962 }, { "epoch": 3.7902288628009013, "grad_norm": 0.7007199478093511, "learning_rate": 3.5974478557166403e-07, "loss": 0.0304, "step": 31963 }, { "epoch": 3.790347444563026, "grad_norm": 0.6867472343810522, "learning_rate": 3.593391563371351e-07, "loss": 0.04, "step": 31964 }, { "epoch": 3.7904660263251513, "grad_norm": 1.0958737170637616, "learning_rate": 3.589337542592858e-07, "loss": 0.0381, "step": 31965 }, { "epoch": 3.790584608087276, "grad_norm": 0.45196482387139836, "learning_rate": 3.5852857934184916e-07, "loss": 0.0157, "step": 31966 }, { "epoch": 3.7907031898494012, "grad_norm": 0.6010949165482949, "learning_rate": 3.581236315885611e-07, "loss": 0.0213, "step": 31967 }, { "epoch": 3.790821771611526, "grad_norm": 0.4448819382942913, "learning_rate": 3.5771891100315755e-07, "loss": 0.0226, "step": 31968 }, { "epoch": 3.790940353373651, "grad_norm": 0.2673606718171279, "learning_rate": 3.5731441758936335e-07, "loss": 0.0098, "step": 31969 }, { "epoch": 3.791058935135776, "grad_norm": 0.6815906099876485, "learning_rate": 3.569101513509143e-07, "loss": 0.0321, "step": 31970 }, { "epoch": 3.791177516897901, "grad_norm": 0.5580130431869675, "learning_rate": 3.5650611229153253e-07, "loss": 0.0196, "step": 31971 }, { "epoch": 3.791296098660026, "grad_norm": 0.4263114470975018, "learning_rate": 3.561023004149483e-07, "loss": 0.0164, "step": 31972 }, { "epoch": 3.791414680422151, "grad_norm": 0.6166040755258168, "learning_rate": 3.5569871572487813e-07, "loss": 0.0428, "step": 31973 }, { "epoch": 3.7915332621842763, "grad_norm": 0.5644412485164052, "learning_rate": 3.55295358225044e-07, "loss": 0.028, "step": 31974 }, { "epoch": 3.791651843946401, "grad_norm": 0.824731096964996, "learning_rate": 3.548922279191652e-07, "loss": 0.0322, "step": 31975 }, { "epoch": 3.791770425708526, "grad_norm": 0.4208528552291573, "learning_rate": 3.544893248109582e-07, "loss": 0.0164, "step": 31976 }, { "epoch": 3.791889007470651, "grad_norm": 0.5895615290252004, "learning_rate": 3.5408664890413666e-07, "loss": 0.0316, "step": 31977 }, { "epoch": 3.7920075892327763, "grad_norm": 0.5719868065628717, "learning_rate": 3.536842002024143e-07, "loss": 0.0258, "step": 31978 }, { "epoch": 3.792126170994901, "grad_norm": 0.92298278698909, "learning_rate": 3.5328197870950484e-07, "loss": 0.0318, "step": 31979 }, { "epoch": 3.7922447527570258, "grad_norm": 0.3850797464551466, "learning_rate": 3.528799844291053e-07, "loss": 0.0168, "step": 31980 }, { "epoch": 3.792363334519151, "grad_norm": 0.43618540704253156, "learning_rate": 3.5247821736493216e-07, "loss": 0.0199, "step": 31981 }, { "epoch": 3.792481916281276, "grad_norm": 0.8445382150219234, "learning_rate": 3.520766775206824e-07, "loss": 0.0418, "step": 31982 }, { "epoch": 3.792600498043401, "grad_norm": 0.5993230998050739, "learning_rate": 3.516753649000615e-07, "loss": 0.0384, "step": 31983 }, { "epoch": 3.7927190798055257, "grad_norm": 0.6619232811437021, "learning_rate": 3.512742795067692e-07, "loss": 0.0317, "step": 31984 }, { "epoch": 3.792837661567651, "grad_norm": 0.6463725213266769, "learning_rate": 3.508734213444997e-07, "loss": 0.0276, "step": 31985 }, { "epoch": 3.792956243329776, "grad_norm": 0.6858320237209403, "learning_rate": 3.5047279041695027e-07, "loss": 0.0319, "step": 31986 }, { "epoch": 3.793074825091901, "grad_norm": 0.5254694378737392, "learning_rate": 3.5007238672781495e-07, "loss": 0.0183, "step": 31987 }, { "epoch": 3.7931934068540256, "grad_norm": 0.5307032318386261, "learning_rate": 3.4967221028078534e-07, "loss": 0.0312, "step": 31988 }, { "epoch": 3.793311988616151, "grad_norm": 0.4861456123256489, "learning_rate": 3.4927226107954735e-07, "loss": 0.0226, "step": 31989 }, { "epoch": 3.793430570378276, "grad_norm": 0.28882136801099273, "learning_rate": 3.488725391277953e-07, "loss": 0.0123, "step": 31990 }, { "epoch": 3.793549152140401, "grad_norm": 0.7445896211386859, "learning_rate": 3.484730444292067e-07, "loss": 0.0305, "step": 31991 }, { "epoch": 3.7936677339025255, "grad_norm": 0.39332387078829145, "learning_rate": 3.4807377698746477e-07, "loss": 0.0193, "step": 31992 }, { "epoch": 3.7937863156646507, "grad_norm": 0.46003463135024497, "learning_rate": 3.476747368062527e-07, "loss": 0.0197, "step": 31993 }, { "epoch": 3.793904897426776, "grad_norm": 0.6172089500783939, "learning_rate": 3.472759238892537e-07, "loss": 0.0252, "step": 31994 }, { "epoch": 3.7940234791889007, "grad_norm": 0.6550485257653393, "learning_rate": 3.4687733824013413e-07, "loss": 0.036, "step": 31995 }, { "epoch": 3.7941420609510255, "grad_norm": 0.30111780905952185, "learning_rate": 3.464789798625773e-07, "loss": 0.013, "step": 31996 }, { "epoch": 3.7942606427131507, "grad_norm": 0.3885313252652289, "learning_rate": 3.460808487602496e-07, "loss": 0.0175, "step": 31997 }, { "epoch": 3.794379224475276, "grad_norm": 1.0515107144062488, "learning_rate": 3.456829449368232e-07, "loss": 0.0594, "step": 31998 }, { "epoch": 3.7944978062374006, "grad_norm": 0.536662251084361, "learning_rate": 3.4528526839597285e-07, "loss": 0.0299, "step": 31999 }, { "epoch": 3.794616387999526, "grad_norm": 0.6280402784288501, "learning_rate": 3.4488781914135126e-07, "loss": 0.0303, "step": 32000 }, { "epoch": 3.7947349697616506, "grad_norm": 0.44196091911921875, "learning_rate": 3.4449059717663324e-07, "loss": 0.0155, "step": 32001 }, { "epoch": 3.794853551523776, "grad_norm": 0.4622741714200245, "learning_rate": 3.4409360250547697e-07, "loss": 0.0189, "step": 32002 }, { "epoch": 3.7949721332859006, "grad_norm": 0.6440184347639555, "learning_rate": 3.4369683513154336e-07, "loss": 0.0292, "step": 32003 }, { "epoch": 3.7950907150480258, "grad_norm": 0.5495249172490893, "learning_rate": 3.433002950584879e-07, "loss": 0.033, "step": 32004 }, { "epoch": 3.7952092968101505, "grad_norm": 0.40329860373661286, "learning_rate": 3.429039822899688e-07, "loss": 0.016, "step": 32005 }, { "epoch": 3.7953278785722757, "grad_norm": 0.5460344871339584, "learning_rate": 3.4250789682964135e-07, "loss": 0.0294, "step": 32006 }, { "epoch": 3.7954464603344005, "grad_norm": 0.47067410470609183, "learning_rate": 3.421120386811472e-07, "loss": 0.0158, "step": 32007 }, { "epoch": 3.7955650420965257, "grad_norm": 0.6488633970665022, "learning_rate": 3.4171640784815e-07, "loss": 0.0298, "step": 32008 }, { "epoch": 3.7956836238586504, "grad_norm": 0.7250654991596538, "learning_rate": 3.4132100433428296e-07, "loss": 0.0327, "step": 32009 }, { "epoch": 3.7958022056207756, "grad_norm": 0.6095127445579519, "learning_rate": 3.409258281432043e-07, "loss": 0.0239, "step": 32010 }, { "epoch": 3.7959207873829004, "grad_norm": 0.5129245771601799, "learning_rate": 3.4053087927854446e-07, "loss": 0.0269, "step": 32011 }, { "epoch": 3.7960393691450256, "grad_norm": 0.4826941739688155, "learning_rate": 3.4013615774395323e-07, "loss": 0.0211, "step": 32012 }, { "epoch": 3.7961579509071504, "grad_norm": 0.89618252946814, "learning_rate": 3.397416635430667e-07, "loss": 0.0473, "step": 32013 }, { "epoch": 3.7962765326692756, "grad_norm": 0.37934123676121706, "learning_rate": 3.3934739667952076e-07, "loss": 0.0159, "step": 32014 }, { "epoch": 3.7963951144314003, "grad_norm": 0.3194797688589997, "learning_rate": 3.3895335715695145e-07, "loss": 0.0084, "step": 32015 }, { "epoch": 3.7965136961935255, "grad_norm": 0.43295016193854907, "learning_rate": 3.385595449789919e-07, "loss": 0.0195, "step": 32016 }, { "epoch": 3.7966322779556503, "grad_norm": 0.6018257258167681, "learning_rate": 3.381659601492726e-07, "loss": 0.0266, "step": 32017 }, { "epoch": 3.7967508597177755, "grad_norm": 0.3947752122193301, "learning_rate": 3.3777260267141565e-07, "loss": 0.0175, "step": 32018 }, { "epoch": 3.7968694414799002, "grad_norm": 0.5733467772497082, "learning_rate": 3.37379472549057e-07, "loss": 0.0364, "step": 32019 }, { "epoch": 3.7969880232420254, "grad_norm": 0.3857041773390212, "learning_rate": 3.369865697858132e-07, "loss": 0.014, "step": 32020 }, { "epoch": 3.79710660500415, "grad_norm": 0.7826137028933772, "learning_rate": 3.365938943853147e-07, "loss": 0.0346, "step": 32021 }, { "epoch": 3.7972251867662754, "grad_norm": 0.46450191327546786, "learning_rate": 3.362014463511698e-07, "loss": 0.02, "step": 32022 }, { "epoch": 3.7973437685284006, "grad_norm": 0.4185698522889124, "learning_rate": 3.35809225687006e-07, "loss": 0.0157, "step": 32023 }, { "epoch": 3.7974623502905254, "grad_norm": 0.36972658826700744, "learning_rate": 3.354172323964372e-07, "loss": 0.0116, "step": 32024 }, { "epoch": 3.79758093205265, "grad_norm": 0.4055682571347903, "learning_rate": 3.350254664830743e-07, "loss": 0.0186, "step": 32025 }, { "epoch": 3.7976995138147753, "grad_norm": 0.36082444154350996, "learning_rate": 3.346339279505284e-07, "loss": 0.0173, "step": 32026 }, { "epoch": 3.7978180955769005, "grad_norm": 0.4776012675286623, "learning_rate": 3.342426168024132e-07, "loss": 0.0181, "step": 32027 }, { "epoch": 3.7979366773390253, "grad_norm": 0.2441742671489448, "learning_rate": 3.3385153304233417e-07, "loss": 0.0075, "step": 32028 }, { "epoch": 3.79805525910115, "grad_norm": 0.4032025987400549, "learning_rate": 3.334606766738968e-07, "loss": 0.0176, "step": 32029 }, { "epoch": 3.7981738408632753, "grad_norm": 0.523345768171412, "learning_rate": 3.3307004770070093e-07, "loss": 0.0224, "step": 32030 }, { "epoch": 3.7982924226254005, "grad_norm": 0.3440080086923662, "learning_rate": 3.3267964612635484e-07, "loss": 0.0147, "step": 32031 }, { "epoch": 3.798411004387525, "grad_norm": 0.3626646971249543, "learning_rate": 3.3228947195445e-07, "loss": 0.0144, "step": 32032 }, { "epoch": 3.79852958614965, "grad_norm": 0.750829083790796, "learning_rate": 3.3189952518858646e-07, "loss": 0.0309, "step": 32033 }, { "epoch": 3.798648167911775, "grad_norm": 0.765692895533417, "learning_rate": 3.31509805832364e-07, "loss": 0.0378, "step": 32034 }, { "epoch": 3.7987667496739004, "grad_norm": 0.475585022325879, "learning_rate": 3.3112031388936595e-07, "loss": 0.0217, "step": 32035 }, { "epoch": 3.798885331436025, "grad_norm": 0.7635317826972164, "learning_rate": 3.3073104936318935e-07, "loss": 0.0416, "step": 32036 }, { "epoch": 3.79900391319815, "grad_norm": 0.9572106174862409, "learning_rate": 3.303420122574202e-07, "loss": 0.0314, "step": 32037 }, { "epoch": 3.799122494960275, "grad_norm": 0.553696279875005, "learning_rate": 3.299532025756447e-07, "loss": 0.0253, "step": 32038 }, { "epoch": 3.7992410767224003, "grad_norm": 0.5600736243460904, "learning_rate": 3.295646203214514e-07, "loss": 0.0217, "step": 32039 }, { "epoch": 3.799359658484525, "grad_norm": 0.4636754865267618, "learning_rate": 3.2917626549841816e-07, "loss": 0.024, "step": 32040 }, { "epoch": 3.79947824024665, "grad_norm": 0.6723779372690409, "learning_rate": 3.2878813811012544e-07, "loss": 0.036, "step": 32041 }, { "epoch": 3.799596822008775, "grad_norm": 0.44666651017318737, "learning_rate": 3.2840023816015087e-07, "loss": 0.0224, "step": 32042 }, { "epoch": 3.7997154037709002, "grad_norm": 0.5440321738303014, "learning_rate": 3.280125656520777e-07, "loss": 0.0352, "step": 32043 }, { "epoch": 3.799833985533025, "grad_norm": 0.5211361513039947, "learning_rate": 3.276251205894698e-07, "loss": 0.0281, "step": 32044 }, { "epoch": 3.7999525672951497, "grad_norm": 0.5912881771001252, "learning_rate": 3.2723790297590205e-07, "loss": 0.0241, "step": 32045 }, { "epoch": 3.800071149057275, "grad_norm": 0.6782233748381558, "learning_rate": 3.268509128149466e-07, "loss": 0.038, "step": 32046 }, { "epoch": 3.8001897308194, "grad_norm": 0.3804977987958303, "learning_rate": 3.2646415011016727e-07, "loss": 0.0145, "step": 32047 }, { "epoch": 3.800308312581525, "grad_norm": 0.662231140053833, "learning_rate": 3.2607761486513345e-07, "loss": 0.0229, "step": 32048 }, { "epoch": 3.80042689434365, "grad_norm": 0.6877833916737834, "learning_rate": 3.256913070834061e-07, "loss": 0.0265, "step": 32049 }, { "epoch": 3.800545476105775, "grad_norm": 0.8368045644393477, "learning_rate": 3.2530522676854914e-07, "loss": 0.0331, "step": 32050 }, { "epoch": 3.8006640578679, "grad_norm": 0.4924506940588645, "learning_rate": 3.2491937392412076e-07, "loss": 0.0241, "step": 32051 }, { "epoch": 3.800782639630025, "grad_norm": 0.5972970916217315, "learning_rate": 3.2453374855367367e-07, "loss": 0.0341, "step": 32052 }, { "epoch": 3.80090122139215, "grad_norm": 0.817575284271348, "learning_rate": 3.2414835066076897e-07, "loss": 0.0302, "step": 32053 }, { "epoch": 3.801019803154275, "grad_norm": 0.5706619896285653, "learning_rate": 3.237631802489538e-07, "loss": 0.0262, "step": 32054 }, { "epoch": 3.8011383849164, "grad_norm": 0.6560760255223475, "learning_rate": 3.233782373217864e-07, "loss": 0.028, "step": 32055 }, { "epoch": 3.8012569666785248, "grad_norm": 0.6119848988173971, "learning_rate": 3.229935218828084e-07, "loss": 0.0233, "step": 32056 }, { "epoch": 3.80137554844065, "grad_norm": 0.9422633298683452, "learning_rate": 3.2260903393556974e-07, "loss": 0.0315, "step": 32057 }, { "epoch": 3.8014941302027747, "grad_norm": 0.3013050254933717, "learning_rate": 3.2222477348361477e-07, "loss": 0.0122, "step": 32058 }, { "epoch": 3.8016127119649, "grad_norm": 0.588623595254943, "learning_rate": 3.218407405304852e-07, "loss": 0.0316, "step": 32059 }, { "epoch": 3.8017312937270247, "grad_norm": 0.8919653545449426, "learning_rate": 3.2145693507971974e-07, "loss": 0.0483, "step": 32060 }, { "epoch": 3.80184987548915, "grad_norm": 0.42675080554639677, "learning_rate": 3.2107335713486284e-07, "loss": 0.0167, "step": 32061 }, { "epoch": 3.8019684572512746, "grad_norm": 0.531459638507006, "learning_rate": 3.2069000669944504e-07, "loss": 0.0184, "step": 32062 }, { "epoch": 3.8020870390134, "grad_norm": 0.3182891720999379, "learning_rate": 3.203068837769996e-07, "loss": 0.0157, "step": 32063 }, { "epoch": 3.8022056207755246, "grad_norm": 0.5889466607787966, "learning_rate": 3.1992398837105976e-07, "loss": 0.0226, "step": 32064 }, { "epoch": 3.80232420253765, "grad_norm": 0.7308325999502061, "learning_rate": 3.1954132048515886e-07, "loss": 0.0393, "step": 32065 }, { "epoch": 3.8024427842997746, "grad_norm": 0.362475521193777, "learning_rate": 3.191588801228218e-07, "loss": 0.0138, "step": 32066 }, { "epoch": 3.8025613660618998, "grad_norm": 0.4238304208673787, "learning_rate": 3.1877666728757084e-07, "loss": 0.02, "step": 32067 }, { "epoch": 3.8026799478240245, "grad_norm": 0.321656604164187, "learning_rate": 3.1839468198293644e-07, "loss": 0.0128, "step": 32068 }, { "epoch": 3.8027985295861497, "grad_norm": 0.438210637521601, "learning_rate": 3.180129242124352e-07, "loss": 0.0214, "step": 32069 }, { "epoch": 3.8029171113482745, "grad_norm": 0.36156277096268574, "learning_rate": 3.176313939795894e-07, "loss": 0.0187, "step": 32070 }, { "epoch": 3.8030356931103997, "grad_norm": 0.5594457925522613, "learning_rate": 3.1725009128791007e-07, "loss": 0.0245, "step": 32071 }, { "epoch": 3.8031542748725244, "grad_norm": 0.30498627428708536, "learning_rate": 3.1686901614091937e-07, "loss": 0.0117, "step": 32072 }, { "epoch": 3.8032728566346496, "grad_norm": 0.547039815985683, "learning_rate": 3.1648816854212837e-07, "loss": 0.0291, "step": 32073 }, { "epoch": 3.8033914383967744, "grad_norm": 0.509459396904311, "learning_rate": 3.161075484950482e-07, "loss": 0.0226, "step": 32074 }, { "epoch": 3.8035100201588996, "grad_norm": 0.39319273856593684, "learning_rate": 3.1572715600318436e-07, "loss": 0.0178, "step": 32075 }, { "epoch": 3.803628601921025, "grad_norm": 0.8279689407482818, "learning_rate": 3.1534699107004795e-07, "loss": 0.042, "step": 32076 }, { "epoch": 3.8037471836831496, "grad_norm": 0.5079743038531088, "learning_rate": 3.1496705369914446e-07, "loss": 0.0179, "step": 32077 }, { "epoch": 3.8038657654452743, "grad_norm": 0.7613423154582344, "learning_rate": 3.145873438939684e-07, "loss": 0.0383, "step": 32078 }, { "epoch": 3.8039843472073995, "grad_norm": 0.514980523284161, "learning_rate": 3.1420786165803076e-07, "loss": 0.0195, "step": 32079 }, { "epoch": 3.8041029289695247, "grad_norm": 0.8085188980517779, "learning_rate": 3.138286069948232e-07, "loss": 0.0395, "step": 32080 }, { "epoch": 3.8042215107316495, "grad_norm": 0.7043556742249236, "learning_rate": 3.1344957990784573e-07, "loss": 0.032, "step": 32081 }, { "epoch": 3.8043400924937743, "grad_norm": 0.46454222624150804, "learning_rate": 3.1307078040058725e-07, "loss": 0.0189, "step": 32082 }, { "epoch": 3.8044586742558995, "grad_norm": 0.43470727433312056, "learning_rate": 3.126922084765449e-07, "loss": 0.0237, "step": 32083 }, { "epoch": 3.8045772560180247, "grad_norm": 0.6467265686650456, "learning_rate": 3.1231386413920485e-07, "loss": 0.0203, "step": 32084 }, { "epoch": 3.8046958377801494, "grad_norm": 0.8446253394920026, "learning_rate": 3.119357473920587e-07, "loss": 0.0418, "step": 32085 }, { "epoch": 3.804814419542274, "grad_norm": 0.6933481695766333, "learning_rate": 3.1155785823858973e-07, "loss": 0.0297, "step": 32086 }, { "epoch": 3.8049330013043994, "grad_norm": 0.6758497281701533, "learning_rate": 3.1118019668228137e-07, "loss": 0.0393, "step": 32087 }, { "epoch": 3.8050515830665246, "grad_norm": 0.35198321742268096, "learning_rate": 3.1080276272661966e-07, "loss": 0.0198, "step": 32088 }, { "epoch": 3.8051701648286493, "grad_norm": 0.3947356595009336, "learning_rate": 3.1042555637507686e-07, "loss": 0.0201, "step": 32089 }, { "epoch": 3.805288746590774, "grad_norm": 0.6436727139003262, "learning_rate": 3.1004857763113624e-07, "loss": 0.0235, "step": 32090 }, { "epoch": 3.8054073283528993, "grad_norm": 0.39725264891251505, "learning_rate": 3.096718264982701e-07, "loss": 0.0139, "step": 32091 }, { "epoch": 3.8055259101150245, "grad_norm": 0.3018989310642143, "learning_rate": 3.0929530297995333e-07, "loss": 0.0135, "step": 32092 }, { "epoch": 3.8056444918771493, "grad_norm": 0.5616134769005469, "learning_rate": 3.089190070796527e-07, "loss": 0.0201, "step": 32093 }, { "epoch": 3.805763073639274, "grad_norm": 0.5462253643998886, "learning_rate": 3.0854293880084593e-07, "loss": 0.0287, "step": 32094 }, { "epoch": 3.805881655401399, "grad_norm": 0.5189256903485824, "learning_rate": 3.081670981469914e-07, "loss": 0.0206, "step": 32095 }, { "epoch": 3.8060002371635244, "grad_norm": 0.2525075201867821, "learning_rate": 3.077914851215585e-07, "loss": 0.0078, "step": 32096 }, { "epoch": 3.806118818925649, "grad_norm": 0.33026433980000824, "learning_rate": 3.074160997280057e-07, "loss": 0.0138, "step": 32097 }, { "epoch": 3.8062374006877744, "grad_norm": 0.3927463063330785, "learning_rate": 3.0704094196979947e-07, "loss": 0.0191, "step": 32098 }, { "epoch": 3.806355982449899, "grad_norm": 0.61555998266712, "learning_rate": 3.066660118503956e-07, "loss": 0.0239, "step": 32099 }, { "epoch": 3.8064745642120243, "grad_norm": 0.7598710090053276, "learning_rate": 3.0629130937324667e-07, "loss": 0.0393, "step": 32100 }, { "epoch": 3.806593145974149, "grad_norm": 1.0576635724729997, "learning_rate": 3.05916834541814e-07, "loss": 0.0575, "step": 32101 }, { "epoch": 3.8067117277362743, "grad_norm": 0.3993152014116616, "learning_rate": 3.055425873595419e-07, "loss": 0.0239, "step": 32102 }, { "epoch": 3.806830309498399, "grad_norm": 0.6733542696420486, "learning_rate": 3.051685678298888e-07, "loss": 0.0356, "step": 32103 }, { "epoch": 3.8069488912605243, "grad_norm": 0.500464831214322, "learning_rate": 3.0479477595629636e-07, "loss": 0.0217, "step": 32104 }, { "epoch": 3.807067473022649, "grad_norm": 0.3312797896224538, "learning_rate": 3.044212117422146e-07, "loss": 0.0137, "step": 32105 }, { "epoch": 3.8071860547847742, "grad_norm": 0.45716445593466887, "learning_rate": 3.0404787519108523e-07, "loss": 0.0173, "step": 32106 }, { "epoch": 3.807304636546899, "grad_norm": 0.3211063088180714, "learning_rate": 3.0367476630634995e-07, "loss": 0.0118, "step": 32107 }, { "epoch": 3.807423218309024, "grad_norm": 0.4084646214768792, "learning_rate": 3.033018850914476e-07, "loss": 0.0152, "step": 32108 }, { "epoch": 3.807541800071149, "grad_norm": 0.4990642646643085, "learning_rate": 3.0292923154981723e-07, "loss": 0.0271, "step": 32109 }, { "epoch": 3.807660381833274, "grad_norm": 0.6475374536322294, "learning_rate": 3.0255680568489764e-07, "loss": 0.0287, "step": 32110 }, { "epoch": 3.807778963595399, "grad_norm": 0.3767821490455209, "learning_rate": 3.021846075001139e-07, "loss": 0.0168, "step": 32111 }, { "epoch": 3.807897545357524, "grad_norm": 0.4943377902742331, "learning_rate": 3.0181263699890215e-07, "loss": 0.0188, "step": 32112 }, { "epoch": 3.808016127119649, "grad_norm": 0.36479195021532557, "learning_rate": 3.0144089418469304e-07, "loss": 0.0136, "step": 32113 }, { "epoch": 3.808134708881774, "grad_norm": 0.5043447536590082, "learning_rate": 3.010693790609087e-07, "loss": 0.0338, "step": 32114 }, { "epoch": 3.808253290643899, "grad_norm": 0.5528151844315454, "learning_rate": 3.006980916309798e-07, "loss": 0.0219, "step": 32115 }, { "epoch": 3.808371872406024, "grad_norm": 0.5165873487304844, "learning_rate": 3.0032703189832314e-07, "loss": 0.0277, "step": 32116 }, { "epoch": 3.808490454168149, "grad_norm": 0.7942713850954259, "learning_rate": 2.999561998663636e-07, "loss": 0.0378, "step": 32117 }, { "epoch": 3.808609035930274, "grad_norm": 0.6961625744568131, "learning_rate": 2.9958559553852074e-07, "loss": 0.0354, "step": 32118 }, { "epoch": 3.8087276176923988, "grad_norm": 0.6469532821970493, "learning_rate": 2.992152189182057e-07, "loss": 0.0339, "step": 32119 }, { "epoch": 3.808846199454524, "grad_norm": 0.4847831975174317, "learning_rate": 2.98845070008838e-07, "loss": 0.0206, "step": 32120 }, { "epoch": 3.8089647812166487, "grad_norm": 0.3605426488564424, "learning_rate": 2.9847514881382876e-07, "loss": 0.0188, "step": 32121 }, { "epoch": 3.809083362978774, "grad_norm": 0.7590539188283542, "learning_rate": 2.981054553365864e-07, "loss": 0.0333, "step": 32122 }, { "epoch": 3.8092019447408987, "grad_norm": 0.6035542228609977, "learning_rate": 2.9773598958052205e-07, "loss": 0.0273, "step": 32123 }, { "epoch": 3.809320526503024, "grad_norm": 0.4979172267200873, "learning_rate": 2.973667515490386e-07, "loss": 0.0143, "step": 32124 }, { "epoch": 3.809439108265149, "grad_norm": 0.4557807647319833, "learning_rate": 2.969977412455416e-07, "loss": 0.0223, "step": 32125 }, { "epoch": 3.809557690027274, "grad_norm": 0.509152378761844, "learning_rate": 2.9662895867343387e-07, "loss": 0.0222, "step": 32126 }, { "epoch": 3.8096762717893986, "grad_norm": 0.3595126656861218, "learning_rate": 2.9626040383611e-07, "loss": 0.0177, "step": 32127 }, { "epoch": 3.809794853551524, "grad_norm": 0.497496558177567, "learning_rate": 2.958920767369755e-07, "loss": 0.0245, "step": 32128 }, { "epoch": 3.809913435313649, "grad_norm": 0.5400028938142566, "learning_rate": 2.9552397737941937e-07, "loss": 0.0273, "step": 32129 }, { "epoch": 3.8100320170757738, "grad_norm": 0.7629963517153588, "learning_rate": 2.951561057668417e-07, "loss": 0.0282, "step": 32130 }, { "epoch": 3.8101505988378985, "grad_norm": 0.3986755723248289, "learning_rate": 2.947884619026231e-07, "loss": 0.0163, "step": 32131 }, { "epoch": 3.8102691806000237, "grad_norm": 0.6054364225380848, "learning_rate": 2.944210457901636e-07, "loss": 0.0256, "step": 32132 }, { "epoch": 3.810387762362149, "grad_norm": 0.568111852209842, "learning_rate": 2.940538574328466e-07, "loss": 0.0267, "step": 32133 }, { "epoch": 3.8105063441242737, "grad_norm": 0.960676663239833, "learning_rate": 2.936868968340528e-07, "loss": 0.0475, "step": 32134 }, { "epoch": 3.8106249258863985, "grad_norm": 0.6167569819632651, "learning_rate": 2.933201639971711e-07, "loss": 0.0278, "step": 32135 }, { "epoch": 3.8107435076485237, "grad_norm": 0.6558509091234292, "learning_rate": 2.9295365892558214e-07, "loss": 0.0281, "step": 32136 }, { "epoch": 3.810862089410649, "grad_norm": 0.6174441140804513, "learning_rate": 2.92587381622661e-07, "loss": 0.0346, "step": 32137 }, { "epoch": 3.8109806711727736, "grad_norm": 0.344222166350809, "learning_rate": 2.9222133209178546e-07, "loss": 0.0173, "step": 32138 }, { "epoch": 3.8110992529348984, "grad_norm": 0.4380606430623032, "learning_rate": 2.918555103363307e-07, "loss": 0.0222, "step": 32139 }, { "epoch": 3.8112178346970236, "grad_norm": 0.44861400352474984, "learning_rate": 2.91489916359669e-07, "loss": 0.0181, "step": 32140 }, { "epoch": 3.811336416459149, "grad_norm": 0.3632571935040418, "learning_rate": 2.911245501651727e-07, "loss": 0.0109, "step": 32141 }, { "epoch": 3.8114549982212735, "grad_norm": 0.7596862346415664, "learning_rate": 2.907594117562057e-07, "loss": 0.0313, "step": 32142 }, { "epoch": 3.8115735799833983, "grad_norm": 0.49370307743134734, "learning_rate": 2.9039450113613753e-07, "loss": 0.022, "step": 32143 }, { "epoch": 3.8116921617455235, "grad_norm": 0.596876350266793, "learning_rate": 2.900298183083294e-07, "loss": 0.025, "step": 32144 }, { "epoch": 3.8118107435076487, "grad_norm": 0.4070314030500513, "learning_rate": 2.8966536327614813e-07, "loss": 0.0146, "step": 32145 }, { "epoch": 3.8119293252697735, "grad_norm": 0.6682084852075161, "learning_rate": 2.893011360429465e-07, "loss": 0.0379, "step": 32146 }, { "epoch": 3.812047907031898, "grad_norm": 0.7713628780711353, "learning_rate": 2.8893713661208854e-07, "loss": 0.0411, "step": 32147 }, { "epoch": 3.8121664887940234, "grad_norm": 0.5252344007130583, "learning_rate": 2.8857336498692713e-07, "loss": 0.0264, "step": 32148 }, { "epoch": 3.8122850705561486, "grad_norm": 0.696424277436016, "learning_rate": 2.882098211708151e-07, "loss": 0.0183, "step": 32149 }, { "epoch": 3.8124036523182734, "grad_norm": 0.5410910609887338, "learning_rate": 2.878465051671081e-07, "loss": 0.0222, "step": 32150 }, { "epoch": 3.8125222340803986, "grad_norm": 0.39307738535373016, "learning_rate": 2.874834169791507e-07, "loss": 0.0213, "step": 32151 }, { "epoch": 3.8126408158425233, "grad_norm": 0.3639490551697164, "learning_rate": 2.8712055661029025e-07, "loss": 0.0146, "step": 32152 }, { "epoch": 3.8127593976046485, "grad_norm": 0.5230112626611549, "learning_rate": 2.86757924063874e-07, "loss": 0.0223, "step": 32153 }, { "epoch": 3.8128779793667733, "grad_norm": 0.4175185836371759, "learning_rate": 2.863955193432438e-07, "loss": 0.0182, "step": 32154 }, { "epoch": 3.8129965611288985, "grad_norm": 0.5987133493123983, "learning_rate": 2.860333424517442e-07, "loss": 0.0224, "step": 32155 }, { "epoch": 3.8131151428910233, "grad_norm": 0.5778298840508103, "learning_rate": 2.8567139339270577e-07, "loss": 0.0238, "step": 32156 }, { "epoch": 3.8132337246531485, "grad_norm": 0.48099292484348594, "learning_rate": 2.853096721694731e-07, "loss": 0.0203, "step": 32157 }, { "epoch": 3.8133523064152732, "grad_norm": 0.6205378948887058, "learning_rate": 2.849481787853797e-07, "loss": 0.0318, "step": 32158 }, { "epoch": 3.8134708881773984, "grad_norm": 0.5864162671376884, "learning_rate": 2.845869132437562e-07, "loss": 0.0208, "step": 32159 }, { "epoch": 3.813589469939523, "grad_norm": 0.4239473056638681, "learning_rate": 2.8422587554793044e-07, "loss": 0.0142, "step": 32160 }, { "epoch": 3.8137080517016484, "grad_norm": 0.5561876994845704, "learning_rate": 2.838650657012359e-07, "loss": 0.0244, "step": 32161 }, { "epoch": 3.813826633463773, "grad_norm": 0.6096188508105082, "learning_rate": 2.8350448370699503e-07, "loss": 0.0291, "step": 32162 }, { "epoch": 3.8139452152258984, "grad_norm": 0.4373125667688239, "learning_rate": 2.831441295685328e-07, "loss": 0.021, "step": 32163 }, { "epoch": 3.814063796988023, "grad_norm": 0.5156895134029705, "learning_rate": 2.827840032891743e-07, "loss": 0.0344, "step": 32164 }, { "epoch": 3.8141823787501483, "grad_norm": 0.514217433159126, "learning_rate": 2.824241048722365e-07, "loss": 0.0248, "step": 32165 }, { "epoch": 3.814300960512273, "grad_norm": 0.6192485951374296, "learning_rate": 2.820644343210388e-07, "loss": 0.0333, "step": 32166 }, { "epoch": 3.8144195422743983, "grad_norm": 0.8838464215050871, "learning_rate": 2.8170499163889253e-07, "loss": 0.0363, "step": 32167 }, { "epoch": 3.814538124036523, "grad_norm": 0.5453269974764499, "learning_rate": 2.8134577682911724e-07, "loss": 0.0247, "step": 32168 }, { "epoch": 3.8146567057986482, "grad_norm": 0.559958577048024, "learning_rate": 2.8098678989502136e-07, "loss": 0.0254, "step": 32169 }, { "epoch": 3.814775287560773, "grad_norm": 0.41172191714659967, "learning_rate": 2.8062803083991617e-07, "loss": 0.0161, "step": 32170 }, { "epoch": 3.814893869322898, "grad_norm": 0.46594771121041234, "learning_rate": 2.802694996671046e-07, "loss": 0.0237, "step": 32171 }, { "epoch": 3.815012451085023, "grad_norm": 0.526639268727333, "learning_rate": 2.7991119637989504e-07, "loss": 0.0196, "step": 32172 }, { "epoch": 3.815131032847148, "grad_norm": 0.6194033140248086, "learning_rate": 2.7955312098159326e-07, "loss": 0.0294, "step": 32173 }, { "epoch": 3.8152496146092734, "grad_norm": 0.40909722980669366, "learning_rate": 2.7919527347549657e-07, "loss": 0.0174, "step": 32174 }, { "epoch": 3.815368196371398, "grad_norm": 0.6547848852081021, "learning_rate": 2.7883765386490236e-07, "loss": 0.0299, "step": 32175 }, { "epoch": 3.815486778133523, "grad_norm": 0.39778716266662484, "learning_rate": 2.784802621531107e-07, "loss": 0.0169, "step": 32176 }, { "epoch": 3.815605359895648, "grad_norm": 0.5050673730790161, "learning_rate": 2.781230983434191e-07, "loss": 0.0202, "step": 32177 }, { "epoch": 3.8157239416577733, "grad_norm": 0.3611663462208305, "learning_rate": 2.7776616243911366e-07, "loss": 0.0119, "step": 32178 }, { "epoch": 3.815842523419898, "grad_norm": 0.7727142040202052, "learning_rate": 2.774094544434891e-07, "loss": 0.0322, "step": 32179 }, { "epoch": 3.815961105182023, "grad_norm": 0.5526738371916118, "learning_rate": 2.7705297435983156e-07, "loss": 0.0254, "step": 32180 }, { "epoch": 3.816079686944148, "grad_norm": 0.6368852707357536, "learning_rate": 2.766967221914302e-07, "loss": 0.0351, "step": 32181 }, { "epoch": 3.816198268706273, "grad_norm": 0.8199680520175389, "learning_rate": 2.763406979415628e-07, "loss": 0.0333, "step": 32182 }, { "epoch": 3.816316850468398, "grad_norm": 0.642397478634977, "learning_rate": 2.759849016135185e-07, "loss": 0.0258, "step": 32183 }, { "epoch": 3.8164354322305227, "grad_norm": 0.6666426205089777, "learning_rate": 2.7562933321057525e-07, "loss": 0.0283, "step": 32184 }, { "epoch": 3.816554013992648, "grad_norm": 0.739024718972567, "learning_rate": 2.7527399273601094e-07, "loss": 0.0398, "step": 32185 }, { "epoch": 3.816672595754773, "grad_norm": 0.7535639307639996, "learning_rate": 2.7491888019310074e-07, "loss": 0.0211, "step": 32186 }, { "epoch": 3.816791177516898, "grad_norm": 0.4370430186018234, "learning_rate": 2.7456399558511705e-07, "loss": 0.0187, "step": 32187 }, { "epoch": 3.8169097592790227, "grad_norm": 0.6517119258880838, "learning_rate": 2.7420933891533493e-07, "loss": 0.0254, "step": 32188 }, { "epoch": 3.817028341041148, "grad_norm": 0.5518762920010124, "learning_rate": 2.738549101870214e-07, "loss": 0.0248, "step": 32189 }, { "epoch": 3.817146922803273, "grad_norm": 0.5401496396390258, "learning_rate": 2.735007094034431e-07, "loss": 0.0208, "step": 32190 }, { "epoch": 3.817265504565398, "grad_norm": 0.6497218695862856, "learning_rate": 2.7314673656786693e-07, "loss": 0.0211, "step": 32191 }, { "epoch": 3.8173840863275226, "grad_norm": 0.5583087510112364, "learning_rate": 2.7279299168355976e-07, "loss": 0.0319, "step": 32192 }, { "epoch": 3.8175026680896478, "grad_norm": 0.6899440356276416, "learning_rate": 2.7243947475377453e-07, "loss": 0.0351, "step": 32193 }, { "epoch": 3.817621249851773, "grad_norm": 0.5361484105575834, "learning_rate": 2.7208618578177523e-07, "loss": 0.0207, "step": 32194 }, { "epoch": 3.8177398316138977, "grad_norm": 0.31705951183652753, "learning_rate": 2.717331247708177e-07, "loss": 0.013, "step": 32195 }, { "epoch": 3.8178584133760225, "grad_norm": 0.4453075947846347, "learning_rate": 2.7138029172415757e-07, "loss": 0.0231, "step": 32196 }, { "epoch": 3.8179769951381477, "grad_norm": 0.5207109924855691, "learning_rate": 2.7102768664504787e-07, "loss": 0.0187, "step": 32197 }, { "epoch": 3.818095576900273, "grad_norm": 0.3103661846279404, "learning_rate": 2.7067530953673594e-07, "loss": 0.0156, "step": 32198 }, { "epoch": 3.8182141586623977, "grad_norm": 0.5703196836919577, "learning_rate": 2.703231604024747e-07, "loss": 0.022, "step": 32199 }, { "epoch": 3.818332740424523, "grad_norm": 0.6039950880931766, "learning_rate": 2.6997123924550617e-07, "loss": 0.0281, "step": 32200 }, { "epoch": 3.8184513221866476, "grad_norm": 0.5019789927083761, "learning_rate": 2.6961954606908035e-07, "loss": 0.0192, "step": 32201 }, { "epoch": 3.818569903948773, "grad_norm": 0.5721215583956886, "learning_rate": 2.6926808087643087e-07, "loss": 0.0218, "step": 32202 }, { "epoch": 3.8186884857108976, "grad_norm": 0.4696212422364732, "learning_rate": 2.6891684367080503e-07, "loss": 0.0245, "step": 32203 }, { "epoch": 3.818807067473023, "grad_norm": 0.49863587270982584, "learning_rate": 2.6856583445543924e-07, "loss": 0.0252, "step": 32204 }, { "epoch": 3.8189256492351475, "grad_norm": 0.7004597122940662, "learning_rate": 2.6821505323356976e-07, "loss": 0.0318, "step": 32205 }, { "epoch": 3.8190442309972727, "grad_norm": 0.4730078141430379, "learning_rate": 2.678645000084273e-07, "loss": 0.0197, "step": 32206 }, { "epoch": 3.8191628127593975, "grad_norm": 0.35563681793354585, "learning_rate": 2.675141747832455e-07, "loss": 0.0113, "step": 32207 }, { "epoch": 3.8192813945215227, "grad_norm": 0.3753308417357076, "learning_rate": 2.671640775612577e-07, "loss": 0.0144, "step": 32208 }, { "epoch": 3.8193999762836475, "grad_norm": 0.507144588843481, "learning_rate": 2.6681420834568374e-07, "loss": 0.0274, "step": 32209 }, { "epoch": 3.8195185580457727, "grad_norm": 0.5399470820298754, "learning_rate": 2.664645671397542e-07, "loss": 0.0184, "step": 32210 }, { "epoch": 3.8196371398078974, "grad_norm": 0.7952011556432225, "learning_rate": 2.661151539466916e-07, "loss": 0.0244, "step": 32211 }, { "epoch": 3.8197557215700226, "grad_norm": 0.9487168637397733, "learning_rate": 2.657659687697156e-07, "loss": 0.0396, "step": 32212 }, { "epoch": 3.8198743033321474, "grad_norm": 0.4617495909953384, "learning_rate": 2.654170116120458e-07, "loss": 0.0267, "step": 32213 }, { "epoch": 3.8199928850942726, "grad_norm": 0.37874086369923865, "learning_rate": 2.6506828247690183e-07, "loss": 0.0178, "step": 32214 }, { "epoch": 3.8201114668563974, "grad_norm": 0.546500322457693, "learning_rate": 2.647197813674951e-07, "loss": 0.031, "step": 32215 }, { "epoch": 3.8202300486185226, "grad_norm": 0.5046283227730778, "learning_rate": 2.643715082870396e-07, "loss": 0.0286, "step": 32216 }, { "epoch": 3.8203486303806473, "grad_norm": 0.3908885599982109, "learning_rate": 2.6402346323874674e-07, "loss": 0.0188, "step": 32217 }, { "epoch": 3.8204672121427725, "grad_norm": 0.5129649262158739, "learning_rate": 2.636756462258222e-07, "loss": 0.0268, "step": 32218 }, { "epoch": 3.8205857939048973, "grad_norm": 0.35819007185432894, "learning_rate": 2.6332805725147737e-07, "loss": 0.0178, "step": 32219 }, { "epoch": 3.8207043756670225, "grad_norm": 0.44142026146298186, "learning_rate": 2.629806963189096e-07, "loss": 0.025, "step": 32220 }, { "epoch": 3.8208229574291472, "grad_norm": 0.5752579617436857, "learning_rate": 2.626335634313304e-07, "loss": 0.0227, "step": 32221 }, { "epoch": 3.8209415391912724, "grad_norm": 0.45893093527683004, "learning_rate": 2.6228665859193145e-07, "loss": 0.0293, "step": 32222 }, { "epoch": 3.8210601209533976, "grad_norm": 0.4647831717156037, "learning_rate": 2.6193998180391586e-07, "loss": 0.0221, "step": 32223 }, { "epoch": 3.8211787027155224, "grad_norm": 0.5284659708108459, "learning_rate": 2.6159353307047553e-07, "loss": 0.0241, "step": 32224 }, { "epoch": 3.821297284477647, "grad_norm": 0.5304875058541338, "learning_rate": 2.612473123948078e-07, "loss": 0.0187, "step": 32225 }, { "epoch": 3.8214158662397724, "grad_norm": 0.4799787577688291, "learning_rate": 2.6090131978010467e-07, "loss": 0.0192, "step": 32226 }, { "epoch": 3.8215344480018976, "grad_norm": 0.5232750273028788, "learning_rate": 2.6055555522955246e-07, "loss": 0.025, "step": 32227 }, { "epoch": 3.8216530297640223, "grad_norm": 0.3987948574776581, "learning_rate": 2.602100187463402e-07, "loss": 0.0146, "step": 32228 }, { "epoch": 3.821771611526147, "grad_norm": 0.43419701881066636, "learning_rate": 2.5986471033365436e-07, "loss": 0.0185, "step": 32229 }, { "epoch": 3.8218901932882723, "grad_norm": 0.6958787211139715, "learning_rate": 2.595196299946784e-07, "loss": 0.0326, "step": 32230 }, { "epoch": 3.8220087750503975, "grad_norm": 0.5672662252761036, "learning_rate": 2.5917477773259035e-07, "loss": 0.0292, "step": 32231 }, { "epoch": 3.8221273568125222, "grad_norm": 0.9027498840647368, "learning_rate": 2.588301535505766e-07, "loss": 0.0394, "step": 32232 }, { "epoch": 3.822245938574647, "grad_norm": 0.3521656311347229, "learning_rate": 2.5848575745180403e-07, "loss": 0.0164, "step": 32233 }, { "epoch": 3.822364520336772, "grad_norm": 0.6429599964194214, "learning_rate": 2.5814158943945345e-07, "loss": 0.0274, "step": 32234 }, { "epoch": 3.8224831020988974, "grad_norm": 0.4064868817886187, "learning_rate": 2.5779764951670004e-07, "loss": 0.0206, "step": 32235 }, { "epoch": 3.822601683861022, "grad_norm": 0.6306410854426514, "learning_rate": 2.57453937686708e-07, "loss": 0.0254, "step": 32236 }, { "epoch": 3.822720265623147, "grad_norm": 0.5429582839302577, "learning_rate": 2.5711045395265256e-07, "loss": 0.0231, "step": 32237 }, { "epoch": 3.822838847385272, "grad_norm": 0.46519794595487, "learning_rate": 2.5676719831769505e-07, "loss": 0.0181, "step": 32238 }, { "epoch": 3.8229574291473973, "grad_norm": 0.7280284469981231, "learning_rate": 2.564241707849996e-07, "loss": 0.0247, "step": 32239 }, { "epoch": 3.823076010909522, "grad_norm": 0.5047628281857388, "learning_rate": 2.5608137135773314e-07, "loss": 0.0236, "step": 32240 }, { "epoch": 3.823194592671647, "grad_norm": 0.325679299468194, "learning_rate": 2.557388000390543e-07, "loss": 0.0132, "step": 32241 }, { "epoch": 3.823313174433772, "grad_norm": 0.41539549426857697, "learning_rate": 2.553964568321188e-07, "loss": 0.0154, "step": 32242 }, { "epoch": 3.8234317561958973, "grad_norm": 0.6702192057668742, "learning_rate": 2.5505434174008527e-07, "loss": 0.0356, "step": 32243 }, { "epoch": 3.823550337958022, "grad_norm": 0.33182721996282155, "learning_rate": 2.547124547661067e-07, "loss": 0.0153, "step": 32244 }, { "epoch": 3.8236689197201468, "grad_norm": 0.5817909711514432, "learning_rate": 2.5437079591333344e-07, "loss": 0.0201, "step": 32245 }, { "epoch": 3.823787501482272, "grad_norm": 0.36745071060875256, "learning_rate": 2.540293651849157e-07, "loss": 0.0162, "step": 32246 }, { "epoch": 3.823906083244397, "grad_norm": 0.7260017540196879, "learning_rate": 2.5368816258400094e-07, "loss": 0.0352, "step": 32247 }, { "epoch": 3.824024665006522, "grad_norm": 0.37188829372276855, "learning_rate": 2.5334718811373947e-07, "loss": 0.0173, "step": 32248 }, { "epoch": 3.824143246768647, "grad_norm": 0.5913274947916497, "learning_rate": 2.5300644177726763e-07, "loss": 0.0227, "step": 32249 }, { "epoch": 3.824261828530772, "grad_norm": 0.803227890653559, "learning_rate": 2.5266592357773013e-07, "loss": 0.0336, "step": 32250 }, { "epoch": 3.824380410292897, "grad_norm": 0.4638562715866802, "learning_rate": 2.523256335182661e-07, "loss": 0.0223, "step": 32251 }, { "epoch": 3.824498992055022, "grad_norm": 0.4111870645026095, "learning_rate": 2.51985571602012e-07, "loss": 0.0232, "step": 32252 }, { "epoch": 3.824617573817147, "grad_norm": 0.4888976062042943, "learning_rate": 2.516457378321041e-07, "loss": 0.0179, "step": 32253 }, { "epoch": 3.824736155579272, "grad_norm": 0.4388575645168092, "learning_rate": 2.5130613221167056e-07, "loss": 0.0208, "step": 32254 }, { "epoch": 3.824854737341397, "grad_norm": 0.5281659770810934, "learning_rate": 2.5096675474384767e-07, "loss": 0.0218, "step": 32255 }, { "epoch": 3.824973319103522, "grad_norm": 0.7254443873286416, "learning_rate": 2.5062760543176076e-07, "loss": 0.0258, "step": 32256 }, { "epoch": 3.825091900865647, "grad_norm": 0.4069688959046381, "learning_rate": 2.502886842785407e-07, "loss": 0.0198, "step": 32257 }, { "epoch": 3.8252104826277717, "grad_norm": 0.38135254733149465, "learning_rate": 2.499499912873071e-07, "loss": 0.0157, "step": 32258 }, { "epoch": 3.825329064389897, "grad_norm": 0.5791007514490251, "learning_rate": 2.4961152646118535e-07, "loss": 0.026, "step": 32259 }, { "epoch": 3.8254476461520217, "grad_norm": 0.6488853484444277, "learning_rate": 2.4927328980329235e-07, "loss": 0.0227, "step": 32260 }, { "epoch": 3.825566227914147, "grad_norm": 0.7347388766501896, "learning_rate": 2.489352813167478e-07, "loss": 0.031, "step": 32261 }, { "epoch": 3.8256848096762717, "grad_norm": 0.43742387465332966, "learning_rate": 2.485975010046687e-07, "loss": 0.0208, "step": 32262 }, { "epoch": 3.825803391438397, "grad_norm": 1.0387656285667042, "learning_rate": 2.482599488701692e-07, "loss": 0.043, "step": 32263 }, { "epoch": 3.8259219732005216, "grad_norm": 0.5466711818317451, "learning_rate": 2.4792262491636355e-07, "loss": 0.0211, "step": 32264 }, { "epoch": 3.826040554962647, "grad_norm": 0.3134211991696923, "learning_rate": 2.4758552914635193e-07, "loss": 0.0155, "step": 32265 }, { "epoch": 3.8261591367247716, "grad_norm": 0.8402152122733638, "learning_rate": 2.472486615632541e-07, "loss": 0.0571, "step": 32266 }, { "epoch": 3.826277718486897, "grad_norm": 0.6360947961695947, "learning_rate": 2.4691202217016485e-07, "loss": 0.034, "step": 32267 }, { "epoch": 3.8263963002490216, "grad_norm": 0.39897112065437246, "learning_rate": 2.4657561097019555e-07, "loss": 0.0137, "step": 32268 }, { "epoch": 3.8265148820111468, "grad_norm": 0.6036848400631302, "learning_rate": 2.4623942796644373e-07, "loss": 0.0294, "step": 32269 }, { "epoch": 3.8266334637732715, "grad_norm": 0.6474233328341904, "learning_rate": 2.4590347316200966e-07, "loss": 0.0221, "step": 32270 }, { "epoch": 3.8267520455353967, "grad_norm": 0.6198689176750978, "learning_rate": 2.455677465599909e-07, "loss": 0.0244, "step": 32271 }, { "epoch": 3.8268706272975215, "grad_norm": 0.4060254710941895, "learning_rate": 2.4523224816347946e-07, "loss": 0.0173, "step": 32272 }, { "epoch": 3.8269892090596467, "grad_norm": 0.5246333407283597, "learning_rate": 2.4489697797557e-07, "loss": 0.0275, "step": 32273 }, { "epoch": 3.8271077908217714, "grad_norm": 0.5084248289973646, "learning_rate": 2.445619359993545e-07, "loss": 0.0178, "step": 32274 }, { "epoch": 3.8272263725838966, "grad_norm": 0.6122074111181862, "learning_rate": 2.44227122237925e-07, "loss": 0.0291, "step": 32275 }, { "epoch": 3.827344954346022, "grad_norm": 0.43791797776151653, "learning_rate": 2.438925366943567e-07, "loss": 0.0205, "step": 32276 }, { "epoch": 3.8274635361081466, "grad_norm": 0.5202462067923984, "learning_rate": 2.435581793717473e-07, "loss": 0.0267, "step": 32277 }, { "epoch": 3.8275821178702714, "grad_norm": 0.46010897850555527, "learning_rate": 2.432240502731692e-07, "loss": 0.0161, "step": 32278 }, { "epoch": 3.8277006996323966, "grad_norm": 0.660929119587524, "learning_rate": 2.428901494017116e-07, "loss": 0.0327, "step": 32279 }, { "epoch": 3.8278192813945218, "grad_norm": 0.3948210217950261, "learning_rate": 2.4255647676044156e-07, "loss": 0.021, "step": 32280 }, { "epoch": 3.8279378631566465, "grad_norm": 0.4314419323903525, "learning_rate": 2.422230323524455e-07, "loss": 0.0221, "step": 32281 }, { "epoch": 3.8280564449187713, "grad_norm": 0.4286467959442671, "learning_rate": 2.418898161807903e-07, "loss": 0.0198, "step": 32282 }, { "epoch": 3.8281750266808965, "grad_norm": 0.4027865393466545, "learning_rate": 2.415568282485514e-07, "loss": 0.0182, "step": 32283 }, { "epoch": 3.8282936084430217, "grad_norm": 0.4061894559060245, "learning_rate": 2.412240685587985e-07, "loss": 0.0187, "step": 32284 }, { "epoch": 3.8284121902051464, "grad_norm": 0.48820917865321445, "learning_rate": 2.408915371145959e-07, "loss": 0.0252, "step": 32285 }, { "epoch": 3.828530771967271, "grad_norm": 0.6285162837223278, "learning_rate": 2.4055923391901603e-07, "loss": 0.0352, "step": 32286 }, { "epoch": 3.8286493537293964, "grad_norm": 0.45609878775847923, "learning_rate": 2.4022715897511485e-07, "loss": 0.0146, "step": 32287 }, { "epoch": 3.8287679354915216, "grad_norm": 0.8553391591770297, "learning_rate": 2.398953122859593e-07, "loss": 0.0423, "step": 32288 }, { "epoch": 3.8288865172536464, "grad_norm": 0.7444040512158345, "learning_rate": 2.395636938546025e-07, "loss": 0.0347, "step": 32289 }, { "epoch": 3.829005099015771, "grad_norm": 0.8984575650426113, "learning_rate": 2.3923230368410875e-07, "loss": 0.0316, "step": 32290 }, { "epoch": 3.8291236807778963, "grad_norm": 0.252253717121221, "learning_rate": 2.3890114177752554e-07, "loss": 0.0176, "step": 32291 }, { "epoch": 3.8292422625400215, "grad_norm": 0.87089930028109, "learning_rate": 2.385702081379143e-07, "loss": 0.0361, "step": 32292 }, { "epoch": 3.8293608443021463, "grad_norm": 0.4759857787960349, "learning_rate": 2.3823950276831986e-07, "loss": 0.0232, "step": 32293 }, { "epoch": 3.829479426064271, "grad_norm": 0.2548492522687106, "learning_rate": 2.3790902567178975e-07, "loss": 0.0112, "step": 32294 }, { "epoch": 3.8295980078263963, "grad_norm": 0.5396014585601289, "learning_rate": 2.3757877685137708e-07, "loss": 0.0247, "step": 32295 }, { "epoch": 3.8297165895885215, "grad_norm": 0.56707246949222, "learning_rate": 2.3724875631011835e-07, "loss": 0.022, "step": 32296 }, { "epoch": 3.829835171350646, "grad_norm": 0.30762359048947635, "learning_rate": 2.3691896405106384e-07, "loss": 0.0102, "step": 32297 }, { "epoch": 3.8299537531127714, "grad_norm": 0.46721419410173937, "learning_rate": 2.3658940007724728e-07, "loss": 0.014, "step": 32298 }, { "epoch": 3.830072334874896, "grad_norm": 0.5104014103017009, "learning_rate": 2.3626006439171067e-07, "loss": 0.0184, "step": 32299 }, { "epoch": 3.8301909166370214, "grad_norm": 0.8127604609576257, "learning_rate": 2.3593095699748767e-07, "loss": 0.0287, "step": 32300 }, { "epoch": 3.830309498399146, "grad_norm": 0.3341606975613387, "learning_rate": 2.3560207789761758e-07, "loss": 0.0169, "step": 32301 }, { "epoch": 3.8304280801612713, "grad_norm": 0.48624676261041105, "learning_rate": 2.352734270951229e-07, "loss": 0.0235, "step": 32302 }, { "epoch": 3.830546661923396, "grad_norm": 0.4369865780546054, "learning_rate": 2.3494500459304292e-07, "loss": 0.0176, "step": 32303 }, { "epoch": 3.8306652436855213, "grad_norm": 0.42486631133113273, "learning_rate": 2.3461681039439743e-07, "loss": 0.0211, "step": 32304 }, { "epoch": 3.830783825447646, "grad_norm": 0.5216403676523061, "learning_rate": 2.3428884450221733e-07, "loss": 0.0245, "step": 32305 }, { "epoch": 3.8309024072097713, "grad_norm": 0.40129540639602457, "learning_rate": 2.3396110691952523e-07, "loss": 0.0198, "step": 32306 }, { "epoch": 3.831020988971896, "grad_norm": 0.3388478780161658, "learning_rate": 2.336335976493409e-07, "loss": 0.0108, "step": 32307 }, { "epoch": 3.8311395707340212, "grad_norm": 0.7284856966021265, "learning_rate": 2.3330631669468416e-07, "loss": 0.0396, "step": 32308 }, { "epoch": 3.831258152496146, "grad_norm": 0.5221460312687791, "learning_rate": 2.3297926405857208e-07, "loss": 0.0293, "step": 32309 }, { "epoch": 3.831376734258271, "grad_norm": 0.7147102422588549, "learning_rate": 2.3265243974401885e-07, "loss": 0.0309, "step": 32310 }, { "epoch": 3.831495316020396, "grad_norm": 0.5817799787250829, "learning_rate": 2.3232584375404154e-07, "loss": 0.0302, "step": 32311 }, { "epoch": 3.831613897782521, "grad_norm": 0.3506905357198626, "learning_rate": 2.319994760916433e-07, "loss": 0.0134, "step": 32312 }, { "epoch": 3.831732479544646, "grad_norm": 0.996652070329875, "learning_rate": 2.3167333675984116e-07, "loss": 0.0425, "step": 32313 }, { "epoch": 3.831851061306771, "grad_norm": 0.5155146310844564, "learning_rate": 2.3134742576163548e-07, "loss": 0.0244, "step": 32314 }, { "epoch": 3.831969643068896, "grad_norm": 0.5365646292910884, "learning_rate": 2.3102174310003778e-07, "loss": 0.024, "step": 32315 }, { "epoch": 3.832088224831021, "grad_norm": 0.6071381644504981, "learning_rate": 2.3069628877804005e-07, "loss": 0.022, "step": 32316 }, { "epoch": 3.832206806593146, "grad_norm": 1.2627758771508857, "learning_rate": 2.3037106279865107e-07, "loss": 0.0536, "step": 32317 }, { "epoch": 3.832325388355271, "grad_norm": 0.43219750846162674, "learning_rate": 2.300460651648656e-07, "loss": 0.0232, "step": 32318 }, { "epoch": 3.832443970117396, "grad_norm": 0.4439363793870995, "learning_rate": 2.2972129587968406e-07, "loss": 0.0183, "step": 32319 }, { "epoch": 3.832562551879521, "grad_norm": 0.5287400742560479, "learning_rate": 2.2939675494609293e-07, "loss": 0.0202, "step": 32320 }, { "epoch": 3.8326811336416458, "grad_norm": 0.7413513658735347, "learning_rate": 2.290724423670898e-07, "loss": 0.0294, "step": 32321 }, { "epoch": 3.832799715403771, "grad_norm": 0.795336445165287, "learning_rate": 2.287483581456612e-07, "loss": 0.047, "step": 32322 }, { "epoch": 3.8329182971658957, "grad_norm": 0.38643518473895844, "learning_rate": 2.2842450228479638e-07, "loss": 0.0125, "step": 32323 }, { "epoch": 3.833036878928021, "grad_norm": 0.5877433443800362, "learning_rate": 2.2810087478748188e-07, "loss": 0.0258, "step": 32324 }, { "epoch": 3.833155460690146, "grad_norm": 0.3649339330233206, "learning_rate": 2.2777747565669582e-07, "loss": 0.0146, "step": 32325 }, { "epoch": 3.833274042452271, "grad_norm": 0.7909432419952296, "learning_rate": 2.274543048954303e-07, "loss": 0.0389, "step": 32326 }, { "epoch": 3.8333926242143956, "grad_norm": 0.4332844329127058, "learning_rate": 2.2713136250665513e-07, "loss": 0.0244, "step": 32327 }, { "epoch": 3.833511205976521, "grad_norm": 0.5120039634716914, "learning_rate": 2.2680864849335127e-07, "loss": 0.0224, "step": 32328 }, { "epoch": 3.833629787738646, "grad_norm": 0.4712278762103016, "learning_rate": 2.2648616285848855e-07, "loss": 0.0195, "step": 32329 }, { "epoch": 3.833748369500771, "grad_norm": 0.7766692981044111, "learning_rate": 2.2616390560504795e-07, "loss": 0.0281, "step": 32330 }, { "epoch": 3.8338669512628956, "grad_norm": 0.5286624079002294, "learning_rate": 2.2584187673599655e-07, "loss": 0.0217, "step": 32331 }, { "epoch": 3.8339855330250208, "grad_norm": 0.6403666871471871, "learning_rate": 2.2552007625430137e-07, "loss": 0.0246, "step": 32332 }, { "epoch": 3.834104114787146, "grad_norm": 0.4829232308447923, "learning_rate": 2.2519850416292954e-07, "loss": 0.0197, "step": 32333 }, { "epoch": 3.8342226965492707, "grad_norm": 0.6192070967103722, "learning_rate": 2.248771604648481e-07, "loss": 0.0383, "step": 32334 }, { "epoch": 3.8343412783113955, "grad_norm": 0.7137885539493596, "learning_rate": 2.245560451630213e-07, "loss": 0.0277, "step": 32335 }, { "epoch": 3.8344598600735207, "grad_norm": 0.4452909657052653, "learning_rate": 2.2423515826039965e-07, "loss": 0.0191, "step": 32336 }, { "epoch": 3.834578441835646, "grad_norm": 0.47283426995762634, "learning_rate": 2.2391449975995294e-07, "loss": 0.0217, "step": 32337 }, { "epoch": 3.8346970235977706, "grad_norm": 0.39664387973463916, "learning_rate": 2.235940696646316e-07, "loss": 0.0208, "step": 32338 }, { "epoch": 3.8348156053598954, "grad_norm": 0.41702542274493526, "learning_rate": 2.2327386797738882e-07, "loss": 0.0213, "step": 32339 }, { "epoch": 3.8349341871220206, "grad_norm": 0.5508572236197197, "learning_rate": 2.22953894701175e-07, "loss": 0.0205, "step": 32340 }, { "epoch": 3.835052768884146, "grad_norm": 0.44923706792120477, "learning_rate": 2.2263414983894894e-07, "loss": 0.0191, "step": 32341 }, { "epoch": 3.8351713506462706, "grad_norm": 0.45073190693255993, "learning_rate": 2.223146333936471e-07, "loss": 0.0162, "step": 32342 }, { "epoch": 3.8352899324083953, "grad_norm": 0.45967669959037893, "learning_rate": 2.2199534536821997e-07, "loss": 0.0234, "step": 32343 }, { "epoch": 3.8354085141705205, "grad_norm": 0.6585255123591681, "learning_rate": 2.216762857656124e-07, "loss": 0.0386, "step": 32344 }, { "epoch": 3.8355270959326457, "grad_norm": 0.48906955834510696, "learning_rate": 2.2135745458876368e-07, "loss": 0.0296, "step": 32345 }, { "epoch": 3.8356456776947705, "grad_norm": 0.8845155615688414, "learning_rate": 2.210388518406159e-07, "loss": 0.0459, "step": 32346 }, { "epoch": 3.8357642594568953, "grad_norm": 0.4131396724736266, "learning_rate": 2.2072047752410006e-07, "loss": 0.0238, "step": 32347 }, { "epoch": 3.8358828412190205, "grad_norm": 0.5633948454355348, "learning_rate": 2.2040233164215828e-07, "loss": 0.0264, "step": 32348 }, { "epoch": 3.8360014229811457, "grad_norm": 0.49629830813422643, "learning_rate": 2.2008441419772152e-07, "loss": 0.0201, "step": 32349 }, { "epoch": 3.8361200047432704, "grad_norm": 0.45355420009477887, "learning_rate": 2.19766725193718e-07, "loss": 0.0245, "step": 32350 }, { "epoch": 3.8362385865053956, "grad_norm": 0.6899145395035517, "learning_rate": 2.1944926463307314e-07, "loss": 0.0325, "step": 32351 }, { "epoch": 3.8363571682675204, "grad_norm": 0.5581049053152226, "learning_rate": 2.191320325187235e-07, "loss": 0.0202, "step": 32352 }, { "epoch": 3.8364757500296456, "grad_norm": 0.2885810651338321, "learning_rate": 2.188150288535862e-07, "loss": 0.0084, "step": 32353 }, { "epoch": 3.8365943317917703, "grad_norm": 0.7734100330151826, "learning_rate": 2.1849825364058663e-07, "loss": 0.0443, "step": 32354 }, { "epoch": 3.8367129135538955, "grad_norm": 1.10115929030448, "learning_rate": 2.1818170688264195e-07, "loss": 0.0491, "step": 32355 }, { "epoch": 3.8368314953160203, "grad_norm": 0.5859224093376044, "learning_rate": 2.178653885826748e-07, "loss": 0.0288, "step": 32356 }, { "epoch": 3.8369500770781455, "grad_norm": 0.5158527427362601, "learning_rate": 2.1754929874359954e-07, "loss": 0.0241, "step": 32357 }, { "epoch": 3.8370686588402703, "grad_norm": 0.44969704539666805, "learning_rate": 2.172334373683249e-07, "loss": 0.0143, "step": 32358 }, { "epoch": 3.8371872406023955, "grad_norm": 0.3992747353764825, "learning_rate": 2.1691780445977085e-07, "loss": 0.0163, "step": 32359 }, { "epoch": 3.8373058223645202, "grad_norm": 0.8138595722376765, "learning_rate": 2.166024000208433e-07, "loss": 0.0364, "step": 32360 }, { "epoch": 3.8374244041266454, "grad_norm": 0.33361677727612754, "learning_rate": 2.1628722405445112e-07, "loss": 0.016, "step": 32361 }, { "epoch": 3.83754298588877, "grad_norm": 0.7668842110876931, "learning_rate": 2.159722765634975e-07, "loss": 0.038, "step": 32362 }, { "epoch": 3.8376615676508954, "grad_norm": 0.6187897984533207, "learning_rate": 2.1565755755088568e-07, "loss": 0.0196, "step": 32363 }, { "epoch": 3.83778014941302, "grad_norm": 0.748712181316043, "learning_rate": 2.1534306701952168e-07, "loss": 0.0516, "step": 32364 }, { "epoch": 3.8378987311751454, "grad_norm": 0.5461003813385163, "learning_rate": 2.1502880497230037e-07, "loss": 0.0264, "step": 32365 }, { "epoch": 3.83801731293727, "grad_norm": 0.5668294833245395, "learning_rate": 2.1471477141211948e-07, "loss": 0.023, "step": 32366 }, { "epoch": 3.8381358946993953, "grad_norm": 0.5665004351152113, "learning_rate": 2.1440096634187668e-07, "loss": 0.0313, "step": 32367 }, { "epoch": 3.83825447646152, "grad_norm": 0.640790635943515, "learning_rate": 2.140873897644613e-07, "loss": 0.0267, "step": 32368 }, { "epoch": 3.8383730582236453, "grad_norm": 0.4959819787139154, "learning_rate": 2.1377404168276825e-07, "loss": 0.0162, "step": 32369 }, { "epoch": 3.83849163998577, "grad_norm": 0.7722929892819221, "learning_rate": 2.134609220996814e-07, "loss": 0.0221, "step": 32370 }, { "epoch": 3.8386102217478952, "grad_norm": 0.3919902740813167, "learning_rate": 2.1314803101808723e-07, "loss": 0.0192, "step": 32371 }, { "epoch": 3.83872880351002, "grad_norm": 0.5329213993672982, "learning_rate": 2.1283536844087514e-07, "loss": 0.0238, "step": 32372 }, { "epoch": 3.838847385272145, "grad_norm": 0.5419014964382625, "learning_rate": 2.1252293437092619e-07, "loss": 0.0226, "step": 32373 }, { "epoch": 3.8389659670342704, "grad_norm": 0.8393930749092957, "learning_rate": 2.122107288111158e-07, "loss": 0.0393, "step": 32374 }, { "epoch": 3.839084548796395, "grad_norm": 0.5069578136771319, "learning_rate": 2.1189875176433062e-07, "loss": 0.0352, "step": 32375 }, { "epoch": 3.83920313055852, "grad_norm": 0.5398842269530586, "learning_rate": 2.115870032334377e-07, "loss": 0.0257, "step": 32376 }, { "epoch": 3.839321712320645, "grad_norm": 0.41577663226672046, "learning_rate": 2.112754832213154e-07, "loss": 0.0143, "step": 32377 }, { "epoch": 3.8394402940827703, "grad_norm": 0.37424788037548695, "learning_rate": 2.1096419173083637e-07, "loss": 0.0121, "step": 32378 }, { "epoch": 3.839558875844895, "grad_norm": 0.5706147570145693, "learning_rate": 2.1065312876487053e-07, "loss": 0.0259, "step": 32379 }, { "epoch": 3.83967745760702, "grad_norm": 0.6823555673122391, "learning_rate": 2.1034229432628228e-07, "loss": 0.0282, "step": 32380 }, { "epoch": 3.839796039369145, "grad_norm": 0.46776208204083536, "learning_rate": 2.1003168841793875e-07, "loss": 0.0226, "step": 32381 }, { "epoch": 3.8399146211312702, "grad_norm": 0.320374594414204, "learning_rate": 2.0972131104270432e-07, "loss": 0.0159, "step": 32382 }, { "epoch": 3.840033202893395, "grad_norm": 0.942670068062788, "learning_rate": 2.094111622034378e-07, "loss": 0.0282, "step": 32383 }, { "epoch": 3.8401517846555198, "grad_norm": 0.5970470372927648, "learning_rate": 2.0910124190300363e-07, "loss": 0.0293, "step": 32384 }, { "epoch": 3.840270366417645, "grad_norm": 0.8421805610361163, "learning_rate": 2.0879155014425222e-07, "loss": 0.0298, "step": 32385 }, { "epoch": 3.84038894817977, "grad_norm": 0.5919909256992192, "learning_rate": 2.0848208693004524e-07, "loss": 0.0229, "step": 32386 }, { "epoch": 3.840507529941895, "grad_norm": 0.6095293608635124, "learning_rate": 2.0817285226322757e-07, "loss": 0.0206, "step": 32387 }, { "epoch": 3.8406261117040197, "grad_norm": 0.6157739750354995, "learning_rate": 2.078638461466581e-07, "loss": 0.0274, "step": 32388 }, { "epoch": 3.840744693466145, "grad_norm": 0.9470912813368867, "learning_rate": 2.0755506858317897e-07, "loss": 0.0367, "step": 32389 }, { "epoch": 3.84086327522827, "grad_norm": 0.4976669549629656, "learning_rate": 2.0724651957564344e-07, "loss": 0.0243, "step": 32390 }, { "epoch": 3.840981856990395, "grad_norm": 0.5944447079596484, "learning_rate": 2.0693819912688816e-07, "loss": 0.0288, "step": 32391 }, { "epoch": 3.8411004387525196, "grad_norm": 0.5289208929120277, "learning_rate": 2.0663010723976084e-07, "loss": 0.0259, "step": 32392 }, { "epoch": 3.841219020514645, "grad_norm": 0.260616565795419, "learning_rate": 2.0632224391710086e-07, "loss": 0.0091, "step": 32393 }, { "epoch": 3.84133760227677, "grad_norm": 0.9564587101313327, "learning_rate": 2.0601460916174485e-07, "loss": 0.0478, "step": 32394 }, { "epoch": 3.8414561840388948, "grad_norm": 0.5705764190186831, "learning_rate": 2.0570720297653224e-07, "loss": 0.0304, "step": 32395 }, { "epoch": 3.8415747658010195, "grad_norm": 0.38597600913363606, "learning_rate": 2.0540002536429125e-07, "loss": 0.0127, "step": 32396 }, { "epoch": 3.8416933475631447, "grad_norm": 0.4645373001196363, "learning_rate": 2.0509307632785856e-07, "loss": 0.0244, "step": 32397 }, { "epoch": 3.84181192932527, "grad_norm": 0.46939395527594713, "learning_rate": 2.0478635587005968e-07, "loss": 0.0263, "step": 32398 }, { "epoch": 3.8419305110873947, "grad_norm": 0.4046067665307863, "learning_rate": 2.0447986399372843e-07, "loss": 0.0204, "step": 32399 }, { "epoch": 3.84204909284952, "grad_norm": 0.745602918596502, "learning_rate": 2.0417360070168478e-07, "loss": 0.0312, "step": 32400 }, { "epoch": 3.8421676746116447, "grad_norm": 0.5495547866683418, "learning_rate": 2.0386756599675427e-07, "loss": 0.022, "step": 32401 }, { "epoch": 3.84228625637377, "grad_norm": 0.4852183612633113, "learning_rate": 2.0356175988175686e-07, "loss": 0.014, "step": 32402 }, { "epoch": 3.8424048381358946, "grad_norm": 0.695991659181916, "learning_rate": 2.032561823595125e-07, "loss": 0.0342, "step": 32403 }, { "epoch": 3.84252341989802, "grad_norm": 0.3751964876219734, "learning_rate": 2.029508334328384e-07, "loss": 0.0197, "step": 32404 }, { "epoch": 3.8426420016601446, "grad_norm": 0.44743208858948774, "learning_rate": 2.026457131045517e-07, "loss": 0.0157, "step": 32405 }, { "epoch": 3.84276058342227, "grad_norm": 0.5799259588415134, "learning_rate": 2.0234082137746134e-07, "loss": 0.0238, "step": 32406 }, { "epoch": 3.8428791651843945, "grad_norm": 0.5503692584630047, "learning_rate": 2.0203615825437894e-07, "loss": 0.0327, "step": 32407 }, { "epoch": 3.8429977469465197, "grad_norm": 0.5392787348008629, "learning_rate": 2.017317237381161e-07, "loss": 0.0193, "step": 32408 }, { "epoch": 3.8431163287086445, "grad_norm": 0.5068360982230334, "learning_rate": 2.0142751783147617e-07, "loss": 0.0273, "step": 32409 }, { "epoch": 3.8432349104707697, "grad_norm": 0.38108833450043117, "learning_rate": 2.0112354053726524e-07, "loss": 0.015, "step": 32410 }, { "epoch": 3.8433534922328945, "grad_norm": 0.4784941202046757, "learning_rate": 2.0081979185828105e-07, "loss": 0.0187, "step": 32411 }, { "epoch": 3.8434720739950197, "grad_norm": 0.43166704553536756, "learning_rate": 2.0051627179733247e-07, "loss": 0.0187, "step": 32412 }, { "epoch": 3.8435906557571444, "grad_norm": 0.6195356627914599, "learning_rate": 2.0021298035720894e-07, "loss": 0.0302, "step": 32413 }, { "epoch": 3.8437092375192696, "grad_norm": 0.5782298081269911, "learning_rate": 1.9990991754071098e-07, "loss": 0.0242, "step": 32414 }, { "epoch": 3.8438278192813944, "grad_norm": 0.6559712124181133, "learning_rate": 1.996070833506336e-07, "loss": 0.0262, "step": 32415 }, { "epoch": 3.8439464010435196, "grad_norm": 0.5415918139861473, "learning_rate": 1.9930447778976625e-07, "loss": 0.0195, "step": 32416 }, { "epoch": 3.8440649828056443, "grad_norm": 0.6286716357021125, "learning_rate": 1.990021008608983e-07, "loss": 0.0239, "step": 32417 }, { "epoch": 3.8441835645677696, "grad_norm": 0.5889221989551054, "learning_rate": 1.986999525668165e-07, "loss": 0.0237, "step": 32418 }, { "epoch": 3.8443021463298943, "grad_norm": 0.4058739667662757, "learning_rate": 1.9839803291031024e-07, "loss": 0.0195, "step": 32419 }, { "epoch": 3.8444207280920195, "grad_norm": 0.44022749871706945, "learning_rate": 1.9809634189416059e-07, "loss": 0.016, "step": 32420 }, { "epoch": 3.8445393098541443, "grad_norm": 0.6056911463820162, "learning_rate": 1.9779487952114596e-07, "loss": 0.0285, "step": 32421 }, { "epoch": 3.8446578916162695, "grad_norm": 0.6111791377605957, "learning_rate": 1.9749364579405015e-07, "loss": 0.0247, "step": 32422 }, { "epoch": 3.8447764733783947, "grad_norm": 0.316414865125287, "learning_rate": 1.9719264071564602e-07, "loss": 0.0128, "step": 32423 }, { "epoch": 3.8448950551405194, "grad_norm": 0.3746931675252892, "learning_rate": 1.9689186428871464e-07, "loss": 0.015, "step": 32424 }, { "epoch": 3.845013636902644, "grad_norm": 0.6137069156938877, "learning_rate": 1.9659131651602046e-07, "loss": 0.0257, "step": 32425 }, { "epoch": 3.8451322186647694, "grad_norm": 0.6055044063683356, "learning_rate": 1.9629099740034185e-07, "loss": 0.0316, "step": 32426 }, { "epoch": 3.8452508004268946, "grad_norm": 0.43292167642492774, "learning_rate": 1.9599090694444044e-07, "loss": 0.0209, "step": 32427 }, { "epoch": 3.8453693821890194, "grad_norm": 0.3897214549769648, "learning_rate": 1.9569104515108906e-07, "loss": 0.0172, "step": 32428 }, { "epoch": 3.845487963951144, "grad_norm": 0.6769813231789346, "learning_rate": 1.9539141202304657e-07, "loss": 0.0305, "step": 32429 }, { "epoch": 3.8456065457132693, "grad_norm": 0.7810981482468868, "learning_rate": 1.9509200756308022e-07, "loss": 0.031, "step": 32430 }, { "epoch": 3.8457251274753945, "grad_norm": 0.7005575857424886, "learning_rate": 1.9479283177394614e-07, "loss": 0.0352, "step": 32431 }, { "epoch": 3.8458437092375193, "grad_norm": 0.8022310233210902, "learning_rate": 1.94493884658406e-07, "loss": 0.0368, "step": 32432 }, { "epoch": 3.845962290999644, "grad_norm": 0.45230965356846176, "learning_rate": 1.941951662192104e-07, "loss": 0.0241, "step": 32433 }, { "epoch": 3.8460808727617692, "grad_norm": 0.527165576824917, "learning_rate": 1.9389667645911824e-07, "loss": 0.0247, "step": 32434 }, { "epoch": 3.8461994545238944, "grad_norm": 0.3918215542351256, "learning_rate": 1.9359841538088007e-07, "loss": 0.0213, "step": 32435 }, { "epoch": 3.846318036286019, "grad_norm": 0.5836700055011448, "learning_rate": 1.9330038298724652e-07, "loss": 0.022, "step": 32436 }, { "epoch": 3.846436618048144, "grad_norm": 0.45923216328301647, "learning_rate": 1.9300257928095978e-07, "loss": 0.0188, "step": 32437 }, { "epoch": 3.846555199810269, "grad_norm": 0.4411234959704554, "learning_rate": 1.9270500426477045e-07, "loss": 0.0233, "step": 32438 }, { "epoch": 3.8466737815723944, "grad_norm": 0.3559420469478563, "learning_rate": 1.924076579414208e-07, "loss": 0.0119, "step": 32439 }, { "epoch": 3.846792363334519, "grad_norm": 0.4150036982637551, "learning_rate": 1.9211054031365028e-07, "loss": 0.0168, "step": 32440 }, { "epoch": 3.846910945096644, "grad_norm": 0.5992505175889953, "learning_rate": 1.9181365138420115e-07, "loss": 0.0229, "step": 32441 }, { "epoch": 3.847029526858769, "grad_norm": 0.5176666053278863, "learning_rate": 1.9151699115580734e-07, "loss": 0.0229, "step": 32442 }, { "epoch": 3.8471481086208943, "grad_norm": 0.8086161546212984, "learning_rate": 1.912205596312028e-07, "loss": 0.0343, "step": 32443 }, { "epoch": 3.847266690383019, "grad_norm": 0.6046652225814486, "learning_rate": 1.9092435681312414e-07, "loss": 0.022, "step": 32444 }, { "epoch": 3.847385272145144, "grad_norm": 0.8114257466274432, "learning_rate": 1.9062838270429984e-07, "loss": 0.0312, "step": 32445 }, { "epoch": 3.847503853907269, "grad_norm": 0.6875192022926029, "learning_rate": 1.903326373074582e-07, "loss": 0.041, "step": 32446 }, { "epoch": 3.847622435669394, "grad_norm": 0.8612741673163943, "learning_rate": 1.9003712062532764e-07, "loss": 0.0484, "step": 32447 }, { "epoch": 3.847741017431519, "grad_norm": 0.32424420082075417, "learning_rate": 1.8974183266062816e-07, "loss": 0.0132, "step": 32448 }, { "epoch": 3.847859599193644, "grad_norm": 0.8894203285024376, "learning_rate": 1.8944677341608541e-07, "loss": 0.0349, "step": 32449 }, { "epoch": 3.847978180955769, "grad_norm": 0.3878409849022033, "learning_rate": 1.8915194289442216e-07, "loss": 0.0163, "step": 32450 }, { "epoch": 3.848096762717894, "grad_norm": 0.5626582778284627, "learning_rate": 1.888573410983474e-07, "loss": 0.0333, "step": 32451 }, { "epoch": 3.848215344480019, "grad_norm": 0.3501479499073927, "learning_rate": 1.885629680305867e-07, "loss": 0.0215, "step": 32452 }, { "epoch": 3.848333926242144, "grad_norm": 0.5911750998473173, "learning_rate": 1.8826882369384624e-07, "loss": 0.0197, "step": 32453 }, { "epoch": 3.848452508004269, "grad_norm": 0.38187865855431136, "learning_rate": 1.8797490809084328e-07, "loss": 0.0172, "step": 32454 }, { "epoch": 3.848571089766394, "grad_norm": 0.4410255142760647, "learning_rate": 1.8768122122428679e-07, "loss": 0.0151, "step": 32455 }, { "epoch": 3.848689671528519, "grad_norm": 0.48696846659020765, "learning_rate": 1.8738776309687733e-07, "loss": 0.0182, "step": 32456 }, { "epoch": 3.848808253290644, "grad_norm": 0.5948873012603836, "learning_rate": 1.8709453371132945e-07, "loss": 0.0207, "step": 32457 }, { "epoch": 3.848926835052769, "grad_norm": 0.35413762583935327, "learning_rate": 1.8680153307034376e-07, "loss": 0.0166, "step": 32458 }, { "epoch": 3.849045416814894, "grad_norm": 0.37258803177832117, "learning_rate": 1.865087611766181e-07, "loss": 0.0178, "step": 32459 }, { "epoch": 3.8491639985770187, "grad_norm": 0.4630914189383797, "learning_rate": 1.8621621803285027e-07, "loss": 0.0211, "step": 32460 }, { "epoch": 3.849282580339144, "grad_norm": 0.436522858326172, "learning_rate": 1.8592390364174372e-07, "loss": 0.0153, "step": 32461 }, { "epoch": 3.8494011621012687, "grad_norm": 0.5711499747552092, "learning_rate": 1.8563181800599073e-07, "loss": 0.0292, "step": 32462 }, { "epoch": 3.849519743863394, "grad_norm": 0.5225507070101192, "learning_rate": 1.853399611282808e-07, "loss": 0.0137, "step": 32463 }, { "epoch": 3.8496383256255187, "grad_norm": 0.7479684752033527, "learning_rate": 1.850483330113062e-07, "loss": 0.0516, "step": 32464 }, { "epoch": 3.849756907387644, "grad_norm": 0.23708569042951716, "learning_rate": 1.8475693365775649e-07, "loss": 0.0104, "step": 32465 }, { "epoch": 3.8498754891497686, "grad_norm": 0.5087421000897505, "learning_rate": 1.8446576307031837e-07, "loss": 0.0294, "step": 32466 }, { "epoch": 3.849994070911894, "grad_norm": 0.7338227349270761, "learning_rate": 1.8417482125167308e-07, "loss": 0.0265, "step": 32467 }, { "epoch": 3.8501126526740186, "grad_norm": 1.1878088275533103, "learning_rate": 1.838841082045073e-07, "loss": 0.0525, "step": 32468 }, { "epoch": 3.850231234436144, "grad_norm": 0.39641135134358163, "learning_rate": 1.835936239314967e-07, "loss": 0.0211, "step": 32469 }, { "epoch": 3.8503498161982685, "grad_norm": 0.46873401641381574, "learning_rate": 1.8330336843532247e-07, "loss": 0.0293, "step": 32470 }, { "epoch": 3.8504683979603938, "grad_norm": 0.5371046912586487, "learning_rate": 1.8301334171865747e-07, "loss": 0.0268, "step": 32471 }, { "epoch": 3.8505869797225185, "grad_norm": 1.118398488433352, "learning_rate": 1.8272354378417733e-07, "loss": 0.0276, "step": 32472 }, { "epoch": 3.8507055614846437, "grad_norm": 0.7860618001472475, "learning_rate": 1.824339746345549e-07, "loss": 0.0343, "step": 32473 }, { "epoch": 3.8508241432467685, "grad_norm": 0.6950512381983767, "learning_rate": 1.8214463427245475e-07, "loss": 0.029, "step": 32474 }, { "epoch": 3.8509427250088937, "grad_norm": 0.5610608052353848, "learning_rate": 1.8185552270054974e-07, "loss": 0.0223, "step": 32475 }, { "epoch": 3.851061306771019, "grad_norm": 0.7453127661761787, "learning_rate": 1.8156663992150158e-07, "loss": 0.0286, "step": 32476 }, { "epoch": 3.8511798885331436, "grad_norm": 0.7485745355913629, "learning_rate": 1.8127798593797762e-07, "loss": 0.0555, "step": 32477 }, { "epoch": 3.8512984702952684, "grad_norm": 0.6647250819633721, "learning_rate": 1.8098956075263128e-07, "loss": 0.0239, "step": 32478 }, { "epoch": 3.8514170520573936, "grad_norm": 0.6194999010174073, "learning_rate": 1.807013643681299e-07, "loss": 0.036, "step": 32479 }, { "epoch": 3.851535633819519, "grad_norm": 0.6806066517237038, "learning_rate": 1.804133967871241e-07, "loss": 0.0383, "step": 32480 }, { "epoch": 3.8516542155816436, "grad_norm": 0.5857982605502763, "learning_rate": 1.801256580122701e-07, "loss": 0.0228, "step": 32481 }, { "epoch": 3.8517727973437683, "grad_norm": 0.38283714802027696, "learning_rate": 1.7983814804622413e-07, "loss": 0.0213, "step": 32482 }, { "epoch": 3.8518913791058935, "grad_norm": 0.5286149350683463, "learning_rate": 1.7955086689163125e-07, "loss": 0.0342, "step": 32483 }, { "epoch": 3.8520099608680187, "grad_norm": 0.3323645480576516, "learning_rate": 1.7926381455114493e-07, "loss": 0.0165, "step": 32484 }, { "epoch": 3.8521285426301435, "grad_norm": 0.48823588342732377, "learning_rate": 1.7897699102741027e-07, "loss": 0.0224, "step": 32485 }, { "epoch": 3.8522471243922682, "grad_norm": 0.6212002571036752, "learning_rate": 1.7869039632306684e-07, "loss": 0.0338, "step": 32486 }, { "epoch": 3.8523657061543934, "grad_norm": 0.29447054329551997, "learning_rate": 1.7840403044076248e-07, "loss": 0.0114, "step": 32487 }, { "epoch": 3.8524842879165186, "grad_norm": 0.48922821214559636, "learning_rate": 1.78117893383134e-07, "loss": 0.019, "step": 32488 }, { "epoch": 3.8526028696786434, "grad_norm": 0.4109783988584283, "learning_rate": 1.778319851528182e-07, "loss": 0.0182, "step": 32489 }, { "epoch": 3.852721451440768, "grad_norm": 0.47963680189137675, "learning_rate": 1.7754630575245456e-07, "loss": 0.022, "step": 32490 }, { "epoch": 3.8528400332028934, "grad_norm": 0.6592529300626596, "learning_rate": 1.772608551846744e-07, "loss": 0.0379, "step": 32491 }, { "epoch": 3.8529586149650186, "grad_norm": 0.4299692830422249, "learning_rate": 1.769756334521089e-07, "loss": 0.0321, "step": 32492 }, { "epoch": 3.8530771967271433, "grad_norm": 0.5087442599535382, "learning_rate": 1.7669064055738648e-07, "loss": 0.0271, "step": 32493 }, { "epoch": 3.853195778489268, "grad_norm": 0.39857837986078615, "learning_rate": 1.7640587650313844e-07, "loss": 0.0153, "step": 32494 }, { "epoch": 3.8533143602513933, "grad_norm": 0.5718766207531726, "learning_rate": 1.7612134129198765e-07, "loss": 0.0315, "step": 32495 }, { "epoch": 3.8534329420135185, "grad_norm": 0.45342682338847656, "learning_rate": 1.75837034926557e-07, "loss": 0.0175, "step": 32496 }, { "epoch": 3.8535515237756433, "grad_norm": 0.5206439903417396, "learning_rate": 1.7555295740946665e-07, "loss": 0.0209, "step": 32497 }, { "epoch": 3.8536701055377685, "grad_norm": 0.36920806883873586, "learning_rate": 1.752691087433339e-07, "loss": 0.0131, "step": 32498 }, { "epoch": 3.853788687299893, "grad_norm": 0.5299792786256508, "learning_rate": 1.7498548893078171e-07, "loss": 0.0219, "step": 32499 }, { "epoch": 3.8539072690620184, "grad_norm": 0.7015202949460598, "learning_rate": 1.7470209797441906e-07, "loss": 0.0308, "step": 32500 }, { "epoch": 3.854025850824143, "grad_norm": 0.5965854218483934, "learning_rate": 1.7441893587686052e-07, "loss": 0.0232, "step": 32501 }, { "epoch": 3.8541444325862684, "grad_norm": 0.8258156690652769, "learning_rate": 1.7413600264071795e-07, "loss": 0.037, "step": 32502 }, { "epoch": 3.854263014348393, "grad_norm": 0.5831528520899272, "learning_rate": 1.7385329826859477e-07, "loss": 0.0251, "step": 32503 }, { "epoch": 3.8543815961105183, "grad_norm": 0.5446240532467341, "learning_rate": 1.7357082276310276e-07, "loss": 0.0193, "step": 32504 }, { "epoch": 3.854500177872643, "grad_norm": 0.5619574227027426, "learning_rate": 1.732885761268427e-07, "loss": 0.0286, "step": 32505 }, { "epoch": 3.8546187596347683, "grad_norm": 0.8018811059214683, "learning_rate": 1.7300655836241797e-07, "loss": 0.035, "step": 32506 }, { "epoch": 3.854737341396893, "grad_norm": 0.8355602577710525, "learning_rate": 1.7272476947242655e-07, "loss": 0.0335, "step": 32507 }, { "epoch": 3.8548559231590183, "grad_norm": 0.4149542216494321, "learning_rate": 1.7244320945946913e-07, "loss": 0.0206, "step": 32508 }, { "epoch": 3.854974504921143, "grad_norm": 0.4789781986789796, "learning_rate": 1.7216187832613806e-07, "loss": 0.024, "step": 32509 }, { "epoch": 3.855093086683268, "grad_norm": 0.9633668199891151, "learning_rate": 1.7188077607503127e-07, "loss": 0.0335, "step": 32510 }, { "epoch": 3.855211668445393, "grad_norm": 0.41960371595441864, "learning_rate": 1.7159990270873562e-07, "loss": 0.0175, "step": 32511 }, { "epoch": 3.855330250207518, "grad_norm": 0.4465398651398696, "learning_rate": 1.713192582298434e-07, "loss": 0.0182, "step": 32512 }, { "epoch": 3.855448831969643, "grad_norm": 0.6737734133148547, "learning_rate": 1.7103884264093872e-07, "loss": 0.0221, "step": 32513 }, { "epoch": 3.855567413731768, "grad_norm": 1.1062103766472522, "learning_rate": 1.7075865594460838e-07, "loss": 0.0275, "step": 32514 }, { "epoch": 3.855685995493893, "grad_norm": 0.5154008957115662, "learning_rate": 1.704786981434392e-07, "loss": 0.0268, "step": 32515 }, { "epoch": 3.855804577256018, "grad_norm": 0.4646780278516789, "learning_rate": 1.701989692400041e-07, "loss": 0.0226, "step": 32516 }, { "epoch": 3.855923159018143, "grad_norm": 0.4644077707148362, "learning_rate": 1.6991946923688995e-07, "loss": 0.0248, "step": 32517 }, { "epoch": 3.856041740780268, "grad_norm": 0.8093377712971662, "learning_rate": 1.6964019813666687e-07, "loss": 0.0327, "step": 32518 }, { "epoch": 3.856160322542393, "grad_norm": 0.329778899648655, "learning_rate": 1.6936115594191338e-07, "loss": 0.0137, "step": 32519 }, { "epoch": 3.856278904304518, "grad_norm": 0.5765300202802016, "learning_rate": 1.6908234265519963e-07, "loss": 0.0331, "step": 32520 }, { "epoch": 3.856397486066643, "grad_norm": 0.4052307643419008, "learning_rate": 1.6880375827909855e-07, "loss": 0.015, "step": 32521 }, { "epoch": 3.856516067828768, "grad_norm": 0.8820742402985841, "learning_rate": 1.6852540281617756e-07, "loss": 0.0409, "step": 32522 }, { "epoch": 3.8566346495908927, "grad_norm": 0.5868784695749976, "learning_rate": 1.682472762689985e-07, "loss": 0.028, "step": 32523 }, { "epoch": 3.856753231353018, "grad_norm": 0.4003930038605936, "learning_rate": 1.6796937864013153e-07, "loss": 0.0219, "step": 32524 }, { "epoch": 3.856871813115143, "grad_norm": 0.46924965321059614, "learning_rate": 1.6769170993213566e-07, "loss": 0.0241, "step": 32525 }, { "epoch": 3.856990394877268, "grad_norm": 0.6764271229089087, "learning_rate": 1.6741427014757283e-07, "loss": 0.0304, "step": 32526 }, { "epoch": 3.8571089766393927, "grad_norm": 0.6448300404506155, "learning_rate": 1.671370592889937e-07, "loss": 0.0288, "step": 32527 }, { "epoch": 3.857227558401518, "grad_norm": 0.33328457478712464, "learning_rate": 1.6686007735896292e-07, "loss": 0.0199, "step": 32528 }, { "epoch": 3.857346140163643, "grad_norm": 0.5674992149440867, "learning_rate": 1.6658332436002843e-07, "loss": 0.0211, "step": 32529 }, { "epoch": 3.857464721925768, "grad_norm": 0.4393760236942314, "learning_rate": 1.66306800294741e-07, "loss": 0.0286, "step": 32530 }, { "epoch": 3.8575833036878926, "grad_norm": 0.5375259787696522, "learning_rate": 1.660305051656541e-07, "loss": 0.0231, "step": 32531 }, { "epoch": 3.857701885450018, "grad_norm": 0.46188760276485713, "learning_rate": 1.6575443897531295e-07, "loss": 0.0236, "step": 32532 }, { "epoch": 3.857820467212143, "grad_norm": 0.4613621365248006, "learning_rate": 1.6547860172625995e-07, "loss": 0.0212, "step": 32533 }, { "epoch": 3.8579390489742678, "grad_norm": 0.5420782726536425, "learning_rate": 1.6520299342104028e-07, "loss": 0.023, "step": 32534 }, { "epoch": 3.8580576307363925, "grad_norm": 0.4945081794659402, "learning_rate": 1.6492761406219637e-07, "loss": 0.0215, "step": 32535 }, { "epoch": 3.8581762124985177, "grad_norm": 0.3272197332088455, "learning_rate": 1.6465246365226505e-07, "loss": 0.0125, "step": 32536 }, { "epoch": 3.858294794260643, "grad_norm": 0.5278203566519076, "learning_rate": 1.6437754219378322e-07, "loss": 0.0231, "step": 32537 }, { "epoch": 3.8584133760227677, "grad_norm": 0.5977466215756991, "learning_rate": 1.6410284968928215e-07, "loss": 0.026, "step": 32538 }, { "epoch": 3.8585319577848924, "grad_norm": 0.5843106118387974, "learning_rate": 1.6382838614130148e-07, "loss": 0.0278, "step": 32539 }, { "epoch": 3.8586505395470176, "grad_norm": 0.5529693488076857, "learning_rate": 1.635541515523642e-07, "loss": 0.024, "step": 32540 }, { "epoch": 3.858769121309143, "grad_norm": 0.521920872700592, "learning_rate": 1.6328014592500163e-07, "loss": 0.0287, "step": 32541 }, { "epoch": 3.8588877030712676, "grad_norm": 0.8162077326266146, "learning_rate": 1.6300636926173674e-07, "loss": 0.0334, "step": 32542 }, { "epoch": 3.8590062848333924, "grad_norm": 0.4432165542717633, "learning_rate": 1.6273282156509805e-07, "loss": 0.0188, "step": 32543 }, { "epoch": 3.8591248665955176, "grad_norm": 0.4920262360834353, "learning_rate": 1.6245950283760857e-07, "loss": 0.0255, "step": 32544 }, { "epoch": 3.8592434483576428, "grad_norm": 0.6649274853748884, "learning_rate": 1.6218641308178017e-07, "loss": 0.0302, "step": 32545 }, { "epoch": 3.8593620301197675, "grad_norm": 0.6368580936145761, "learning_rate": 1.6191355230013582e-07, "loss": 0.032, "step": 32546 }, { "epoch": 3.8594806118818927, "grad_norm": 0.5079795467659366, "learning_rate": 1.616409204951902e-07, "loss": 0.0259, "step": 32547 }, { "epoch": 3.8595991936440175, "grad_norm": 0.4636785887611869, "learning_rate": 1.6136851766945793e-07, "loss": 0.0234, "step": 32548 }, { "epoch": 3.8597177754061427, "grad_norm": 0.6456261048204028, "learning_rate": 1.6109634382544536e-07, "loss": 0.0365, "step": 32549 }, { "epoch": 3.8598363571682675, "grad_norm": 0.5272810157226054, "learning_rate": 1.6082439896566992e-07, "loss": 0.0186, "step": 32550 }, { "epoch": 3.8599549389303927, "grad_norm": 0.45842699271838383, "learning_rate": 1.605526830926296e-07, "loss": 0.0219, "step": 32551 }, { "epoch": 3.8600735206925174, "grad_norm": 0.6244543677393063, "learning_rate": 1.602811962088363e-07, "loss": 0.0296, "step": 32552 }, { "epoch": 3.8601921024546426, "grad_norm": 0.3299489140475676, "learning_rate": 1.6000993831678802e-07, "loss": 0.0138, "step": 32553 }, { "epoch": 3.8603106842167674, "grad_norm": 0.43678983474528055, "learning_rate": 1.5973890941898827e-07, "loss": 0.0188, "step": 32554 }, { "epoch": 3.8604292659788926, "grad_norm": 0.6434501662717126, "learning_rate": 1.594681095179351e-07, "loss": 0.0221, "step": 32555 }, { "epoch": 3.8605478477410173, "grad_norm": 0.5776018623464471, "learning_rate": 1.5919753861612373e-07, "loss": 0.0307, "step": 32556 }, { "epoch": 3.8606664295031425, "grad_norm": 0.42405311301419707, "learning_rate": 1.589271967160466e-07, "loss": 0.0154, "step": 32557 }, { "epoch": 3.8607850112652673, "grad_norm": 0.8726191628366329, "learning_rate": 1.5865708382020172e-07, "loss": 0.0412, "step": 32558 }, { "epoch": 3.8609035930273925, "grad_norm": 0.4526214168962555, "learning_rate": 1.58387199931076e-07, "loss": 0.0233, "step": 32559 }, { "epoch": 3.8610221747895173, "grad_norm": 0.480839757817766, "learning_rate": 1.5811754505115627e-07, "loss": 0.0183, "step": 32560 }, { "epoch": 3.8611407565516425, "grad_norm": 0.7477853852776319, "learning_rate": 1.578481191829323e-07, "loss": 0.0395, "step": 32561 }, { "epoch": 3.861259338313767, "grad_norm": 0.45251300622481844, "learning_rate": 1.575789223288826e-07, "loss": 0.0171, "step": 32562 }, { "epoch": 3.8613779200758924, "grad_norm": 0.8490845185478879, "learning_rate": 1.5730995449149133e-07, "loss": 0.0501, "step": 32563 }, { "epoch": 3.861496501838017, "grad_norm": 0.3023903083545019, "learning_rate": 1.5704121567323703e-07, "loss": 0.0083, "step": 32564 }, { "epoch": 3.8616150836001424, "grad_norm": 0.3538015505940077, "learning_rate": 1.5677270587660108e-07, "loss": 0.0102, "step": 32565 }, { "epoch": 3.861733665362267, "grad_norm": 0.6167620188295707, "learning_rate": 1.565044251040565e-07, "loss": 0.0344, "step": 32566 }, { "epoch": 3.8618522471243923, "grad_norm": 0.4158700744483799, "learning_rate": 1.5623637335807352e-07, "loss": 0.0232, "step": 32567 }, { "epoch": 3.861970828886517, "grad_norm": 0.6094921571482055, "learning_rate": 1.5596855064112791e-07, "loss": 0.0244, "step": 32568 }, { "epoch": 3.8620894106486423, "grad_norm": 0.4727696875878514, "learning_rate": 1.5570095695568443e-07, "loss": 0.0191, "step": 32569 }, { "epoch": 3.862207992410767, "grad_norm": 0.5337179392268638, "learning_rate": 1.5543359230421328e-07, "loss": 0.0368, "step": 32570 }, { "epoch": 3.8623265741728923, "grad_norm": 0.5184890661024976, "learning_rate": 1.5516645668917919e-07, "loss": 0.0246, "step": 32571 }, { "epoch": 3.862445155935017, "grad_norm": 0.8150848037929325, "learning_rate": 1.5489955011303848e-07, "loss": 0.0362, "step": 32572 }, { "epoch": 3.8625637376971422, "grad_norm": 0.38327592598594395, "learning_rate": 1.5463287257826142e-07, "loss": 0.0193, "step": 32573 }, { "epoch": 3.8626823194592674, "grad_norm": 0.5707252548096468, "learning_rate": 1.5436642408730162e-07, "loss": 0.0261, "step": 32574 }, { "epoch": 3.862800901221392, "grad_norm": 0.4644390512433499, "learning_rate": 1.5410020464261542e-07, "loss": 0.0211, "step": 32575 }, { "epoch": 3.862919482983517, "grad_norm": 0.46502698357955063, "learning_rate": 1.5383421424665645e-07, "loss": 0.0227, "step": 32576 }, { "epoch": 3.863038064745642, "grad_norm": 0.3372099655788095, "learning_rate": 1.5356845290187827e-07, "loss": 0.0134, "step": 32577 }, { "epoch": 3.8631566465077674, "grad_norm": 0.6275208783984894, "learning_rate": 1.5330292061072892e-07, "loss": 0.0265, "step": 32578 }, { "epoch": 3.863275228269892, "grad_norm": 0.3739830551335698, "learning_rate": 1.5303761737565926e-07, "loss": 0.0177, "step": 32579 }, { "epoch": 3.863393810032017, "grad_norm": 0.3977507847107187, "learning_rate": 1.527725431991145e-07, "loss": 0.0209, "step": 32580 }, { "epoch": 3.863512391794142, "grad_norm": 0.4574066676901986, "learning_rate": 1.5250769808353438e-07, "loss": 0.0177, "step": 32581 }, { "epoch": 3.8636309735562673, "grad_norm": 0.8739498626552537, "learning_rate": 1.5224308203136418e-07, "loss": 0.0483, "step": 32582 }, { "epoch": 3.863749555318392, "grad_norm": 0.36832713544731427, "learning_rate": 1.519786950450408e-07, "loss": 0.0192, "step": 32583 }, { "epoch": 3.863868137080517, "grad_norm": 0.31205952273542736, "learning_rate": 1.517145371270068e-07, "loss": 0.0152, "step": 32584 }, { "epoch": 3.863986718842642, "grad_norm": 0.5511660524477593, "learning_rate": 1.514506082796935e-07, "loss": 0.0217, "step": 32585 }, { "epoch": 3.864105300604767, "grad_norm": 0.45495838503889174, "learning_rate": 1.511869085055323e-07, "loss": 0.0287, "step": 32586 }, { "epoch": 3.864223882366892, "grad_norm": 0.43821984145677745, "learning_rate": 1.509234378069546e-07, "loss": 0.0185, "step": 32587 }, { "epoch": 3.8643424641290167, "grad_norm": 0.4417145967653241, "learning_rate": 1.506601961863946e-07, "loss": 0.0209, "step": 32588 }, { "epoch": 3.864461045891142, "grad_norm": 0.5058744809609778, "learning_rate": 1.503971836462753e-07, "loss": 0.0195, "step": 32589 }, { "epoch": 3.864579627653267, "grad_norm": 0.5387575487223066, "learning_rate": 1.501344001890198e-07, "loss": 0.0234, "step": 32590 }, { "epoch": 3.864698209415392, "grad_norm": 0.5388228200467652, "learning_rate": 1.4987184581705116e-07, "loss": 0.0208, "step": 32591 }, { "epoch": 3.8648167911775166, "grad_norm": 0.36665198569599994, "learning_rate": 1.496095205327924e-07, "loss": 0.0186, "step": 32592 }, { "epoch": 3.864935372939642, "grad_norm": 0.3508647444401475, "learning_rate": 1.4934742433865833e-07, "loss": 0.0188, "step": 32593 }, { "epoch": 3.865053954701767, "grad_norm": 0.636859849319127, "learning_rate": 1.490855572370692e-07, "loss": 0.0493, "step": 32594 }, { "epoch": 3.865172536463892, "grad_norm": 0.6016004795352826, "learning_rate": 1.4882391923043693e-07, "loss": 0.024, "step": 32595 }, { "epoch": 3.8652911182260166, "grad_norm": 0.42879046148625155, "learning_rate": 1.4856251032117353e-07, "loss": 0.014, "step": 32596 }, { "epoch": 3.8654096999881418, "grad_norm": 0.46020555330281576, "learning_rate": 1.4830133051168816e-07, "loss": 0.0192, "step": 32597 }, { "epoch": 3.865528281750267, "grad_norm": 0.667127191127804, "learning_rate": 1.4804037980439002e-07, "loss": 0.0319, "step": 32598 }, { "epoch": 3.8656468635123917, "grad_norm": 0.9748382091368105, "learning_rate": 1.4777965820168272e-07, "loss": 0.0422, "step": 32599 }, { "epoch": 3.865765445274517, "grad_norm": 0.44772435688266465, "learning_rate": 1.475191657059727e-07, "loss": 0.0199, "step": 32600 }, { "epoch": 3.8658840270366417, "grad_norm": 0.4299117651862585, "learning_rate": 1.47258902319658e-07, "loss": 0.0226, "step": 32601 }, { "epoch": 3.866002608798767, "grad_norm": 0.322586282202831, "learning_rate": 1.4699886804514228e-07, "loss": 0.0184, "step": 32602 }, { "epoch": 3.8661211905608917, "grad_norm": 0.5820923736145253, "learning_rate": 1.467390628848181e-07, "loss": 0.0252, "step": 32603 }, { "epoch": 3.866239772323017, "grad_norm": 0.2807807620307794, "learning_rate": 1.4647948684108626e-07, "loss": 0.0138, "step": 32604 }, { "epoch": 3.8663583540851416, "grad_norm": 0.3843646765834339, "learning_rate": 1.4622013991633099e-07, "loss": 0.0119, "step": 32605 }, { "epoch": 3.866476935847267, "grad_norm": 0.614756510278305, "learning_rate": 1.4596102211295316e-07, "loss": 0.0293, "step": 32606 }, { "epoch": 3.8665955176093916, "grad_norm": 0.5938635103656196, "learning_rate": 1.4570213343333416e-07, "loss": 0.0217, "step": 32607 }, { "epoch": 3.8667140993715168, "grad_norm": 0.397662247879681, "learning_rate": 1.454434738798638e-07, "loss": 0.0203, "step": 32608 }, { "epoch": 3.8668326811336415, "grad_norm": 0.557100193557595, "learning_rate": 1.4518504345492346e-07, "loss": 0.0214, "step": 32609 }, { "epoch": 3.8669512628957667, "grad_norm": 0.39168045151340314, "learning_rate": 1.4492684216090012e-07, "loss": 0.0154, "step": 32610 }, { "epoch": 3.8670698446578915, "grad_norm": 0.5273518975725061, "learning_rate": 1.446688700001725e-07, "loss": 0.0188, "step": 32611 }, { "epoch": 3.8671884264200167, "grad_norm": 0.7006204166364842, "learning_rate": 1.444111269751164e-07, "loss": 0.0272, "step": 32612 }, { "epoch": 3.8673070081821415, "grad_norm": 0.5222318819563422, "learning_rate": 1.4415361308811327e-07, "loss": 0.0237, "step": 32613 }, { "epoch": 3.8674255899442667, "grad_norm": 0.4233017187079948, "learning_rate": 1.4389632834153065e-07, "loss": 0.0153, "step": 32614 }, { "epoch": 3.8675441717063914, "grad_norm": 0.49248646272515284, "learning_rate": 1.4363927273774446e-07, "loss": 0.0192, "step": 32615 }, { "epoch": 3.8676627534685166, "grad_norm": 0.34932417469521815, "learning_rate": 1.4338244627912222e-07, "loss": 0.0162, "step": 32616 }, { "epoch": 3.8677813352306414, "grad_norm": 0.3872661797413739, "learning_rate": 1.4312584896803426e-07, "loss": 0.015, "step": 32617 }, { "epoch": 3.8678999169927666, "grad_norm": 0.4833310323065501, "learning_rate": 1.4286948080684259e-07, "loss": 0.0196, "step": 32618 }, { "epoch": 3.8680184987548913, "grad_norm": 0.7362968660159754, "learning_rate": 1.4261334179791197e-07, "loss": 0.0348, "step": 32619 }, { "epoch": 3.8681370805170165, "grad_norm": 0.4174521369125346, "learning_rate": 1.4235743194360718e-07, "loss": 0.0165, "step": 32620 }, { "epoch": 3.8682556622791413, "grad_norm": 0.3375835263670387, "learning_rate": 1.4210175124628188e-07, "loss": 0.0133, "step": 32621 }, { "epoch": 3.8683742440412665, "grad_norm": 0.6463917733944984, "learning_rate": 1.418462997082981e-07, "loss": 0.0329, "step": 32622 }, { "epoch": 3.8684928258033917, "grad_norm": 0.9757135490783077, "learning_rate": 1.4159107733200672e-07, "loss": 0.053, "step": 32623 }, { "epoch": 3.8686114075655165, "grad_norm": 0.43964253269988895, "learning_rate": 1.413360841197614e-07, "loss": 0.0206, "step": 32624 }, { "epoch": 3.8687299893276412, "grad_norm": 0.5729133169003059, "learning_rate": 1.4108132007391584e-07, "loss": 0.0286, "step": 32625 }, { "epoch": 3.8688485710897664, "grad_norm": 0.6183637340618612, "learning_rate": 1.4082678519681537e-07, "loss": 0.0293, "step": 32626 }, { "epoch": 3.8689671528518916, "grad_norm": 0.3439875413132369, "learning_rate": 1.405724794908081e-07, "loss": 0.0152, "step": 32627 }, { "epoch": 3.8690857346140164, "grad_norm": 0.4444873425787215, "learning_rate": 1.4031840295823662e-07, "loss": 0.0231, "step": 32628 }, { "epoch": 3.869204316376141, "grad_norm": 0.29338984331864887, "learning_rate": 1.400645556014435e-07, "loss": 0.0154, "step": 32629 }, { "epoch": 3.8693228981382664, "grad_norm": 0.4876335223974506, "learning_rate": 1.3981093742277129e-07, "loss": 0.0221, "step": 32630 }, { "epoch": 3.8694414799003916, "grad_norm": 0.4263178096265252, "learning_rate": 1.3955754842455704e-07, "loss": 0.0184, "step": 32631 }, { "epoch": 3.8695600616625163, "grad_norm": 0.38948734488735515, "learning_rate": 1.3930438860913775e-07, "loss": 0.017, "step": 32632 }, { "epoch": 3.869678643424641, "grad_norm": 0.43797553012151835, "learning_rate": 1.390514579788449e-07, "loss": 0.027, "step": 32633 }, { "epoch": 3.8697972251867663, "grad_norm": 0.581059440215136, "learning_rate": 1.3879875653600993e-07, "loss": 0.0266, "step": 32634 }, { "epoch": 3.8699158069488915, "grad_norm": 0.7273038854572882, "learning_rate": 1.3854628428296435e-07, "loss": 0.0436, "step": 32635 }, { "epoch": 3.8700343887110162, "grad_norm": 0.6272042933103779, "learning_rate": 1.3829404122203404e-07, "loss": 0.0195, "step": 32636 }, { "epoch": 3.870152970473141, "grad_norm": 0.5801904446495383, "learning_rate": 1.3804202735554772e-07, "loss": 0.0266, "step": 32637 }, { "epoch": 3.870271552235266, "grad_norm": 0.5114057206037644, "learning_rate": 1.3779024268582296e-07, "loss": 0.0222, "step": 32638 }, { "epoch": 3.8703901339973914, "grad_norm": 0.483942764237893, "learning_rate": 1.375386872151857e-07, "loss": 0.0215, "step": 32639 }, { "epoch": 3.870508715759516, "grad_norm": 0.4259764777135623, "learning_rate": 1.3728736094595352e-07, "loss": 0.02, "step": 32640 }, { "epoch": 3.870627297521641, "grad_norm": 0.5569648023364093, "learning_rate": 1.37036263880444e-07, "loss": 0.022, "step": 32641 }, { "epoch": 3.870745879283766, "grad_norm": 0.6299291923501396, "learning_rate": 1.3678539602096917e-07, "loss": 0.0157, "step": 32642 }, { "epoch": 3.8708644610458913, "grad_norm": 0.34514613755750634, "learning_rate": 1.365347573698439e-07, "loss": 0.02, "step": 32643 }, { "epoch": 3.870983042808016, "grad_norm": 0.5735864020611046, "learning_rate": 1.3628434792938016e-07, "loss": 0.0303, "step": 32644 }, { "epoch": 3.871101624570141, "grad_norm": 0.34656648606142626, "learning_rate": 1.360341677018845e-07, "loss": 0.0122, "step": 32645 }, { "epoch": 3.871220206332266, "grad_norm": 0.4074037171280406, "learning_rate": 1.3578421668966334e-07, "loss": 0.0205, "step": 32646 }, { "epoch": 3.8713387880943912, "grad_norm": 0.5165907289237643, "learning_rate": 1.3553449489502045e-07, "loss": 0.019, "step": 32647 }, { "epoch": 3.871457369856516, "grad_norm": 0.43287724502955566, "learning_rate": 1.3528500232025955e-07, "loss": 0.0201, "step": 32648 }, { "epoch": 3.871575951618641, "grad_norm": 0.6047712722356997, "learning_rate": 1.3503573896767873e-07, "loss": 0.0343, "step": 32649 }, { "epoch": 3.871694533380766, "grad_norm": 0.4267500716910032, "learning_rate": 1.3478670483957624e-07, "loss": 0.023, "step": 32650 }, { "epoch": 3.871813115142891, "grad_norm": 0.5627223101939857, "learning_rate": 1.3453789993825018e-07, "loss": 0.0249, "step": 32651 }, { "epoch": 3.871931696905016, "grad_norm": 0.8460577459011723, "learning_rate": 1.3428932426599318e-07, "loss": 0.0385, "step": 32652 }, { "epoch": 3.872050278667141, "grad_norm": 0.7398513723441352, "learning_rate": 1.3404097782509506e-07, "loss": 0.0428, "step": 32653 }, { "epoch": 3.872168860429266, "grad_norm": 0.8259795343590133, "learning_rate": 1.337928606178457e-07, "loss": 0.0387, "step": 32654 }, { "epoch": 3.872287442191391, "grad_norm": 0.6386803047370578, "learning_rate": 1.3354497264653765e-07, "loss": 0.0229, "step": 32655 }, { "epoch": 3.872406023953516, "grad_norm": 0.44662598739206627, "learning_rate": 1.332973139134469e-07, "loss": 0.0164, "step": 32656 }, { "epoch": 3.872524605715641, "grad_norm": 0.5520515698719809, "learning_rate": 1.330498844208633e-07, "loss": 0.0228, "step": 32657 }, { "epoch": 3.872643187477766, "grad_norm": 0.6585588359203679, "learning_rate": 1.3280268417106555e-07, "loss": 0.0245, "step": 32658 }, { "epoch": 3.872761769239891, "grad_norm": 0.6544577731106964, "learning_rate": 1.3255571316633242e-07, "loss": 0.0298, "step": 32659 }, { "epoch": 3.8728803510020158, "grad_norm": 0.6659104984935416, "learning_rate": 1.323089714089426e-07, "loss": 0.029, "step": 32660 }, { "epoch": 3.872998932764141, "grad_norm": 0.6708623544079515, "learning_rate": 1.3206245890116653e-07, "loss": 0.0256, "step": 32661 }, { "epoch": 3.8731175145262657, "grad_norm": 0.6902240818017343, "learning_rate": 1.3181617564528293e-07, "loss": 0.026, "step": 32662 }, { "epoch": 3.873236096288391, "grad_norm": 0.7705162082727598, "learning_rate": 1.3157012164355665e-07, "loss": 0.0355, "step": 32663 }, { "epoch": 3.8733546780505157, "grad_norm": 0.39765396973103706, "learning_rate": 1.313242968982581e-07, "loss": 0.0194, "step": 32664 }, { "epoch": 3.873473259812641, "grad_norm": 0.5170717349944588, "learning_rate": 1.310787014116549e-07, "loss": 0.0264, "step": 32665 }, { "epoch": 3.8735918415747657, "grad_norm": 0.3964385058255222, "learning_rate": 1.3083333518600917e-07, "loss": 0.0188, "step": 32666 }, { "epoch": 3.873710423336891, "grad_norm": 0.5528311483799406, "learning_rate": 1.3058819822358293e-07, "loss": 0.0258, "step": 32667 }, { "epoch": 3.8738290050990156, "grad_norm": 0.2997862165700499, "learning_rate": 1.303432905266383e-07, "loss": 0.0137, "step": 32668 }, { "epoch": 3.873947586861141, "grad_norm": 0.43445629950690956, "learning_rate": 1.30098612097429e-07, "loss": 0.0228, "step": 32669 }, { "epoch": 3.8740661686232656, "grad_norm": 0.394825865083493, "learning_rate": 1.298541629382144e-07, "loss": 0.0135, "step": 32670 }, { "epoch": 3.874184750385391, "grad_norm": 0.3896290887168801, "learning_rate": 1.296099430512454e-07, "loss": 0.0162, "step": 32671 }, { "epoch": 3.874303332147516, "grad_norm": 0.5819294192951346, "learning_rate": 1.293659524387758e-07, "loss": 0.0267, "step": 32672 }, { "epoch": 3.8744219139096407, "grad_norm": 0.7409501656122045, "learning_rate": 1.291221911030538e-07, "loss": 0.0295, "step": 32673 }, { "epoch": 3.8745404956717655, "grad_norm": 0.4330243910513964, "learning_rate": 1.288786590463248e-07, "loss": 0.0221, "step": 32674 }, { "epoch": 3.8746590774338907, "grad_norm": 0.37196662282884163, "learning_rate": 1.28635356270837e-07, "loss": 0.0194, "step": 32675 }, { "epoch": 3.874777659196016, "grad_norm": 0.48212988845631294, "learning_rate": 1.283922827788303e-07, "loss": 0.023, "step": 32676 }, { "epoch": 3.8748962409581407, "grad_norm": 0.5303091513937456, "learning_rate": 1.2814943857255014e-07, "loss": 0.0233, "step": 32677 }, { "epoch": 3.8750148227202654, "grad_norm": 0.38482728897082114, "learning_rate": 1.2790682365423078e-07, "loss": 0.0208, "step": 32678 }, { "epoch": 3.8751334044823906, "grad_norm": 0.4456371267641812, "learning_rate": 1.276644380261094e-07, "loss": 0.0193, "step": 32679 }, { "epoch": 3.875251986244516, "grad_norm": 0.5253486954001398, "learning_rate": 1.2742228169042304e-07, "loss": 0.0256, "step": 32680 }, { "epoch": 3.8753705680066406, "grad_norm": 0.5596053493868849, "learning_rate": 1.2718035464940326e-07, "loss": 0.0249, "step": 32681 }, { "epoch": 3.8754891497687654, "grad_norm": 0.5890691795724299, "learning_rate": 1.2693865690527884e-07, "loss": 0.027, "step": 32682 }, { "epoch": 3.8756077315308906, "grad_norm": 0.5101345500493113, "learning_rate": 1.2669718846027578e-07, "loss": 0.0237, "step": 32683 }, { "epoch": 3.8757263132930158, "grad_norm": 0.5582198955708008, "learning_rate": 1.2645594931662563e-07, "loss": 0.0253, "step": 32684 }, { "epoch": 3.8758448950551405, "grad_norm": 0.37333783924266273, "learning_rate": 1.2621493947654882e-07, "loss": 0.0165, "step": 32685 }, { "epoch": 3.8759634768172653, "grad_norm": 0.4923635723583958, "learning_rate": 1.2597415894227138e-07, "loss": 0.0189, "step": 32686 }, { "epoch": 3.8760820585793905, "grad_norm": 0.6993910643997822, "learning_rate": 1.257336077160054e-07, "loss": 0.0298, "step": 32687 }, { "epoch": 3.8762006403415157, "grad_norm": 0.5208924870166101, "learning_rate": 1.254932857999741e-07, "loss": 0.0268, "step": 32688 }, { "epoch": 3.8763192221036404, "grad_norm": 0.4298567123730573, "learning_rate": 1.2525319319638962e-07, "loss": 0.019, "step": 32689 }, { "epoch": 3.876437803865765, "grad_norm": 0.5837464616752625, "learning_rate": 1.250133299074696e-07, "loss": 0.0315, "step": 32690 }, { "epoch": 3.8765563856278904, "grad_norm": 0.4567739911858364, "learning_rate": 1.2477369593542344e-07, "loss": 0.0338, "step": 32691 }, { "epoch": 3.8766749673900156, "grad_norm": 0.47100624937266755, "learning_rate": 1.2453429128245763e-07, "loss": 0.016, "step": 32692 }, { "epoch": 3.8767935491521404, "grad_norm": 0.4158855844144647, "learning_rate": 1.2429511595078435e-07, "loss": 0.0207, "step": 32693 }, { "epoch": 3.876912130914265, "grad_norm": 0.29119027337245906, "learning_rate": 1.2405616994260184e-07, "loss": 0.0132, "step": 32694 }, { "epoch": 3.8770307126763903, "grad_norm": 0.4664870741907755, "learning_rate": 1.238174532601194e-07, "loss": 0.0216, "step": 32695 }, { "epoch": 3.8771492944385155, "grad_norm": 0.5321490654801614, "learning_rate": 1.2357896590553254e-07, "loss": 0.0218, "step": 32696 }, { "epoch": 3.8772678762006403, "grad_norm": 0.5928500043530449, "learning_rate": 1.2334070788104501e-07, "loss": 0.0291, "step": 32697 }, { "epoch": 3.8773864579627655, "grad_norm": 0.5183502338939752, "learning_rate": 1.2310267918884956e-07, "loss": 0.0246, "step": 32698 }, { "epoch": 3.8775050397248902, "grad_norm": 0.5102843066292511, "learning_rate": 1.2286487983113882e-07, "loss": 0.0191, "step": 32699 }, { "epoch": 3.8776236214870154, "grad_norm": 0.8412423165425478, "learning_rate": 1.2262730981010828e-07, "loss": 0.0281, "step": 32700 }, { "epoch": 3.87774220324914, "grad_norm": 0.44953158255585496, "learning_rate": 1.2238996912795065e-07, "loss": 0.0192, "step": 32701 }, { "epoch": 3.8778607850112654, "grad_norm": 0.44850607515340973, "learning_rate": 1.2215285778684748e-07, "loss": 0.019, "step": 32702 }, { "epoch": 3.87797936677339, "grad_norm": 0.5150948120731565, "learning_rate": 1.2191597578898872e-07, "loss": 0.0235, "step": 32703 }, { "epoch": 3.8780979485355154, "grad_norm": 0.6963698675632806, "learning_rate": 1.2167932313655595e-07, "loss": 0.026, "step": 32704 }, { "epoch": 3.87821653029764, "grad_norm": 0.39658544649260224, "learning_rate": 1.214428998317335e-07, "loss": 0.0188, "step": 32705 }, { "epoch": 3.8783351120597653, "grad_norm": 0.6774797496853845, "learning_rate": 1.2120670587669747e-07, "loss": 0.0373, "step": 32706 }, { "epoch": 3.87845369382189, "grad_norm": 0.43733324552338454, "learning_rate": 1.209707412736294e-07, "loss": 0.0241, "step": 32707 }, { "epoch": 3.8785722755840153, "grad_norm": 0.27665934241143364, "learning_rate": 1.2073500602470255e-07, "loss": 0.0146, "step": 32708 }, { "epoch": 3.87869085734614, "grad_norm": 0.25842259851662985, "learning_rate": 1.2049950013209022e-07, "loss": 0.0131, "step": 32709 }, { "epoch": 3.8788094391082653, "grad_norm": 0.8342220298453545, "learning_rate": 1.202642235979601e-07, "loss": 0.0386, "step": 32710 }, { "epoch": 3.87892802087039, "grad_norm": 0.6037545181142556, "learning_rate": 1.2002917642448543e-07, "loss": 0.0224, "step": 32711 }, { "epoch": 3.879046602632515, "grad_norm": 0.43170714610825284, "learning_rate": 1.1979435861383392e-07, "loss": 0.0189, "step": 32712 }, { "epoch": 3.87916518439464, "grad_norm": 0.3148970951399997, "learning_rate": 1.1955977016816778e-07, "loss": 0.0136, "step": 32713 }, { "epoch": 3.879283766156765, "grad_norm": 0.5088820528993692, "learning_rate": 1.1932541108964912e-07, "loss": 0.0266, "step": 32714 }, { "epoch": 3.87940234791889, "grad_norm": 0.47823358255777154, "learning_rate": 1.1909128138044013e-07, "loss": 0.0212, "step": 32715 }, { "epoch": 3.879520929681015, "grad_norm": 0.43968723391023573, "learning_rate": 1.1885738104269739e-07, "loss": 0.0226, "step": 32716 }, { "epoch": 3.87963951144314, "grad_norm": 0.9353379765466897, "learning_rate": 1.186237100785803e-07, "loss": 0.0546, "step": 32717 }, { "epoch": 3.879758093205265, "grad_norm": 0.8303197126263923, "learning_rate": 1.1839026849023993e-07, "loss": 0.0302, "step": 32718 }, { "epoch": 3.87987667496739, "grad_norm": 0.6217878356193468, "learning_rate": 1.1815705627983009e-07, "loss": 0.0254, "step": 32719 }, { "epoch": 3.879995256729515, "grad_norm": 0.8122231892836824, "learning_rate": 1.1792407344949907e-07, "loss": 0.0392, "step": 32720 }, { "epoch": 3.88011383849164, "grad_norm": 0.959321928834248, "learning_rate": 1.1769132000139516e-07, "loss": 0.0314, "step": 32721 }, { "epoch": 3.880232420253765, "grad_norm": 0.4474968966648696, "learning_rate": 1.1745879593766385e-07, "loss": 0.02, "step": 32722 }, { "epoch": 3.88035100201589, "grad_norm": 0.43264463828014044, "learning_rate": 1.1722650126045065e-07, "loss": 0.0188, "step": 32723 }, { "epoch": 3.880469583778015, "grad_norm": 0.48160691091089064, "learning_rate": 1.169944359718983e-07, "loss": 0.0275, "step": 32724 }, { "epoch": 3.88058816554014, "grad_norm": 0.6313937253576575, "learning_rate": 1.167626000741412e-07, "loss": 0.0292, "step": 32725 }, { "epoch": 3.880706747302265, "grad_norm": 0.5447381273443939, "learning_rate": 1.1653099356931929e-07, "loss": 0.0196, "step": 32726 }, { "epoch": 3.8808253290643897, "grad_norm": 0.5090796980296977, "learning_rate": 1.1629961645956699e-07, "loss": 0.0263, "step": 32727 }, { "epoch": 3.880943910826515, "grad_norm": 0.43953016485975943, "learning_rate": 1.160684687470187e-07, "loss": 0.0204, "step": 32728 }, { "epoch": 3.88106249258864, "grad_norm": 0.3650618191931547, "learning_rate": 1.1583755043380328e-07, "loss": 0.0164, "step": 32729 }, { "epoch": 3.881181074350765, "grad_norm": 0.7372840033121476, "learning_rate": 1.1560686152205236e-07, "loss": 0.0336, "step": 32730 }, { "epoch": 3.8812996561128896, "grad_norm": 0.41050835570446714, "learning_rate": 1.1537640201389199e-07, "loss": 0.0182, "step": 32731 }, { "epoch": 3.881418237875015, "grad_norm": 0.4541691933886445, "learning_rate": 1.1514617191144272e-07, "loss": 0.022, "step": 32732 }, { "epoch": 3.88153681963714, "grad_norm": 0.8957272574983618, "learning_rate": 1.1491617121683062e-07, "loss": 0.0399, "step": 32733 }, { "epoch": 3.881655401399265, "grad_norm": 0.4293997359170599, "learning_rate": 1.1468639993217623e-07, "loss": 0.0247, "step": 32734 }, { "epoch": 3.8817739831613896, "grad_norm": 0.5070062109570288, "learning_rate": 1.1445685805959727e-07, "loss": 0.0174, "step": 32735 }, { "epoch": 3.8818925649235148, "grad_norm": 0.5639938760973098, "learning_rate": 1.1422754560120596e-07, "loss": 0.0242, "step": 32736 }, { "epoch": 3.88201114668564, "grad_norm": 0.42810846539402814, "learning_rate": 1.1399846255912283e-07, "loss": 0.0208, "step": 32737 }, { "epoch": 3.8821297284477647, "grad_norm": 0.47922806329418427, "learning_rate": 1.137696089354573e-07, "loss": 0.0228, "step": 32738 }, { "epoch": 3.8822483102098895, "grad_norm": 0.4544732797019944, "learning_rate": 1.13540984732316e-07, "loss": 0.0208, "step": 32739 }, { "epoch": 3.8823668919720147, "grad_norm": 0.5949258008748086, "learning_rate": 1.1331258995181116e-07, "loss": 0.0302, "step": 32740 }, { "epoch": 3.88248547373414, "grad_norm": 0.47189121966141123, "learning_rate": 1.1308442459604385e-07, "loss": 0.0192, "step": 32741 }, { "epoch": 3.8826040554962646, "grad_norm": 1.0201583203393738, "learning_rate": 1.128564886671235e-07, "loss": 0.0601, "step": 32742 }, { "epoch": 3.8827226372583894, "grad_norm": 0.4052178664765206, "learning_rate": 1.1262878216714567e-07, "loss": 0.0224, "step": 32743 }, { "epoch": 3.8828412190205146, "grad_norm": 0.39847991429314666, "learning_rate": 1.1240130509821145e-07, "loss": 0.0147, "step": 32744 }, { "epoch": 3.88295980078264, "grad_norm": 0.5729902493668378, "learning_rate": 1.1217405746241639e-07, "loss": 0.0291, "step": 32745 }, { "epoch": 3.8830783825447646, "grad_norm": 0.3889004385021702, "learning_rate": 1.1194703926185879e-07, "loss": 0.0164, "step": 32746 }, { "epoch": 3.8831969643068898, "grad_norm": 0.40104142287437694, "learning_rate": 1.1172025049862866e-07, "loss": 0.0203, "step": 32747 }, { "epoch": 3.8833155460690145, "grad_norm": 0.3963653755864554, "learning_rate": 1.1149369117481878e-07, "loss": 0.0162, "step": 32748 }, { "epoch": 3.8834341278311397, "grad_norm": 0.4441498411739043, "learning_rate": 1.1126736129251636e-07, "loss": 0.0143, "step": 32749 }, { "epoch": 3.8835527095932645, "grad_norm": 0.28915864276129905, "learning_rate": 1.1104126085380861e-07, "loss": 0.0141, "step": 32750 }, { "epoch": 3.8836712913553897, "grad_norm": 0.7152308303839191, "learning_rate": 1.1081538986078e-07, "loss": 0.0349, "step": 32751 }, { "epoch": 3.8837898731175144, "grad_norm": 0.3789100313548978, "learning_rate": 1.1058974831551216e-07, "loss": 0.02, "step": 32752 }, { "epoch": 3.8839084548796396, "grad_norm": 1.1224636511978052, "learning_rate": 1.1036433622008679e-07, "loss": 0.0617, "step": 32753 }, { "epoch": 3.8840270366417644, "grad_norm": 0.7675610444189201, "learning_rate": 1.1013915357657722e-07, "loss": 0.0438, "step": 32754 }, { "epoch": 3.8841456184038896, "grad_norm": 0.6330947798242184, "learning_rate": 1.0991420038706512e-07, "loss": 0.028, "step": 32755 }, { "epoch": 3.8842642001660144, "grad_norm": 0.6326599877976278, "learning_rate": 1.0968947665362106e-07, "loss": 0.0297, "step": 32756 }, { "epoch": 3.8843827819281396, "grad_norm": 0.7910524063705733, "learning_rate": 1.0946498237831837e-07, "loss": 0.0269, "step": 32757 }, { "epoch": 3.8845013636902643, "grad_norm": 0.6093089722636637, "learning_rate": 1.0924071756322484e-07, "loss": 0.0278, "step": 32758 }, { "epoch": 3.8846199454523895, "grad_norm": 0.3368534495151929, "learning_rate": 1.0901668221040828e-07, "loss": 0.0132, "step": 32759 }, { "epoch": 3.8847385272145143, "grad_norm": 0.42057484901431474, "learning_rate": 1.0879287632193369e-07, "loss": 0.0247, "step": 32760 }, { "epoch": 3.8848571089766395, "grad_norm": 0.49696282477057535, "learning_rate": 1.0856929989986886e-07, "loss": 0.0226, "step": 32761 }, { "epoch": 3.8849756907387643, "grad_norm": 0.5214393746752765, "learning_rate": 1.0834595294626771e-07, "loss": 0.0215, "step": 32762 }, { "epoch": 3.8850942725008895, "grad_norm": 0.7856818622390687, "learning_rate": 1.0812283546319247e-07, "loss": 0.0364, "step": 32763 }, { "epoch": 3.885212854263014, "grad_norm": 0.31908470132255695, "learning_rate": 1.078999474527026e-07, "loss": 0.0127, "step": 32764 }, { "epoch": 3.8853314360251394, "grad_norm": 0.36084043875327637, "learning_rate": 1.0767728891685202e-07, "loss": 0.0168, "step": 32765 }, { "epoch": 3.885450017787264, "grad_norm": 0.31563387800699755, "learning_rate": 1.0745485985768911e-07, "loss": 0.0155, "step": 32766 }, { "epoch": 3.8855685995493894, "grad_norm": 0.43839616004862586, "learning_rate": 1.0723266027726775e-07, "loss": 0.0285, "step": 32767 }, { "epoch": 3.885687181311514, "grad_norm": 0.3856497069224779, "learning_rate": 1.0701069017763631e-07, "loss": 0.0145, "step": 32768 }, { "epoch": 3.8858057630736393, "grad_norm": 0.5788784397916891, "learning_rate": 1.0678894956083763e-07, "loss": 0.0281, "step": 32769 }, { "epoch": 3.885924344835764, "grad_norm": 0.5776328558034969, "learning_rate": 1.0656743842892003e-07, "loss": 0.0324, "step": 32770 }, { "epoch": 3.8860429265978893, "grad_norm": 0.5269075935142127, "learning_rate": 1.0634615678392635e-07, "loss": 0.0244, "step": 32771 }, { "epoch": 3.886161508360014, "grad_norm": 0.5031807132057741, "learning_rate": 1.0612510462789382e-07, "loss": 0.025, "step": 32772 }, { "epoch": 3.8862800901221393, "grad_norm": 0.4567748895873529, "learning_rate": 1.0590428196285973e-07, "loss": 0.0194, "step": 32773 }, { "epoch": 3.8863986718842645, "grad_norm": 0.4216460798898806, "learning_rate": 1.0568368879086132e-07, "loss": 0.02, "step": 32774 }, { "epoch": 3.8865172536463892, "grad_norm": 0.3919279908100094, "learning_rate": 1.0546332511393309e-07, "loss": 0.0188, "step": 32775 }, { "epoch": 3.886635835408514, "grad_norm": 0.7314088447911175, "learning_rate": 1.0524319093410395e-07, "loss": 0.0347, "step": 32776 }, { "epoch": 3.886754417170639, "grad_norm": 0.5811210190326367, "learning_rate": 1.0502328625340563e-07, "loss": 0.0275, "step": 32777 }, { "epoch": 3.8868729989327644, "grad_norm": 0.41839274905863744, "learning_rate": 1.0480361107386427e-07, "loss": 0.0176, "step": 32778 }, { "epoch": 3.886991580694889, "grad_norm": 0.5095030741528924, "learning_rate": 1.0458416539750327e-07, "loss": 0.0224, "step": 32779 }, { "epoch": 3.887110162457014, "grad_norm": 0.418915685102738, "learning_rate": 1.0436494922634876e-07, "loss": 0.0187, "step": 32780 }, { "epoch": 3.887228744219139, "grad_norm": 0.6170322333671703, "learning_rate": 1.0414596256242137e-07, "loss": 0.0304, "step": 32781 }, { "epoch": 3.8873473259812643, "grad_norm": 0.37629412387613376, "learning_rate": 1.0392720540773893e-07, "loss": 0.0212, "step": 32782 }, { "epoch": 3.887465907743389, "grad_norm": 0.9104152957694075, "learning_rate": 1.0370867776431647e-07, "loss": 0.0415, "step": 32783 }, { "epoch": 3.887584489505514, "grad_norm": 0.41589310761526155, "learning_rate": 1.0349037963417185e-07, "loss": 0.0135, "step": 32784 }, { "epoch": 3.887703071267639, "grad_norm": 0.4717108845163511, "learning_rate": 1.0327231101931179e-07, "loss": 0.0177, "step": 32785 }, { "epoch": 3.8878216530297642, "grad_norm": 0.39102252140928045, "learning_rate": 1.0305447192175688e-07, "loss": 0.0174, "step": 32786 }, { "epoch": 3.887940234791889, "grad_norm": 0.3387846898414189, "learning_rate": 1.0283686234350554e-07, "loss": 0.0169, "step": 32787 }, { "epoch": 3.8880588165540138, "grad_norm": 0.6421575470721878, "learning_rate": 1.0261948228656726e-07, "loss": 0.0259, "step": 32788 }, { "epoch": 3.888177398316139, "grad_norm": 0.3716080902196915, "learning_rate": 1.02402331752946e-07, "loss": 0.0153, "step": 32789 }, { "epoch": 3.888295980078264, "grad_norm": 0.7303524519062408, "learning_rate": 1.0218541074464295e-07, "loss": 0.0345, "step": 32790 }, { "epoch": 3.888414561840389, "grad_norm": 0.5660289616288642, "learning_rate": 1.0196871926366202e-07, "loss": 0.0214, "step": 32791 }, { "epoch": 3.8885331436025137, "grad_norm": 0.7402812384359199, "learning_rate": 1.0175225731199612e-07, "loss": 0.0247, "step": 32792 }, { "epoch": 3.888651725364639, "grad_norm": 0.7100805595942826, "learning_rate": 1.0153602489164082e-07, "loss": 0.0271, "step": 32793 }, { "epoch": 3.888770307126764, "grad_norm": 0.375949644124607, "learning_rate": 1.0132002200459179e-07, "loss": 0.0139, "step": 32794 }, { "epoch": 3.888888888888889, "grad_norm": 0.6934647110242815, "learning_rate": 1.0110424865284185e-07, "loss": 0.0284, "step": 32795 }, { "epoch": 3.8890074706510136, "grad_norm": 0.5582115788056977, "learning_rate": 1.0088870483837276e-07, "loss": 0.0283, "step": 32796 }, { "epoch": 3.889126052413139, "grad_norm": 0.6437577030619719, "learning_rate": 1.0067339056318015e-07, "loss": 0.0285, "step": 32797 }, { "epoch": 3.889244634175264, "grad_norm": 0.576021150631878, "learning_rate": 1.0045830582924299e-07, "loss": 0.0261, "step": 32798 }, { "epoch": 3.8893632159373888, "grad_norm": 0.42052630463577434, "learning_rate": 1.0024345063854857e-07, "loss": 0.0172, "step": 32799 }, { "epoch": 3.889481797699514, "grad_norm": 0.5239410520219584, "learning_rate": 1.0002882499307309e-07, "loss": 0.0291, "step": 32800 }, { "epoch": 3.8896003794616387, "grad_norm": 0.5861771628526631, "learning_rate": 9.98144288947983e-08, "loss": 0.0312, "step": 32801 }, { "epoch": 3.889718961223764, "grad_norm": 0.7258926547118401, "learning_rate": 9.960026234570319e-08, "loss": 0.032, "step": 32802 }, { "epoch": 3.8898375429858887, "grad_norm": 0.702795300303714, "learning_rate": 9.938632534775283e-08, "loss": 0.0172, "step": 32803 }, { "epoch": 3.889956124748014, "grad_norm": 0.42999719913882106, "learning_rate": 9.917261790292898e-08, "loss": 0.0199, "step": 32804 }, { "epoch": 3.8900747065101386, "grad_norm": 0.6316927313466153, "learning_rate": 9.89591400131995e-08, "loss": 0.0265, "step": 32805 }, { "epoch": 3.890193288272264, "grad_norm": 0.44357778744587406, "learning_rate": 9.87458916805295e-08, "loss": 0.0266, "step": 32806 }, { "epoch": 3.8903118700343886, "grad_norm": 0.4896669669796747, "learning_rate": 9.853287290688407e-08, "loss": 0.0301, "step": 32807 }, { "epoch": 3.890430451796514, "grad_norm": 0.6290958229867226, "learning_rate": 9.832008369423385e-08, "loss": 0.0244, "step": 32808 }, { "epoch": 3.8905490335586386, "grad_norm": 0.35718820892983694, "learning_rate": 9.810752404453282e-08, "loss": 0.0208, "step": 32809 }, { "epoch": 3.8906676153207638, "grad_norm": 0.7707842913987072, "learning_rate": 9.789519395974334e-08, "loss": 0.0333, "step": 32810 }, { "epoch": 3.8907861970828885, "grad_norm": 0.85157083317171, "learning_rate": 9.768309344182491e-08, "loss": 0.0399, "step": 32811 }, { "epoch": 3.8909047788450137, "grad_norm": 0.4635860266861261, "learning_rate": 9.747122249273156e-08, "loss": 0.0194, "step": 32812 }, { "epoch": 3.8910233606071385, "grad_norm": 0.6570219329228497, "learning_rate": 9.725958111441447e-08, "loss": 0.0314, "step": 32813 }, { "epoch": 3.8911419423692637, "grad_norm": 0.7861729356571042, "learning_rate": 9.704816930882766e-08, "loss": 0.0374, "step": 32814 }, { "epoch": 3.8912605241313885, "grad_norm": 0.47530149520852905, "learning_rate": 9.683698707791678e-08, "loss": 0.0256, "step": 32815 }, { "epoch": 3.8913791058935137, "grad_norm": 0.32085734754947204, "learning_rate": 9.662603442363305e-08, "loss": 0.0159, "step": 32816 }, { "epoch": 3.8914976876556384, "grad_norm": 0.6458862860241278, "learning_rate": 9.64153113479166e-08, "loss": 0.0332, "step": 32817 }, { "epoch": 3.8916162694177636, "grad_norm": 0.41997720247075776, "learning_rate": 9.620481785271308e-08, "loss": 0.0202, "step": 32818 }, { "epoch": 3.8917348511798884, "grad_norm": 0.7869130602016425, "learning_rate": 9.59945539399626e-08, "loss": 0.0317, "step": 32819 }, { "epoch": 3.8918534329420136, "grad_norm": 0.5762158332847528, "learning_rate": 9.57845196116025e-08, "loss": 0.0224, "step": 32820 }, { "epoch": 3.8919720147041383, "grad_norm": 0.6823062729033833, "learning_rate": 9.557471486957015e-08, "loss": 0.0249, "step": 32821 }, { "epoch": 3.8920905964662635, "grad_norm": 0.6310552163531695, "learning_rate": 9.53651397157973e-08, "loss": 0.0304, "step": 32822 }, { "epoch": 3.8922091782283887, "grad_norm": 0.5673159765606386, "learning_rate": 9.515579415221853e-08, "loss": 0.0232, "step": 32823 }, { "epoch": 3.8923277599905135, "grad_norm": 0.6094942233146888, "learning_rate": 9.494667818076564e-08, "loss": 0.0264, "step": 32824 }, { "epoch": 3.8924463417526383, "grad_norm": 0.689014377579962, "learning_rate": 9.473779180335929e-08, "loss": 0.0259, "step": 32825 }, { "epoch": 3.8925649235147635, "grad_norm": 0.3424209245162095, "learning_rate": 9.452913502193406e-08, "loss": 0.0168, "step": 32826 }, { "epoch": 3.8926835052768887, "grad_norm": 0.2972656116314417, "learning_rate": 9.432070783840508e-08, "loss": 0.0137, "step": 32827 }, { "epoch": 3.8928020870390134, "grad_norm": 0.6026373629606993, "learning_rate": 9.411251025470136e-08, "loss": 0.0292, "step": 32828 }, { "epoch": 3.892920668801138, "grad_norm": 0.7516910012946831, "learning_rate": 9.390454227273804e-08, "loss": 0.0327, "step": 32829 }, { "epoch": 3.8930392505632634, "grad_norm": 0.6393735658323294, "learning_rate": 9.369680389443025e-08, "loss": 0.0234, "step": 32830 }, { "epoch": 3.8931578323253886, "grad_norm": 0.5808091018491908, "learning_rate": 9.348929512169869e-08, "loss": 0.0261, "step": 32831 }, { "epoch": 3.8932764140875133, "grad_norm": 0.6638134285769574, "learning_rate": 9.328201595645014e-08, "loss": 0.024, "step": 32832 }, { "epoch": 3.893394995849638, "grad_norm": 0.571560928438208, "learning_rate": 9.307496640060254e-08, "loss": 0.0297, "step": 32833 }, { "epoch": 3.8935135776117633, "grad_norm": 0.6871526937737936, "learning_rate": 9.286814645605713e-08, "loss": 0.0299, "step": 32834 }, { "epoch": 3.8936321593738885, "grad_norm": 0.9110841662031888, "learning_rate": 9.266155612472905e-08, "loss": 0.0438, "step": 32835 }, { "epoch": 3.8937507411360133, "grad_norm": 0.6765622424916666, "learning_rate": 9.2455195408514e-08, "loss": 0.033, "step": 32836 }, { "epoch": 3.893869322898138, "grad_norm": 0.5259619561554134, "learning_rate": 9.224906430931879e-08, "loss": 0.0205, "step": 32837 }, { "epoch": 3.8939879046602632, "grad_norm": 0.43854611454014875, "learning_rate": 9.204316282904468e-08, "loss": 0.0198, "step": 32838 }, { "epoch": 3.8941064864223884, "grad_norm": 0.5320183729866419, "learning_rate": 9.183749096958739e-08, "loss": 0.0348, "step": 32839 }, { "epoch": 3.894225068184513, "grad_norm": 0.41484897189812814, "learning_rate": 9.163204873284536e-08, "loss": 0.0156, "step": 32840 }, { "epoch": 3.894343649946638, "grad_norm": 0.5115332087687546, "learning_rate": 9.142683612071157e-08, "loss": 0.0234, "step": 32841 }, { "epoch": 3.894462231708763, "grad_norm": 0.5134065637406902, "learning_rate": 9.122185313507614e-08, "loss": 0.0255, "step": 32842 }, { "epoch": 3.8945808134708884, "grad_norm": 0.5982826510574623, "learning_rate": 9.101709977782925e-08, "loss": 0.0216, "step": 32843 }, { "epoch": 3.894699395233013, "grad_norm": 0.46371174354657063, "learning_rate": 9.081257605086102e-08, "loss": 0.0171, "step": 32844 }, { "epoch": 3.894817976995138, "grad_norm": 0.898499077534335, "learning_rate": 9.060828195605331e-08, "loss": 0.0325, "step": 32845 }, { "epoch": 3.894936558757263, "grad_norm": 0.41795125921866366, "learning_rate": 9.040421749529349e-08, "loss": 0.019, "step": 32846 }, { "epoch": 3.8950551405193883, "grad_norm": 0.32961729025458625, "learning_rate": 9.02003826704606e-08, "loss": 0.0136, "step": 32847 }, { "epoch": 3.895173722281513, "grad_norm": 0.419550882220157, "learning_rate": 8.999677748343093e-08, "loss": 0.0181, "step": 32848 }, { "epoch": 3.8952923040436382, "grad_norm": 0.6757401016852386, "learning_rate": 8.979340193608631e-08, "loss": 0.0323, "step": 32849 }, { "epoch": 3.895410885805763, "grad_norm": 0.35706580529966797, "learning_rate": 8.959025603030025e-08, "loss": 0.0112, "step": 32850 }, { "epoch": 3.895529467567888, "grad_norm": 0.5230995169960441, "learning_rate": 8.938733976794623e-08, "loss": 0.0182, "step": 32851 }, { "epoch": 3.895648049330013, "grad_norm": 0.40099316019209763, "learning_rate": 8.918465315088942e-08, "loss": 0.0188, "step": 32852 }, { "epoch": 3.895766631092138, "grad_norm": 0.38897716161339707, "learning_rate": 8.898219618100612e-08, "loss": 0.0196, "step": 32853 }, { "epoch": 3.895885212854263, "grad_norm": 0.562783969368509, "learning_rate": 8.877996886015594e-08, "loss": 0.0165, "step": 32854 }, { "epoch": 3.896003794616388, "grad_norm": 0.5702160459761578, "learning_rate": 8.857797119020961e-08, "loss": 0.0244, "step": 32855 }, { "epoch": 3.896122376378513, "grad_norm": 0.5140791873352034, "learning_rate": 8.83762031730212e-08, "loss": 0.0258, "step": 32856 }, { "epoch": 3.896240958140638, "grad_norm": 0.766761405139536, "learning_rate": 8.817466481045867e-08, "loss": 0.0354, "step": 32857 }, { "epoch": 3.896359539902763, "grad_norm": 0.41232158991628676, "learning_rate": 8.797335610437885e-08, "loss": 0.0161, "step": 32858 }, { "epoch": 3.896478121664888, "grad_norm": 0.6125417443246531, "learning_rate": 8.777227705663305e-08, "loss": 0.0186, "step": 32859 }, { "epoch": 3.896596703427013, "grad_norm": 0.3741472353426438, "learning_rate": 8.757142766907533e-08, "loss": 0.0237, "step": 32860 }, { "epoch": 3.896715285189138, "grad_norm": 0.9043322142303796, "learning_rate": 8.737080794356257e-08, "loss": 0.0417, "step": 32861 }, { "epoch": 3.8968338669512628, "grad_norm": 0.401239451032039, "learning_rate": 8.717041788194047e-08, "loss": 0.0139, "step": 32862 }, { "epoch": 3.896952448713388, "grad_norm": 0.7235971582878648, "learning_rate": 8.697025748605758e-08, "loss": 0.032, "step": 32863 }, { "epoch": 3.8970710304755127, "grad_norm": 0.501756964436278, "learning_rate": 8.677032675775687e-08, "loss": 0.0224, "step": 32864 }, { "epoch": 3.897189612237638, "grad_norm": 0.7270043629491337, "learning_rate": 8.657062569888408e-08, "loss": 0.0346, "step": 32865 }, { "epoch": 3.8973081939997627, "grad_norm": 0.4540139820499246, "learning_rate": 8.63711543112794e-08, "loss": 0.0192, "step": 32866 }, { "epoch": 3.897426775761888, "grad_norm": 0.6118033547626213, "learning_rate": 8.617191259678026e-08, "loss": 0.0248, "step": 32867 }, { "epoch": 3.8975453575240127, "grad_norm": 0.8295366637529634, "learning_rate": 8.597290055722684e-08, "loss": 0.0442, "step": 32868 }, { "epoch": 3.897663939286138, "grad_norm": 0.807480710882785, "learning_rate": 8.577411819445103e-08, "loss": 0.0469, "step": 32869 }, { "epoch": 3.8977825210482626, "grad_norm": 0.676652404952133, "learning_rate": 8.557556551028467e-08, "loss": 0.0255, "step": 32870 }, { "epoch": 3.897901102810388, "grad_norm": 0.6524269590680122, "learning_rate": 8.537724250656243e-08, "loss": 0.0235, "step": 32871 }, { "epoch": 3.898019684572513, "grad_norm": 0.312292043569829, "learning_rate": 8.517914918510506e-08, "loss": 0.0138, "step": 32872 }, { "epoch": 3.898138266334638, "grad_norm": 0.751214359431838, "learning_rate": 8.49812855477472e-08, "loss": 0.0357, "step": 32873 }, { "epoch": 3.8982568480967625, "grad_norm": 0.5573124972621954, "learning_rate": 8.478365159630686e-08, "loss": 0.0277, "step": 32874 }, { "epoch": 3.8983754298588877, "grad_norm": 0.3250948642892914, "learning_rate": 8.458624733261034e-08, "loss": 0.0131, "step": 32875 }, { "epoch": 3.898494011621013, "grad_norm": 0.6917656648996087, "learning_rate": 8.438907275847562e-08, "loss": 0.0229, "step": 32876 }, { "epoch": 3.8986125933831377, "grad_norm": 0.36421845016466153, "learning_rate": 8.419212787571796e-08, "loss": 0.021, "step": 32877 }, { "epoch": 3.8987311751452625, "grad_norm": 0.3953958110462938, "learning_rate": 8.39954126861553e-08, "loss": 0.0149, "step": 32878 }, { "epoch": 3.8988497569073877, "grad_norm": 0.6020667719120232, "learning_rate": 8.379892719160009e-08, "loss": 0.0275, "step": 32879 }, { "epoch": 3.898968338669513, "grad_norm": 0.38360657037909646, "learning_rate": 8.360267139386758e-08, "loss": 0.0195, "step": 32880 }, { "epoch": 3.8990869204316376, "grad_norm": 0.39835012429657934, "learning_rate": 8.340664529476184e-08, "loss": 0.0197, "step": 32881 }, { "epoch": 3.8992055021937624, "grad_norm": 0.6915888707119626, "learning_rate": 8.321084889609255e-08, "loss": 0.0287, "step": 32882 }, { "epoch": 3.8993240839558876, "grad_norm": 0.6671810243583413, "learning_rate": 8.301528219966381e-08, "loss": 0.0332, "step": 32883 }, { "epoch": 3.899442665718013, "grad_norm": 0.3844554787794611, "learning_rate": 8.281994520727976e-08, "loss": 0.0245, "step": 32884 }, { "epoch": 3.8995612474801375, "grad_norm": 0.9111618955133782, "learning_rate": 8.262483792074172e-08, "loss": 0.0468, "step": 32885 }, { "epoch": 3.8996798292422623, "grad_norm": 0.23687884464870704, "learning_rate": 8.242996034184269e-08, "loss": 0.0093, "step": 32886 }, { "epoch": 3.8997984110043875, "grad_norm": 0.4474607147590572, "learning_rate": 8.223531247238681e-08, "loss": 0.0126, "step": 32887 }, { "epoch": 3.8999169927665127, "grad_norm": 0.9386253202892607, "learning_rate": 8.204089431416428e-08, "loss": 0.0556, "step": 32888 }, { "epoch": 3.9000355745286375, "grad_norm": 0.5890312024866474, "learning_rate": 8.184670586896814e-08, "loss": 0.0282, "step": 32889 }, { "epoch": 3.9001541562907622, "grad_norm": 0.6306002405539147, "learning_rate": 8.165274713858861e-08, "loss": 0.0287, "step": 32890 }, { "epoch": 3.9002727380528874, "grad_norm": 0.4137961860236405, "learning_rate": 8.145901812481593e-08, "loss": 0.0182, "step": 32891 }, { "epoch": 3.9003913198150126, "grad_norm": 0.539683929045907, "learning_rate": 8.1265518829432e-08, "loss": 0.0299, "step": 32892 }, { "epoch": 3.9005099015771374, "grad_norm": 0.41483196513336973, "learning_rate": 8.107224925422152e-08, "loss": 0.0154, "step": 32893 }, { "epoch": 3.900628483339262, "grad_norm": 0.2986712269280829, "learning_rate": 8.087920940096915e-08, "loss": 0.0146, "step": 32894 }, { "epoch": 3.9007470651013874, "grad_norm": 0.46451732431283665, "learning_rate": 8.068639927145127e-08, "loss": 0.0235, "step": 32895 }, { "epoch": 3.9008656468635126, "grad_norm": 0.36352713961093003, "learning_rate": 8.0493818867447e-08, "loss": 0.0112, "step": 32896 }, { "epoch": 3.9009842286256373, "grad_norm": 0.5132113228291761, "learning_rate": 8.03014681907327e-08, "loss": 0.0285, "step": 32897 }, { "epoch": 3.9011028103877625, "grad_norm": 0.6949493168059061, "learning_rate": 8.010934724307917e-08, "loss": 0.0275, "step": 32898 }, { "epoch": 3.9012213921498873, "grad_norm": 0.6659674839874709, "learning_rate": 7.991745602625999e-08, "loss": 0.0286, "step": 32899 }, { "epoch": 3.9013399739120125, "grad_norm": 0.4668956889527074, "learning_rate": 7.972579454204043e-08, "loss": 0.0259, "step": 32900 }, { "epoch": 3.9014585556741372, "grad_norm": 0.6735016183700935, "learning_rate": 7.953436279219129e-08, "loss": 0.0364, "step": 32901 }, { "epoch": 3.9015771374362624, "grad_norm": 0.6715648304527518, "learning_rate": 7.934316077847782e-08, "loss": 0.0234, "step": 32902 }, { "epoch": 3.901695719198387, "grad_norm": 0.3723557679640903, "learning_rate": 7.915218850265693e-08, "loss": 0.023, "step": 32903 }, { "epoch": 3.9018143009605124, "grad_norm": 0.543036926928761, "learning_rate": 7.896144596649391e-08, "loss": 0.0258, "step": 32904 }, { "epoch": 3.901932882722637, "grad_norm": 0.7090154972436289, "learning_rate": 7.877093317174844e-08, "loss": 0.0327, "step": 32905 }, { "epoch": 3.9020514644847624, "grad_norm": 0.6538918612700062, "learning_rate": 7.858065012017191e-08, "loss": 0.0266, "step": 32906 }, { "epoch": 3.902170046246887, "grad_norm": 0.46811859992661825, "learning_rate": 7.839059681352401e-08, "loss": 0.0294, "step": 32907 }, { "epoch": 3.9022886280090123, "grad_norm": 0.6505003396308524, "learning_rate": 7.820077325355057e-08, "loss": 0.0311, "step": 32908 }, { "epoch": 3.902407209771137, "grad_norm": 0.44175053523052654, "learning_rate": 7.801117944200576e-08, "loss": 0.022, "step": 32909 }, { "epoch": 3.9025257915332623, "grad_norm": 0.8529840062317057, "learning_rate": 7.782181538063815e-08, "loss": 0.0359, "step": 32910 }, { "epoch": 3.902644373295387, "grad_norm": 0.5952132828975113, "learning_rate": 7.763268107119081e-08, "loss": 0.0313, "step": 32911 }, { "epoch": 3.9027629550575123, "grad_norm": 0.49774204534298827, "learning_rate": 7.74437765154068e-08, "loss": 0.0224, "step": 32912 }, { "epoch": 3.902881536819637, "grad_norm": 0.40663268491190196, "learning_rate": 7.725510171503192e-08, "loss": 0.0119, "step": 32913 }, { "epoch": 3.903000118581762, "grad_norm": 0.4735647728658473, "learning_rate": 7.706665667180091e-08, "loss": 0.0231, "step": 32914 }, { "epoch": 3.903118700343887, "grad_norm": 0.34041533285556563, "learning_rate": 7.687844138745404e-08, "loss": 0.0111, "step": 32915 }, { "epoch": 3.903237282106012, "grad_norm": 0.4857610276712844, "learning_rate": 7.669045586372326e-08, "loss": 0.0259, "step": 32916 }, { "epoch": 3.903355863868137, "grad_norm": 0.29692043303895543, "learning_rate": 7.650270010234606e-08, "loss": 0.0123, "step": 32917 }, { "epoch": 3.903474445630262, "grad_norm": 0.6345046237272106, "learning_rate": 7.631517410504884e-08, "loss": 0.0292, "step": 32918 }, { "epoch": 3.903593027392387, "grad_norm": 0.5850987267950636, "learning_rate": 7.612787787356356e-08, "loss": 0.0226, "step": 32919 }, { "epoch": 3.903711609154512, "grad_norm": 0.7090509278815005, "learning_rate": 7.594081140961384e-08, "loss": 0.0262, "step": 32920 }, { "epoch": 3.903830190916637, "grad_norm": 0.5541594377179736, "learning_rate": 7.575397471492607e-08, "loss": 0.0184, "step": 32921 }, { "epoch": 3.903948772678762, "grad_norm": 0.5004512834343101, "learning_rate": 7.556736779122386e-08, "loss": 0.0395, "step": 32922 }, { "epoch": 3.904067354440887, "grad_norm": 0.4331170731944438, "learning_rate": 7.53809906402253e-08, "loss": 0.0138, "step": 32923 }, { "epoch": 3.904185936203012, "grad_norm": 0.6745381426386242, "learning_rate": 7.519484326365123e-08, "loss": 0.0279, "step": 32924 }, { "epoch": 3.9043045179651372, "grad_norm": 0.5596771728752179, "learning_rate": 7.500892566321416e-08, "loss": 0.0289, "step": 32925 }, { "epoch": 3.904423099727262, "grad_norm": 0.4355973845100918, "learning_rate": 7.482323784062939e-08, "loss": 0.0225, "step": 32926 }, { "epoch": 3.9045416814893867, "grad_norm": 0.7099160821623313, "learning_rate": 7.463777979760944e-08, "loss": 0.0344, "step": 32927 }, { "epoch": 3.904660263251512, "grad_norm": 0.41994845159728594, "learning_rate": 7.445255153586406e-08, "loss": 0.0176, "step": 32928 }, { "epoch": 3.904778845013637, "grad_norm": 0.8705774616506163, "learning_rate": 7.42675530571002e-08, "loss": 0.0475, "step": 32929 }, { "epoch": 3.904897426775762, "grad_norm": 0.5865378919995873, "learning_rate": 7.408278436302485e-08, "loss": 0.022, "step": 32930 }, { "epoch": 3.9050160085378867, "grad_norm": 0.37940968143323733, "learning_rate": 7.389824545533664e-08, "loss": 0.0166, "step": 32931 }, { "epoch": 3.905134590300012, "grad_norm": 0.40980681244540185, "learning_rate": 7.371393633574253e-08, "loss": 0.0157, "step": 32932 }, { "epoch": 3.905253172062137, "grad_norm": 0.3992028089922306, "learning_rate": 7.352985700594118e-08, "loss": 0.0171, "step": 32933 }, { "epoch": 3.905371753824262, "grad_norm": 0.5264043004505774, "learning_rate": 7.334600746762288e-08, "loss": 0.025, "step": 32934 }, { "epoch": 3.9054903355863866, "grad_norm": 0.4319524224894581, "learning_rate": 7.316238772249184e-08, "loss": 0.024, "step": 32935 }, { "epoch": 3.905608917348512, "grad_norm": 0.4657221431186572, "learning_rate": 7.297899777223561e-08, "loss": 0.0229, "step": 32936 }, { "epoch": 3.905727499110637, "grad_norm": 0.31052235889051605, "learning_rate": 7.27958376185417e-08, "loss": 0.0143, "step": 32937 }, { "epoch": 3.9058460808727617, "grad_norm": 0.5369516316934927, "learning_rate": 7.261290726310599e-08, "loss": 0.0283, "step": 32938 }, { "epoch": 3.9059646626348865, "grad_norm": 0.386034781795001, "learning_rate": 7.243020670761046e-08, "loss": 0.0184, "step": 32939 }, { "epoch": 3.9060832443970117, "grad_norm": 0.8060195347206971, "learning_rate": 7.22477359537399e-08, "loss": 0.0354, "step": 32940 }, { "epoch": 3.906201826159137, "grad_norm": 0.47624059882962017, "learning_rate": 7.206549500317905e-08, "loss": 0.0183, "step": 32941 }, { "epoch": 3.9063204079212617, "grad_norm": 0.5044358141714613, "learning_rate": 7.188348385760158e-08, "loss": 0.0286, "step": 32942 }, { "epoch": 3.9064389896833864, "grad_norm": 0.406794629609817, "learning_rate": 7.170170251869224e-08, "loss": 0.0205, "step": 32943 }, { "epoch": 3.9065575714455116, "grad_norm": 0.6841136654869208, "learning_rate": 7.15201509881247e-08, "loss": 0.0237, "step": 32944 }, { "epoch": 3.906676153207637, "grad_norm": 0.5809399907864505, "learning_rate": 7.133882926756985e-08, "loss": 0.0205, "step": 32945 }, { "epoch": 3.9067947349697616, "grad_norm": 0.6076950028180731, "learning_rate": 7.115773735870413e-08, "loss": 0.0181, "step": 32946 }, { "epoch": 3.906913316731887, "grad_norm": 0.30361387586249594, "learning_rate": 7.097687526319007e-08, "loss": 0.011, "step": 32947 }, { "epoch": 3.9070318984940116, "grad_norm": 0.46178108231233955, "learning_rate": 7.079624298270416e-08, "loss": 0.0263, "step": 32948 }, { "epoch": 3.9071504802561368, "grad_norm": 1.0865953892085038, "learning_rate": 7.061584051890336e-08, "loss": 0.036, "step": 32949 }, { "epoch": 3.9072690620182615, "grad_norm": 0.48785260108525, "learning_rate": 7.043566787345579e-08, "loss": 0.0241, "step": 32950 }, { "epoch": 3.9073876437803867, "grad_norm": 0.5550353431025201, "learning_rate": 7.025572504801847e-08, "loss": 0.0221, "step": 32951 }, { "epoch": 3.9075062255425115, "grad_norm": 0.6680996886467149, "learning_rate": 7.007601204425395e-08, "loss": 0.0261, "step": 32952 }, { "epoch": 3.9076248073046367, "grad_norm": 0.40244891627757445, "learning_rate": 6.989652886381925e-08, "loss": 0.0177, "step": 32953 }, { "epoch": 3.9077433890667614, "grad_norm": 0.48016686514357376, "learning_rate": 6.971727550836304e-08, "loss": 0.0225, "step": 32954 }, { "epoch": 3.9078619708288866, "grad_norm": 0.3417325779522888, "learning_rate": 6.95382519795451e-08, "loss": 0.0184, "step": 32955 }, { "epoch": 3.9079805525910114, "grad_norm": 0.37418097618542734, "learning_rate": 6.935945827901413e-08, "loss": 0.0194, "step": 32956 }, { "epoch": 3.9080991343531366, "grad_norm": 0.47077300395122074, "learning_rate": 6.918089440841324e-08, "loss": 0.0182, "step": 32957 }, { "epoch": 3.9082177161152614, "grad_norm": 0.34108800372612075, "learning_rate": 6.900256036939389e-08, "loss": 0.0141, "step": 32958 }, { "epoch": 3.9083362978773866, "grad_norm": 0.7801701781331113, "learning_rate": 6.882445616359923e-08, "loss": 0.0423, "step": 32959 }, { "epoch": 3.9084548796395113, "grad_norm": 0.638567192548044, "learning_rate": 6.864658179266959e-08, "loss": 0.0347, "step": 32960 }, { "epoch": 3.9085734614016365, "grad_norm": 0.5343114857993443, "learning_rate": 6.846893725824532e-08, "loss": 0.0227, "step": 32961 }, { "epoch": 3.9086920431637613, "grad_norm": 0.615990381434428, "learning_rate": 6.82915225619668e-08, "loss": 0.0285, "step": 32962 }, { "epoch": 3.9088106249258865, "grad_norm": 0.5740717297435133, "learning_rate": 6.811433770546604e-08, "loss": 0.0275, "step": 32963 }, { "epoch": 3.9089292066880112, "grad_norm": 0.5030386020633882, "learning_rate": 6.793738269037508e-08, "loss": 0.0271, "step": 32964 }, { "epoch": 3.9090477884501365, "grad_norm": 0.6585761448686483, "learning_rate": 6.776065751832872e-08, "loss": 0.0251, "step": 32965 }, { "epoch": 3.909166370212261, "grad_norm": 0.3331400030130475, "learning_rate": 6.758416219095898e-08, "loss": 0.0139, "step": 32966 }, { "epoch": 3.9092849519743864, "grad_norm": 0.3975716560877251, "learning_rate": 6.740789670988402e-08, "loss": 0.0176, "step": 32967 }, { "epoch": 3.909403533736511, "grad_norm": 0.6658183077930677, "learning_rate": 6.723186107673585e-08, "loss": 0.0281, "step": 32968 }, { "epoch": 3.9095221154986364, "grad_norm": 0.5943754991882794, "learning_rate": 6.705605529313541e-08, "loss": 0.0217, "step": 32969 }, { "epoch": 3.909640697260761, "grad_norm": 0.3198826967311972, "learning_rate": 6.688047936070362e-08, "loss": 0.0155, "step": 32970 }, { "epoch": 3.9097592790228863, "grad_norm": 0.6031963327117228, "learning_rate": 6.670513328106142e-08, "loss": 0.0242, "step": 32971 }, { "epoch": 3.909877860785011, "grad_norm": 0.4405241988868358, "learning_rate": 6.653001705581863e-08, "loss": 0.014, "step": 32972 }, { "epoch": 3.9099964425471363, "grad_norm": 0.5089244925562553, "learning_rate": 6.635513068659338e-08, "loss": 0.0235, "step": 32973 }, { "epoch": 3.9101150243092615, "grad_norm": 0.3604888435220218, "learning_rate": 6.618047417500106e-08, "loss": 0.0233, "step": 32974 }, { "epoch": 3.9102336060713863, "grad_norm": 0.8421949696444124, "learning_rate": 6.600604752264594e-08, "loss": 0.0466, "step": 32975 }, { "epoch": 3.910352187833511, "grad_norm": 0.37876976231293186, "learning_rate": 6.583185073114062e-08, "loss": 0.0146, "step": 32976 }, { "epoch": 3.910470769595636, "grad_norm": 0.4579200565070535, "learning_rate": 6.565788380209215e-08, "loss": 0.0265, "step": 32977 }, { "epoch": 3.9105893513577614, "grad_norm": 0.4775670397346715, "learning_rate": 6.548414673709647e-08, "loss": 0.023, "step": 32978 }, { "epoch": 3.910707933119886, "grad_norm": 0.6579632655112039, "learning_rate": 6.53106395377634e-08, "loss": 0.0245, "step": 32979 }, { "epoch": 3.910826514882011, "grad_norm": 0.4989610905025986, "learning_rate": 6.513736220568889e-08, "loss": 0.0271, "step": 32980 }, { "epoch": 3.910945096644136, "grad_norm": 0.5983036049415464, "learning_rate": 6.496431474246889e-08, "loss": 0.0247, "step": 32981 }, { "epoch": 3.9110636784062613, "grad_norm": 0.3461611207106446, "learning_rate": 6.479149714970212e-08, "loss": 0.0153, "step": 32982 }, { "epoch": 3.911182260168386, "grad_norm": 0.609243471929688, "learning_rate": 6.461890942897897e-08, "loss": 0.0228, "step": 32983 }, { "epoch": 3.911300841930511, "grad_norm": 0.31066156899094916, "learning_rate": 6.444655158189261e-08, "loss": 0.0129, "step": 32984 }, { "epoch": 3.911419423692636, "grad_norm": 0.5298891058693376, "learning_rate": 6.427442361003066e-08, "loss": 0.0165, "step": 32985 }, { "epoch": 3.9115380054547613, "grad_norm": 0.3044268728228278, "learning_rate": 6.410252551498074e-08, "loss": 0.0135, "step": 32986 }, { "epoch": 3.911656587216886, "grad_norm": 0.4647043871438658, "learning_rate": 6.39308572983277e-08, "loss": 0.0213, "step": 32987 }, { "epoch": 3.911775168979011, "grad_norm": 0.6552810568568124, "learning_rate": 6.375941896165361e-08, "loss": 0.0352, "step": 32988 }, { "epoch": 3.911893750741136, "grad_norm": 0.35337127542404184, "learning_rate": 6.358821050653774e-08, "loss": 0.0124, "step": 32989 }, { "epoch": 3.912012332503261, "grad_norm": 0.5243307093639258, "learning_rate": 6.34172319345594e-08, "loss": 0.0239, "step": 32990 }, { "epoch": 3.912130914265386, "grad_norm": 0.6127113158375077, "learning_rate": 6.32464832472951e-08, "loss": 0.0277, "step": 32991 }, { "epoch": 3.9122494960275107, "grad_norm": 0.4960606733995128, "learning_rate": 6.307596444631858e-08, "loss": 0.0183, "step": 32992 }, { "epoch": 3.912368077789636, "grad_norm": 0.6240554060153949, "learning_rate": 6.29056755332036e-08, "loss": 0.0252, "step": 32993 }, { "epoch": 3.912486659551761, "grad_norm": 0.8218259881123516, "learning_rate": 6.273561650951554e-08, "loss": 0.0482, "step": 32994 }, { "epoch": 3.912605241313886, "grad_norm": 0.4267298067502847, "learning_rate": 6.256578737682816e-08, "loss": 0.0173, "step": 32995 }, { "epoch": 3.912723823076011, "grad_norm": 0.6134715486534913, "learning_rate": 6.239618813670411e-08, "loss": 0.0271, "step": 32996 }, { "epoch": 3.912842404838136, "grad_norm": 0.7618570342770326, "learning_rate": 6.222681879070325e-08, "loss": 0.0351, "step": 32997 }, { "epoch": 3.912960986600261, "grad_norm": 0.9338991047229275, "learning_rate": 6.205767934039375e-08, "loss": 0.0334, "step": 32998 }, { "epoch": 3.913079568362386, "grad_norm": 0.8289952074328788, "learning_rate": 6.188876978732994e-08, "loss": 0.0341, "step": 32999 }, { "epoch": 3.913198150124511, "grad_norm": 0.33851184210983687, "learning_rate": 6.17200901330689e-08, "loss": 0.0155, "step": 33000 }, { "epoch": 3.9133167318866358, "grad_norm": 0.5298907122565548, "learning_rate": 6.15516403791705e-08, "loss": 0.0165, "step": 33001 }, { "epoch": 3.913435313648761, "grad_norm": 0.54309737451329, "learning_rate": 6.138342052718349e-08, "loss": 0.0244, "step": 33002 }, { "epoch": 3.9135538954108857, "grad_norm": 0.41459818200713805, "learning_rate": 6.121543057865664e-08, "loss": 0.0156, "step": 33003 }, { "epoch": 3.913672477173011, "grad_norm": 0.27709135037008514, "learning_rate": 6.104767053514426e-08, "loss": 0.0154, "step": 33004 }, { "epoch": 3.9137910589351357, "grad_norm": 0.5787553175433322, "learning_rate": 6.088014039818957e-08, "loss": 0.0292, "step": 33005 }, { "epoch": 3.913909640697261, "grad_norm": 0.7010322390032268, "learning_rate": 6.071284016933853e-08, "loss": 0.0332, "step": 33006 }, { "epoch": 3.9140282224593856, "grad_norm": 0.45354049904178717, "learning_rate": 6.05457698501316e-08, "loss": 0.0204, "step": 33007 }, { "epoch": 3.914146804221511, "grad_norm": 0.44352584636429543, "learning_rate": 6.037892944211199e-08, "loss": 0.0289, "step": 33008 }, { "epoch": 3.9142653859836356, "grad_norm": 0.5367390444147271, "learning_rate": 6.021231894681179e-08, "loss": 0.0147, "step": 33009 }, { "epoch": 3.914383967745761, "grad_norm": 0.568213932058313, "learning_rate": 6.004593836577422e-08, "loss": 0.0257, "step": 33010 }, { "epoch": 3.9145025495078856, "grad_norm": 0.5246343010782624, "learning_rate": 5.98797877005286e-08, "loss": 0.0234, "step": 33011 }, { "epoch": 3.9146211312700108, "grad_norm": 0.42576872552792205, "learning_rate": 5.971386695260706e-08, "loss": 0.0197, "step": 33012 }, { "epoch": 3.9147397130321355, "grad_norm": 0.6774404771052593, "learning_rate": 5.954817612354169e-08, "loss": 0.0365, "step": 33013 }, { "epoch": 3.9148582947942607, "grad_norm": 0.7740460751056242, "learning_rate": 5.9382715214856275e-08, "loss": 0.0412, "step": 33014 }, { "epoch": 3.9149768765563855, "grad_norm": 0.5199037539637023, "learning_rate": 5.921748422807738e-08, "loss": 0.0174, "step": 33015 }, { "epoch": 3.9150954583185107, "grad_norm": 0.4624829378298274, "learning_rate": 5.9052483164731553e-08, "loss": 0.0154, "step": 33016 }, { "epoch": 3.9152140400806354, "grad_norm": 0.6148328187536695, "learning_rate": 5.888771202633425e-08, "loss": 0.031, "step": 33017 }, { "epoch": 3.9153326218427607, "grad_norm": 0.6234809121273952, "learning_rate": 5.872317081440926e-08, "loss": 0.0252, "step": 33018 }, { "epoch": 3.9154512036048854, "grad_norm": 0.47275339810232675, "learning_rate": 5.855885953047202e-08, "loss": 0.0203, "step": 33019 }, { "epoch": 3.9155697853670106, "grad_norm": 0.5291180032123055, "learning_rate": 5.839477817603523e-08, "loss": 0.0157, "step": 33020 }, { "epoch": 3.9156883671291354, "grad_norm": 0.3022432741736082, "learning_rate": 5.823092675261432e-08, "loss": 0.0115, "step": 33021 }, { "epoch": 3.9158069488912606, "grad_norm": 0.4657298636259324, "learning_rate": 5.806730526171922e-08, "loss": 0.0172, "step": 33022 }, { "epoch": 3.9159255306533858, "grad_norm": 0.6063817074818547, "learning_rate": 5.790391370485704e-08, "loss": 0.0318, "step": 33023 }, { "epoch": 3.9160441124155105, "grad_norm": 0.5109135363732606, "learning_rate": 5.774075208353491e-08, "loss": 0.0241, "step": 33024 }, { "epoch": 3.9161626941776353, "grad_norm": 0.3073761036938315, "learning_rate": 5.757782039925441e-08, "loss": 0.0141, "step": 33025 }, { "epoch": 3.9162812759397605, "grad_norm": 0.5233649816789105, "learning_rate": 5.741511865352267e-08, "loss": 0.0208, "step": 33026 }, { "epoch": 3.9163998577018857, "grad_norm": 0.49501540848749265, "learning_rate": 5.7252646847838485e-08, "loss": 0.0161, "step": 33027 }, { "epoch": 3.9165184394640105, "grad_norm": 0.8442860188809693, "learning_rate": 5.709040498369511e-08, "loss": 0.045, "step": 33028 }, { "epoch": 3.916637021226135, "grad_norm": 0.45596161472251984, "learning_rate": 5.692839306259134e-08, "loss": 0.0204, "step": 33029 }, { "epoch": 3.9167556029882604, "grad_norm": 0.6384184507037882, "learning_rate": 5.676661108602044e-08, "loss": 0.0296, "step": 33030 }, { "epoch": 3.9168741847503856, "grad_norm": 0.7016435239244908, "learning_rate": 5.660505905547564e-08, "loss": 0.0305, "step": 33031 }, { "epoch": 3.9169927665125104, "grad_norm": 0.7163570012146104, "learning_rate": 5.6443736972444646e-08, "loss": 0.0337, "step": 33032 }, { "epoch": 3.917111348274635, "grad_norm": 0.3894546107277461, "learning_rate": 5.628264483841239e-08, "loss": 0.0172, "step": 33033 }, { "epoch": 3.9172299300367603, "grad_norm": 0.42144793233674815, "learning_rate": 5.6121782654866564e-08, "loss": 0.0113, "step": 33034 }, { "epoch": 3.9173485117988855, "grad_norm": 0.4655561589540687, "learning_rate": 5.5961150423292085e-08, "loss": 0.0171, "step": 33035 }, { "epoch": 3.9174670935610103, "grad_norm": 0.4586725749332586, "learning_rate": 5.580074814516556e-08, "loss": 0.0133, "step": 33036 }, { "epoch": 3.917585675323135, "grad_norm": 0.41976887509044497, "learning_rate": 5.564057582196636e-08, "loss": 0.0196, "step": 33037 }, { "epoch": 3.9177042570852603, "grad_norm": 0.6476123937913421, "learning_rate": 5.5480633455176624e-08, "loss": 0.0352, "step": 33038 }, { "epoch": 3.9178228388473855, "grad_norm": 0.5098838499199789, "learning_rate": 5.532092104626185e-08, "loss": 0.0243, "step": 33039 }, { "epoch": 3.9179414206095102, "grad_norm": 0.5176971796482911, "learning_rate": 5.516143859670142e-08, "loss": 0.0244, "step": 33040 }, { "epoch": 3.918060002371635, "grad_norm": 0.6249235425650302, "learning_rate": 5.500218610796082e-08, "loss": 0.0191, "step": 33041 }, { "epoch": 3.91817858413376, "grad_norm": 0.7414085517637462, "learning_rate": 5.4843163581513866e-08, "loss": 0.0256, "step": 33042 }, { "epoch": 3.9182971658958854, "grad_norm": 0.4406364333894077, "learning_rate": 5.4684371018820514e-08, "loss": 0.0187, "step": 33043 }, { "epoch": 3.91841574765801, "grad_norm": 0.5575786983776004, "learning_rate": 5.452580842134902e-08, "loss": 0.035, "step": 33044 }, { "epoch": 3.918534329420135, "grad_norm": 0.6746227744512483, "learning_rate": 5.4367475790559344e-08, "loss": 0.022, "step": 33045 }, { "epoch": 3.91865291118226, "grad_norm": 0.5997787448280097, "learning_rate": 5.4209373127908635e-08, "loss": 0.0246, "step": 33046 }, { "epoch": 3.9187714929443853, "grad_norm": 0.30636786611899997, "learning_rate": 5.405150043485963e-08, "loss": 0.0128, "step": 33047 }, { "epoch": 3.91889007470651, "grad_norm": 0.5389069366976549, "learning_rate": 5.3893857712863924e-08, "loss": 0.0199, "step": 33048 }, { "epoch": 3.9190086564686353, "grad_norm": 0.5271612477935981, "learning_rate": 5.373644496337593e-08, "loss": 0.0308, "step": 33049 }, { "epoch": 3.91912723823076, "grad_norm": 0.7005190947108599, "learning_rate": 5.3579262187847255e-08, "loss": 0.0287, "step": 33050 }, { "epoch": 3.9192458199928852, "grad_norm": 0.5616150000492885, "learning_rate": 5.342230938772674e-08, "loss": 0.0269, "step": 33051 }, { "epoch": 3.91936440175501, "grad_norm": 0.5145694922688626, "learning_rate": 5.3265586564460456e-08, "loss": 0.0208, "step": 33052 }, { "epoch": 3.919482983517135, "grad_norm": 0.521885830734472, "learning_rate": 5.3109093719494464e-08, "loss": 0.0197, "step": 33053 }, { "epoch": 3.91960156527926, "grad_norm": 0.7018333265089138, "learning_rate": 5.295283085426927e-08, "loss": 0.0281, "step": 33054 }, { "epoch": 3.919720147041385, "grad_norm": 0.6563639111464061, "learning_rate": 5.279679797022818e-08, "loss": 0.0228, "step": 33055 }, { "epoch": 3.91983872880351, "grad_norm": 0.6434116737410002, "learning_rate": 5.264099506880616e-08, "loss": 0.0337, "step": 33056 }, { "epoch": 3.919957310565635, "grad_norm": 0.8529126073560828, "learning_rate": 5.24854221514437e-08, "loss": 0.0377, "step": 33057 }, { "epoch": 3.92007589232776, "grad_norm": 0.4774569633002864, "learning_rate": 5.2330079219573004e-08, "loss": 0.0195, "step": 33058 }, { "epoch": 3.920194474089885, "grad_norm": 0.36863473623174137, "learning_rate": 5.217496627462626e-08, "loss": 0.0187, "step": 33059 }, { "epoch": 3.92031305585201, "grad_norm": 0.9060294105479724, "learning_rate": 5.2020083318032875e-08, "loss": 0.0227, "step": 33060 }, { "epoch": 3.920431637614135, "grad_norm": 0.4924065087439102, "learning_rate": 5.186543035121949e-08, "loss": 0.0247, "step": 33061 }, { "epoch": 3.92055021937626, "grad_norm": 0.6196460055036818, "learning_rate": 5.1711007375615514e-08, "loss": 0.0181, "step": 33062 }, { "epoch": 3.920668801138385, "grad_norm": 0.5518588333603022, "learning_rate": 5.155681439264204e-08, "loss": 0.0194, "step": 33063 }, { "epoch": 3.9207873829005098, "grad_norm": 0.4196004198953665, "learning_rate": 5.140285140372292e-08, "loss": 0.018, "step": 33064 }, { "epoch": 3.920905964662635, "grad_norm": 0.7333077094798433, "learning_rate": 5.124911841027091e-08, "loss": 0.0574, "step": 33065 }, { "epoch": 3.9210245464247597, "grad_norm": 0.5469766726345228, "learning_rate": 5.109561541371266e-08, "loss": 0.0247, "step": 33066 }, { "epoch": 3.921143128186885, "grad_norm": 0.5762327811745762, "learning_rate": 5.094234241545537e-08, "loss": 0.0214, "step": 33067 }, { "epoch": 3.9212617099490097, "grad_norm": 0.4262736839153872, "learning_rate": 5.078929941691457e-08, "loss": 0.0194, "step": 33068 }, { "epoch": 3.921380291711135, "grad_norm": 0.7585186058562639, "learning_rate": 5.063648641950303e-08, "loss": 0.0426, "step": 33069 }, { "epoch": 3.9214988734732596, "grad_norm": 0.44931001453646896, "learning_rate": 5.048390342462794e-08, "loss": 0.025, "step": 33070 }, { "epoch": 3.921617455235385, "grad_norm": 0.6521546841017399, "learning_rate": 5.0331550433696525e-08, "loss": 0.0345, "step": 33071 }, { "epoch": 3.92173603699751, "grad_norm": 0.477448628083289, "learning_rate": 5.0179427448113215e-08, "loss": 0.0198, "step": 33072 }, { "epoch": 3.921854618759635, "grad_norm": 0.7874420827649931, "learning_rate": 5.002753446927966e-08, "loss": 0.0285, "step": 33073 }, { "epoch": 3.9219732005217596, "grad_norm": 0.45961007875840815, "learning_rate": 4.987587149859751e-08, "loss": 0.0162, "step": 33074 }, { "epoch": 3.9220917822838848, "grad_norm": 0.505388721512239, "learning_rate": 4.972443853746289e-08, "loss": 0.0234, "step": 33075 }, { "epoch": 3.92221036404601, "grad_norm": 0.3108187798821018, "learning_rate": 4.957323558727189e-08, "loss": 0.0098, "step": 33076 }, { "epoch": 3.9223289458081347, "grad_norm": 0.4780725380399818, "learning_rate": 4.9422262649420625e-08, "loss": 0.0213, "step": 33077 }, { "epoch": 3.9224475275702595, "grad_norm": 0.3568838776075792, "learning_rate": 4.9271519725299643e-08, "loss": 0.0124, "step": 33078 }, { "epoch": 3.9225661093323847, "grad_norm": 0.33366859585047653, "learning_rate": 4.91210068162995e-08, "loss": 0.0155, "step": 33079 }, { "epoch": 3.92268469109451, "grad_norm": 0.4113984607022125, "learning_rate": 4.89707239238052e-08, "loss": 0.0194, "step": 33080 }, { "epoch": 3.9228032728566347, "grad_norm": 0.5028827417762474, "learning_rate": 4.8820671049204516e-08, "loss": 0.0188, "step": 33081 }, { "epoch": 3.9229218546187594, "grad_norm": 0.5877262686492479, "learning_rate": 4.867084819387968e-08, "loss": 0.0277, "step": 33082 }, { "epoch": 3.9230404363808846, "grad_norm": 0.7963123886228259, "learning_rate": 4.852125535921292e-08, "loss": 0.0471, "step": 33083 }, { "epoch": 3.92315901814301, "grad_norm": 0.3758374282047978, "learning_rate": 4.837189254658092e-08, "loss": 0.0151, "step": 33084 }, { "epoch": 3.9232775999051346, "grad_norm": 0.4317499916739273, "learning_rate": 4.82227597573659e-08, "loss": 0.0219, "step": 33085 }, { "epoch": 3.9233961816672593, "grad_norm": 0.9642844808740944, "learning_rate": 4.8073856992936203e-08, "loss": 0.0467, "step": 33086 }, { "epoch": 3.9235147634293845, "grad_norm": 0.39848350380435915, "learning_rate": 4.792518425466852e-08, "loss": 0.0158, "step": 33087 }, { "epoch": 3.9236333451915097, "grad_norm": 1.0079605291189995, "learning_rate": 4.777674154393119e-08, "loss": 0.0377, "step": 33088 }, { "epoch": 3.9237519269536345, "grad_norm": 0.37837960293550066, "learning_rate": 4.762852886209535e-08, "loss": 0.0167, "step": 33089 }, { "epoch": 3.9238705087157593, "grad_norm": 0.6375981734358109, "learning_rate": 4.748054621052655e-08, "loss": 0.0235, "step": 33090 }, { "epoch": 3.9239890904778845, "grad_norm": 0.7799259297241589, "learning_rate": 4.733279359058762e-08, "loss": 0.0319, "step": 33091 }, { "epoch": 3.9241076722400097, "grad_norm": 0.7676675618908122, "learning_rate": 4.718527100364134e-08, "loss": 0.0337, "step": 33092 }, { "epoch": 3.9242262540021344, "grad_norm": 0.7007096411438644, "learning_rate": 4.703797845104774e-08, "loss": 0.03, "step": 33093 }, { "epoch": 3.924344835764259, "grad_norm": 0.5762635043344015, "learning_rate": 4.689091593416406e-08, "loss": 0.0224, "step": 33094 }, { "epoch": 3.9244634175263844, "grad_norm": 0.5435145034750177, "learning_rate": 4.6744083454344775e-08, "loss": 0.0206, "step": 33095 }, { "epoch": 3.9245819992885096, "grad_norm": 0.3934901676837269, "learning_rate": 4.6597481012947145e-08, "loss": 0.0157, "step": 33096 }, { "epoch": 3.9247005810506344, "grad_norm": 0.3462056194362654, "learning_rate": 4.645110861132285e-08, "loss": 0.0106, "step": 33097 }, { "epoch": 3.9248191628127596, "grad_norm": 0.4266725728929685, "learning_rate": 4.6304966250818035e-08, "loss": 0.0242, "step": 33098 }, { "epoch": 3.9249377445748843, "grad_norm": 0.6381850341844583, "learning_rate": 4.6159053932778864e-08, "loss": 0.0288, "step": 33099 }, { "epoch": 3.9250563263370095, "grad_norm": 0.5430919230920782, "learning_rate": 4.601337165855424e-08, "loss": 0.0284, "step": 33100 }, { "epoch": 3.9251749080991343, "grad_norm": 0.7194125584293024, "learning_rate": 4.586791942948754e-08, "loss": 0.0342, "step": 33101 }, { "epoch": 3.9252934898612595, "grad_norm": 0.6270654073822307, "learning_rate": 4.572269724691658e-08, "loss": 0.0248, "step": 33102 }, { "epoch": 3.9254120716233842, "grad_norm": 0.432857286158678, "learning_rate": 4.557770511217918e-08, "loss": 0.0205, "step": 33103 }, { "epoch": 3.9255306533855094, "grad_norm": 0.6419981750018985, "learning_rate": 4.543294302661594e-08, "loss": 0.0262, "step": 33104 }, { "epoch": 3.925649235147634, "grad_norm": 0.7567140976746265, "learning_rate": 4.5288410991561894e-08, "loss": 0.0372, "step": 33105 }, { "epoch": 3.9257678169097594, "grad_norm": 0.8283006941233774, "learning_rate": 4.514410900834376e-08, "loss": 0.0396, "step": 33106 }, { "epoch": 3.925886398671884, "grad_norm": 0.5934859873797844, "learning_rate": 4.5000037078296584e-08, "loss": 0.0281, "step": 33107 }, { "epoch": 3.9260049804340094, "grad_norm": 0.42448996525031696, "learning_rate": 4.4856195202747084e-08, "loss": 0.019, "step": 33108 }, { "epoch": 3.926123562196134, "grad_norm": 0.41466429825121087, "learning_rate": 4.471258338302198e-08, "loss": 0.0177, "step": 33109 }, { "epoch": 3.9262421439582593, "grad_norm": 0.9533245004341644, "learning_rate": 4.45692016204452e-08, "loss": 0.0515, "step": 33110 }, { "epoch": 3.926360725720384, "grad_norm": 0.4488598396655786, "learning_rate": 4.442604991633792e-08, "loss": 0.0146, "step": 33111 }, { "epoch": 3.9264793074825093, "grad_norm": 0.5498208770290471, "learning_rate": 4.4283128272018524e-08, "loss": 0.0295, "step": 33112 }, { "epoch": 3.926597889244634, "grad_norm": 0.898604253435429, "learning_rate": 4.414043668880818e-08, "loss": 0.0323, "step": 33113 }, { "epoch": 3.9267164710067592, "grad_norm": 0.30873680916875024, "learning_rate": 4.3997975168016956e-08, "loss": 0.011, "step": 33114 }, { "epoch": 3.926835052768884, "grad_norm": 0.5348067234472883, "learning_rate": 4.385574371096324e-08, "loss": 0.0237, "step": 33115 }, { "epoch": 3.926953634531009, "grad_norm": 0.70168107970285, "learning_rate": 4.371374231895431e-08, "loss": 0.0289, "step": 33116 }, { "epoch": 3.927072216293134, "grad_norm": 0.35371884537521286, "learning_rate": 4.3571970993303015e-08, "loss": 0.0144, "step": 33117 }, { "epoch": 3.927190798055259, "grad_norm": 0.5624580965141538, "learning_rate": 4.3430429735316634e-08, "loss": 0.0267, "step": 33118 }, { "epoch": 3.927309379817384, "grad_norm": 0.3965053636727945, "learning_rate": 4.328911854629414e-08, "loss": 0.0097, "step": 33119 }, { "epoch": 3.927427961579509, "grad_norm": 0.46511595900883246, "learning_rate": 4.314803742754558e-08, "loss": 0.0205, "step": 33120 }, { "epoch": 3.9275465433416343, "grad_norm": 0.5570656393182959, "learning_rate": 4.300718638036438e-08, "loss": 0.0207, "step": 33121 }, { "epoch": 3.927665125103759, "grad_norm": 0.6636079466716717, "learning_rate": 4.286656540605505e-08, "loss": 0.034, "step": 33122 }, { "epoch": 3.927783706865884, "grad_norm": 0.8799056143492018, "learning_rate": 4.2726174505910986e-08, "loss": 0.0409, "step": 33123 }, { "epoch": 3.927902288628009, "grad_norm": 0.6181368863590084, "learning_rate": 4.258601368122839e-08, "loss": 0.0306, "step": 33124 }, { "epoch": 3.9280208703901343, "grad_norm": 0.450699844113969, "learning_rate": 4.244608293329511e-08, "loss": 0.0287, "step": 33125 }, { "epoch": 3.928139452152259, "grad_norm": 0.377954395457792, "learning_rate": 4.230638226340455e-08, "loss": 0.0158, "step": 33126 }, { "epoch": 3.9282580339143838, "grad_norm": 0.3398317873758562, "learning_rate": 4.216691167284459e-08, "loss": 0.0139, "step": 33127 }, { "epoch": 3.928376615676509, "grad_norm": 0.6318808930145723, "learning_rate": 4.202767116290029e-08, "loss": 0.0348, "step": 33128 }, { "epoch": 3.928495197438634, "grad_norm": 0.6722589274244242, "learning_rate": 4.188866073485676e-08, "loss": 0.0364, "step": 33129 }, { "epoch": 3.928613779200759, "grad_norm": 0.7765680709835717, "learning_rate": 4.1749880389990726e-08, "loss": 0.0309, "step": 33130 }, { "epoch": 3.9287323609628837, "grad_norm": 0.5440263888052566, "learning_rate": 4.1611330129590066e-08, "loss": 0.0316, "step": 33131 }, { "epoch": 3.928850942725009, "grad_norm": 0.8957897067697977, "learning_rate": 4.14730099549232e-08, "loss": 0.0342, "step": 33132 }, { "epoch": 3.928969524487134, "grad_norm": 0.7105814482141289, "learning_rate": 4.133491986726967e-08, "loss": 0.027, "step": 33133 }, { "epoch": 3.929088106249259, "grad_norm": 0.475778516535104, "learning_rate": 4.119705986790068e-08, "loss": 0.0279, "step": 33134 }, { "epoch": 3.9292066880113836, "grad_norm": 0.8298778691106624, "learning_rate": 4.10594299580902e-08, "loss": 0.0427, "step": 33135 }, { "epoch": 3.929325269773509, "grad_norm": 0.5808850713305251, "learning_rate": 4.09220301391039e-08, "loss": 0.0272, "step": 33136 }, { "epoch": 3.929443851535634, "grad_norm": 0.5079665699685264, "learning_rate": 4.0784860412212964e-08, "loss": 0.0207, "step": 33137 }, { "epoch": 3.929562433297759, "grad_norm": 0.5309765293603176, "learning_rate": 4.0647920778674743e-08, "loss": 0.0207, "step": 33138 }, { "epoch": 3.9296810150598835, "grad_norm": 0.43936324454081127, "learning_rate": 4.0511211239757654e-08, "loss": 0.0213, "step": 33139 }, { "epoch": 3.9297995968220087, "grad_norm": 0.48872366741690554, "learning_rate": 4.0374731796716245e-08, "loss": 0.0155, "step": 33140 }, { "epoch": 3.929918178584134, "grad_norm": 0.3603992114630368, "learning_rate": 4.023848245081618e-08, "loss": 0.0139, "step": 33141 }, { "epoch": 3.9300367603462587, "grad_norm": 0.43415684891731093, "learning_rate": 4.010246320330924e-08, "loss": 0.0175, "step": 33142 }, { "epoch": 3.9301553421083835, "grad_norm": 0.45155047569634915, "learning_rate": 3.9966674055447185e-08, "loss": 0.0196, "step": 33143 }, { "epoch": 3.9302739238705087, "grad_norm": 1.0040246016267995, "learning_rate": 3.983111500848735e-08, "loss": 0.0568, "step": 33144 }, { "epoch": 3.930392505632634, "grad_norm": 0.6125714224414741, "learning_rate": 3.9695786063675965e-08, "loss": 0.0306, "step": 33145 }, { "epoch": 3.9305110873947586, "grad_norm": 0.50315825378595, "learning_rate": 3.956068722225925e-08, "loss": 0.025, "step": 33146 }, { "epoch": 3.930629669156884, "grad_norm": 0.5753798411477716, "learning_rate": 3.942581848548621e-08, "loss": 0.0226, "step": 33147 }, { "epoch": 3.9307482509190086, "grad_norm": 0.508628078965429, "learning_rate": 3.929117985459751e-08, "loss": 0.0131, "step": 33148 }, { "epoch": 3.930866832681134, "grad_norm": 0.8783409294940893, "learning_rate": 3.915677133083662e-08, "loss": 0.0353, "step": 33149 }, { "epoch": 3.9309854144432586, "grad_norm": 0.5099467611711603, "learning_rate": 3.902259291543864e-08, "loss": 0.021, "step": 33150 }, { "epoch": 3.9311039962053838, "grad_norm": 0.5908223496661885, "learning_rate": 3.888864460964425e-08, "loss": 0.0271, "step": 33151 }, { "epoch": 3.9312225779675085, "grad_norm": 0.6108463436131902, "learning_rate": 3.8754926414688585e-08, "loss": 0.0248, "step": 33152 }, { "epoch": 3.9313411597296337, "grad_norm": 0.4772747587491697, "learning_rate": 3.862143833180121e-08, "loss": 0.0257, "step": 33153 }, { "epoch": 3.9314597414917585, "grad_norm": 0.8451223038279385, "learning_rate": 3.848818036221724e-08, "loss": 0.0389, "step": 33154 }, { "epoch": 3.9315783232538837, "grad_norm": 0.9885428711235217, "learning_rate": 3.835515250716071e-08, "loss": 0.0363, "step": 33155 }, { "epoch": 3.9316969050160084, "grad_norm": 0.6691829432019198, "learning_rate": 3.8222354767858405e-08, "loss": 0.0345, "step": 33156 }, { "epoch": 3.9318154867781336, "grad_norm": 0.48614569565631255, "learning_rate": 3.808978714553713e-08, "loss": 0.0212, "step": 33157 }, { "epoch": 3.9319340685402584, "grad_norm": 0.7046640849096598, "learning_rate": 3.7957449641418116e-08, "loss": 0.0239, "step": 33158 }, { "epoch": 3.9320526503023836, "grad_norm": 0.49734972294694013, "learning_rate": 3.782534225671985e-08, "loss": 0.0211, "step": 33159 }, { "epoch": 3.9321712320645084, "grad_norm": 0.6250097939477269, "learning_rate": 3.76934649926608e-08, "loss": 0.0253, "step": 33160 }, { "epoch": 3.9322898138266336, "grad_norm": 0.7244424407115737, "learning_rate": 3.756181785045943e-08, "loss": 0.0322, "step": 33161 }, { "epoch": 3.9324083955887583, "grad_norm": 0.6234903433678087, "learning_rate": 3.743040083132587e-08, "loss": 0.0315, "step": 33162 }, { "epoch": 3.9325269773508835, "grad_norm": 0.596452013742019, "learning_rate": 3.729921393647306e-08, "loss": 0.0264, "step": 33163 }, { "epoch": 3.9326455591130083, "grad_norm": 0.6640776938425151, "learning_rate": 3.716825716711114e-08, "loss": 0.0294, "step": 33164 }, { "epoch": 3.9327641408751335, "grad_norm": 0.4346146813307494, "learning_rate": 3.70375305244447e-08, "loss": 0.0239, "step": 33165 }, { "epoch": 3.9328827226372582, "grad_norm": 0.4611357728695404, "learning_rate": 3.6907034009681095e-08, "loss": 0.0206, "step": 33166 }, { "epoch": 3.9330013043993834, "grad_norm": 0.6019266500127408, "learning_rate": 3.6776767624022157e-08, "loss": 0.0238, "step": 33167 }, { "epoch": 3.933119886161508, "grad_norm": 0.5603044402884042, "learning_rate": 3.6646731368672474e-08, "loss": 0.0176, "step": 33168 }, { "epoch": 3.9332384679236334, "grad_norm": 0.49162346613557406, "learning_rate": 3.651692524482553e-08, "loss": 0.0184, "step": 33169 }, { "epoch": 3.933357049685758, "grad_norm": 0.4850518289455356, "learning_rate": 3.638734925368037e-08, "loss": 0.0174, "step": 33170 }, { "epoch": 3.9334756314478834, "grad_norm": 0.42144445058179075, "learning_rate": 3.625800339643048e-08, "loss": 0.0194, "step": 33171 }, { "epoch": 3.933594213210008, "grad_norm": 0.3621577943795314, "learning_rate": 3.612888767427214e-08, "loss": 0.0196, "step": 33172 }, { "epoch": 3.9337127949721333, "grad_norm": 0.3713833232364303, "learning_rate": 3.6000002088390494e-08, "loss": 0.0179, "step": 33173 }, { "epoch": 3.9338313767342585, "grad_norm": 0.6192841690748263, "learning_rate": 3.587134663997627e-08, "loss": 0.0317, "step": 33174 }, { "epoch": 3.9339499584963833, "grad_norm": 0.547089258566546, "learning_rate": 3.574292133021462e-08, "loss": 0.0349, "step": 33175 }, { "epoch": 3.934068540258508, "grad_norm": 0.49308723945741006, "learning_rate": 3.561472616029071e-08, "loss": 0.0138, "step": 33176 }, { "epoch": 3.9341871220206333, "grad_norm": 0.42595721377571655, "learning_rate": 3.548676113138694e-08, "loss": 0.0145, "step": 33177 }, { "epoch": 3.9343057037827585, "grad_norm": 0.29605988085380397, "learning_rate": 3.5359026244680125e-08, "loss": 0.012, "step": 33178 }, { "epoch": 3.934424285544883, "grad_norm": 0.96150257037187, "learning_rate": 3.52315215013499e-08, "loss": 0.0358, "step": 33179 }, { "epoch": 3.934542867307008, "grad_norm": 0.36970466754519, "learning_rate": 3.5104246902570304e-08, "loss": 0.0159, "step": 33180 }, { "epoch": 3.934661449069133, "grad_norm": 0.4131746450955399, "learning_rate": 3.497720244951819e-08, "loss": 0.0203, "step": 33181 }, { "epoch": 3.9347800308312584, "grad_norm": 0.6275786783352804, "learning_rate": 3.485038814335928e-08, "loss": 0.0291, "step": 33182 }, { "epoch": 3.934898612593383, "grad_norm": 0.7016730013552248, "learning_rate": 3.4723803985267646e-08, "loss": 0.0289, "step": 33183 }, { "epoch": 3.935017194355508, "grad_norm": 0.3518111509736676, "learning_rate": 3.4597449976406235e-08, "loss": 0.0165, "step": 33184 }, { "epoch": 3.935135776117633, "grad_norm": 0.4934906830594519, "learning_rate": 3.447132611794357e-08, "loss": 0.0214, "step": 33185 }, { "epoch": 3.9352543578797583, "grad_norm": 0.5317027999226017, "learning_rate": 3.4345432411039823e-08, "loss": 0.025, "step": 33186 }, { "epoch": 3.935372939641883, "grad_norm": 0.4185212045033872, "learning_rate": 3.4219768856855185e-08, "loss": 0.0168, "step": 33187 }, { "epoch": 3.935491521404008, "grad_norm": 1.0901101654485073, "learning_rate": 3.409433545654983e-08, "loss": 0.0644, "step": 33188 }, { "epoch": 3.935610103166133, "grad_norm": 0.4950595269162272, "learning_rate": 3.39691322112784e-08, "loss": 0.0195, "step": 33189 }, { "epoch": 3.9357286849282582, "grad_norm": 0.380873052551322, "learning_rate": 3.3844159122198296e-08, "loss": 0.016, "step": 33190 }, { "epoch": 3.935847266690383, "grad_norm": 0.6741907057843779, "learning_rate": 3.37194161904586e-08, "loss": 0.0348, "step": 33191 }, { "epoch": 3.9359658484525077, "grad_norm": 0.4640308589084602, "learning_rate": 3.359490341721116e-08, "loss": 0.0166, "step": 33192 }, { "epoch": 3.936084430214633, "grad_norm": 0.892941326586008, "learning_rate": 3.3470620803602306e-08, "loss": 0.0355, "step": 33193 }, { "epoch": 3.936203011976758, "grad_norm": 0.6496688166227542, "learning_rate": 3.3346568350778315e-08, "loss": 0.035, "step": 33194 }, { "epoch": 3.936321593738883, "grad_norm": 0.5356287982132373, "learning_rate": 3.322274605988274e-08, "loss": 0.0315, "step": 33195 }, { "epoch": 3.936440175501008, "grad_norm": 0.5782738800398083, "learning_rate": 3.309915393205632e-08, "loss": 0.0301, "step": 33196 }, { "epoch": 3.936558757263133, "grad_norm": 0.5325688129813458, "learning_rate": 3.297579196843981e-08, "loss": 0.0302, "step": 33197 }, { "epoch": 3.936677339025258, "grad_norm": 0.42190219053246064, "learning_rate": 3.285266017017119e-08, "loss": 0.015, "step": 33198 }, { "epoch": 3.936795920787383, "grad_norm": 0.42001835036684787, "learning_rate": 3.2729758538382894e-08, "loss": 0.0264, "step": 33199 }, { "epoch": 3.936914502549508, "grad_norm": 0.4094001926428675, "learning_rate": 3.2607087074207346e-08, "loss": 0.0223, "step": 33200 }, { "epoch": 3.937033084311633, "grad_norm": 0.6784604467326373, "learning_rate": 3.2484645778779746e-08, "loss": 0.0275, "step": 33201 }, { "epoch": 3.937151666073758, "grad_norm": 0.3585719687994774, "learning_rate": 3.2362434653226967e-08, "loss": 0.0129, "step": 33202 }, { "epoch": 3.9372702478358828, "grad_norm": 0.6700547404098471, "learning_rate": 3.22404536986759e-08, "loss": 0.0223, "step": 33203 }, { "epoch": 3.937388829598008, "grad_norm": 0.5115528287731742, "learning_rate": 3.2118702916247856e-08, "loss": 0.0248, "step": 33204 }, { "epoch": 3.9375074113601327, "grad_norm": 0.5882074965976071, "learning_rate": 3.1997182307069716e-08, "loss": 0.0239, "step": 33205 }, { "epoch": 3.937625993122258, "grad_norm": 0.6998102788630138, "learning_rate": 3.187589187226003e-08, "loss": 0.0262, "step": 33206 }, { "epoch": 3.9377445748843827, "grad_norm": 0.6962159479292769, "learning_rate": 3.175483161293458e-08, "loss": 0.0319, "step": 33207 }, { "epoch": 3.937863156646508, "grad_norm": 0.5348124449723833, "learning_rate": 3.16340015302119e-08, "loss": 0.0315, "step": 33208 }, { "epoch": 3.9379817384086326, "grad_norm": 0.545807237578722, "learning_rate": 3.1513401625207775e-08, "loss": 0.0192, "step": 33209 }, { "epoch": 3.938100320170758, "grad_norm": 0.8783873081406167, "learning_rate": 3.139303189902965e-08, "loss": 0.0314, "step": 33210 }, { "epoch": 3.9382189019328826, "grad_norm": 0.23749680965276923, "learning_rate": 3.1272892352790516e-08, "loss": 0.0116, "step": 33211 }, { "epoch": 3.938337483695008, "grad_norm": 0.48222531178420325, "learning_rate": 3.1152982987595056e-08, "loss": 0.0181, "step": 33212 }, { "epoch": 3.9384560654571326, "grad_norm": 0.4759686117970928, "learning_rate": 3.1033303804550716e-08, "loss": 0.0221, "step": 33213 }, { "epoch": 3.9385746472192578, "grad_norm": 0.5066721080479306, "learning_rate": 3.0913854804762166e-08, "loss": 0.0234, "step": 33214 }, { "epoch": 3.9386932289813825, "grad_norm": 0.3748370218456643, "learning_rate": 3.0794635989325746e-08, "loss": 0.0189, "step": 33215 }, { "epoch": 3.9388118107435077, "grad_norm": 0.46791119341937715, "learning_rate": 3.067564735934614e-08, "loss": 0.0256, "step": 33216 }, { "epoch": 3.9389303925056325, "grad_norm": 1.0329490327700166, "learning_rate": 3.055688891591691e-08, "loss": 0.0433, "step": 33217 }, { "epoch": 3.9390489742677577, "grad_norm": 0.5154009691289044, "learning_rate": 3.0438360660131614e-08, "loss": 0.0246, "step": 33218 }, { "epoch": 3.9391675560298824, "grad_norm": 0.38167205969045503, "learning_rate": 3.0320062593086616e-08, "loss": 0.0183, "step": 33219 }, { "epoch": 3.9392861377920076, "grad_norm": 0.49190438906332634, "learning_rate": 3.020199471587271e-08, "loss": 0.0169, "step": 33220 }, { "epoch": 3.9394047195541324, "grad_norm": 0.5742385634697146, "learning_rate": 3.008415702957512e-08, "loss": 0.024, "step": 33221 }, { "epoch": 3.9395233013162576, "grad_norm": 0.31777270664336815, "learning_rate": 2.996654953527911e-08, "loss": 0.013, "step": 33222 }, { "epoch": 3.939641883078383, "grad_norm": 0.41715105254759793, "learning_rate": 2.9849172234072685e-08, "loss": 0.0194, "step": 33223 }, { "epoch": 3.9397604648405076, "grad_norm": 0.567957809232153, "learning_rate": 2.973202512703832e-08, "loss": 0.0284, "step": 33224 }, { "epoch": 3.9398790466026323, "grad_norm": 0.4615896130847188, "learning_rate": 2.9615108215252928e-08, "loss": 0.0249, "step": 33225 }, { "epoch": 3.9399976283647575, "grad_norm": 0.7999447307807168, "learning_rate": 2.9498421499793427e-08, "loss": 0.036, "step": 33226 }, { "epoch": 3.9401162101268827, "grad_norm": 0.6379839802148768, "learning_rate": 2.9381964981739508e-08, "loss": 0.0309, "step": 33227 }, { "epoch": 3.9402347918890075, "grad_norm": 0.47007058413795816, "learning_rate": 2.9265738662162535e-08, "loss": 0.021, "step": 33228 }, { "epoch": 3.9403533736511323, "grad_norm": 0.514948537388906, "learning_rate": 2.914974254213665e-08, "loss": 0.0211, "step": 33229 }, { "epoch": 3.9404719554132575, "grad_norm": 0.5078316225566771, "learning_rate": 2.903397662272489e-08, "loss": 0.0266, "step": 33230 }, { "epoch": 3.9405905371753827, "grad_norm": 0.5115989437419066, "learning_rate": 2.89184409050014e-08, "loss": 0.0211, "step": 33231 }, { "epoch": 3.9407091189375074, "grad_norm": 0.4907206280381743, "learning_rate": 2.8803135390026435e-08, "loss": 0.0263, "step": 33232 }, { "epoch": 3.940827700699632, "grad_norm": 0.595902187536758, "learning_rate": 2.868806007886582e-08, "loss": 0.0212, "step": 33233 }, { "epoch": 3.9409462824617574, "grad_norm": 0.3440481870762188, "learning_rate": 2.857321497257981e-08, "loss": 0.0125, "step": 33234 }, { "epoch": 3.9410648642238826, "grad_norm": 0.38337261032538417, "learning_rate": 2.8458600072225892e-08, "loss": 0.0175, "step": 33235 }, { "epoch": 3.9411834459860073, "grad_norm": 0.4664249317528855, "learning_rate": 2.834421537886156e-08, "loss": 0.0191, "step": 33236 }, { "epoch": 3.941302027748132, "grad_norm": 0.7826354055009099, "learning_rate": 2.8230060893541523e-08, "loss": 0.0299, "step": 33237 }, { "epoch": 3.9414206095102573, "grad_norm": 0.7336010688011343, "learning_rate": 2.8116136617317713e-08, "loss": 0.0449, "step": 33238 }, { "epoch": 3.9415391912723825, "grad_norm": 0.5086041149943982, "learning_rate": 2.800244255124207e-08, "loss": 0.0274, "step": 33239 }, { "epoch": 3.9416577730345073, "grad_norm": 0.475915966515898, "learning_rate": 2.7888978696360978e-08, "loss": 0.0228, "step": 33240 }, { "epoch": 3.941776354796632, "grad_norm": 0.4976317682800137, "learning_rate": 2.777574505371805e-08, "loss": 0.018, "step": 33241 }, { "epoch": 3.9418949365587572, "grad_norm": 0.6511886869805208, "learning_rate": 2.7662741624362442e-08, "loss": 0.0315, "step": 33242 }, { "epoch": 3.9420135183208824, "grad_norm": 0.5654749039515734, "learning_rate": 2.7549968409332217e-08, "loss": 0.0315, "step": 33243 }, { "epoch": 3.942132100083007, "grad_norm": 0.4326641467331919, "learning_rate": 2.7437425409668206e-08, "loss": 0.0195, "step": 33244 }, { "epoch": 3.942250681845132, "grad_norm": 0.502028371232107, "learning_rate": 2.732511262640569e-08, "loss": 0.0163, "step": 33245 }, { "epoch": 3.942369263607257, "grad_norm": 0.6385219052127855, "learning_rate": 2.721303006058551e-08, "loss": 0.0345, "step": 33246 }, { "epoch": 3.9424878453693823, "grad_norm": 0.47688329075963737, "learning_rate": 2.7101177713237392e-08, "loss": 0.0157, "step": 33247 }, { "epoch": 3.942606427131507, "grad_norm": 0.44126706985848213, "learning_rate": 2.6989555585388294e-08, "loss": 0.0195, "step": 33248 }, { "epoch": 3.9427250088936323, "grad_norm": 0.8309466778645737, "learning_rate": 2.6878163678076274e-08, "loss": 0.0407, "step": 33249 }, { "epoch": 3.942843590655757, "grad_norm": 0.5929931781748773, "learning_rate": 2.6767001992322736e-08, "loss": 0.0195, "step": 33250 }, { "epoch": 3.9429621724178823, "grad_norm": 0.5525776095818556, "learning_rate": 2.6656070529151868e-08, "loss": 0.0404, "step": 33251 }, { "epoch": 3.943080754180007, "grad_norm": 0.584919513568713, "learning_rate": 2.654536928958784e-08, "loss": 0.0258, "step": 33252 }, { "epoch": 3.9431993359421322, "grad_norm": 0.5403495940871487, "learning_rate": 2.6434898274652063e-08, "loss": 0.0361, "step": 33253 }, { "epoch": 3.943317917704257, "grad_norm": 0.4368226971767452, "learning_rate": 2.6324657485360393e-08, "loss": 0.0247, "step": 33254 }, { "epoch": 3.943436499466382, "grad_norm": 0.4681428419532868, "learning_rate": 2.6214646922731458e-08, "loss": 0.0209, "step": 33255 }, { "epoch": 3.943555081228507, "grad_norm": 0.38246262693066607, "learning_rate": 2.6104866587778332e-08, "loss": 0.0133, "step": 33256 }, { "epoch": 3.943673662990632, "grad_norm": 0.544755696655888, "learning_rate": 2.5995316481514097e-08, "loss": 0.0257, "step": 33257 }, { "epoch": 3.943792244752757, "grad_norm": 0.8447314554344055, "learning_rate": 2.5885996604946282e-08, "loss": 0.0392, "step": 33258 }, { "epoch": 3.943910826514882, "grad_norm": 0.3835102045147337, "learning_rate": 2.5776906959087964e-08, "loss": 0.0201, "step": 33259 }, { "epoch": 3.944029408277007, "grad_norm": 0.5466139857997736, "learning_rate": 2.5668047544938346e-08, "loss": 0.0273, "step": 33260 }, { "epoch": 3.944147990039132, "grad_norm": 0.563439669172558, "learning_rate": 2.5559418363502176e-08, "loss": 0.0244, "step": 33261 }, { "epoch": 3.944266571801257, "grad_norm": 0.5477253181803882, "learning_rate": 2.5451019415784204e-08, "loss": 0.0238, "step": 33262 }, { "epoch": 3.944385153563382, "grad_norm": 0.5118823243822774, "learning_rate": 2.534285070278364e-08, "loss": 0.0237, "step": 33263 }, { "epoch": 3.944503735325507, "grad_norm": 0.3961895110886314, "learning_rate": 2.5234912225494123e-08, "loss": 0.018, "step": 33264 }, { "epoch": 3.944622317087632, "grad_norm": 0.6005938511479746, "learning_rate": 2.5127203984912083e-08, "loss": 0.0296, "step": 33265 }, { "epoch": 3.9447408988497568, "grad_norm": 0.6970616408585286, "learning_rate": 2.501972598203395e-08, "loss": 0.0306, "step": 33266 }, { "epoch": 3.944859480611882, "grad_norm": 0.6508395195034129, "learning_rate": 2.4912478217845037e-08, "loss": 0.0307, "step": 33267 }, { "epoch": 3.9449780623740067, "grad_norm": 0.5737107132881263, "learning_rate": 2.4805460693336225e-08, "loss": 0.0179, "step": 33268 }, { "epoch": 3.945096644136132, "grad_norm": 0.49382860710013043, "learning_rate": 2.469867340949561e-08, "loss": 0.0129, "step": 33269 }, { "epoch": 3.9452152258982567, "grad_norm": 0.5625446173804111, "learning_rate": 2.459211636730574e-08, "loss": 0.0211, "step": 33270 }, { "epoch": 3.945333807660382, "grad_norm": 0.30508586872582927, "learning_rate": 2.4485789567751938e-08, "loss": 0.0126, "step": 33271 }, { "epoch": 3.945452389422507, "grad_norm": 0.5624088330829993, "learning_rate": 2.4379693011808425e-08, "loss": 0.0289, "step": 33272 }, { "epoch": 3.945570971184632, "grad_norm": 0.4603544067624388, "learning_rate": 2.4273826700460523e-08, "loss": 0.0224, "step": 33273 }, { "epoch": 3.9456895529467566, "grad_norm": 0.5426730013010926, "learning_rate": 2.4168190634679676e-08, "loss": 0.0301, "step": 33274 }, { "epoch": 3.945808134708882, "grad_norm": 0.2659185190714421, "learning_rate": 2.406278481544011e-08, "loss": 0.0126, "step": 33275 }, { "epoch": 3.945926716471007, "grad_norm": 0.3460479111094722, "learning_rate": 2.3957609243713262e-08, "loss": 0.0141, "step": 33276 }, { "epoch": 3.9460452982331318, "grad_norm": 0.5686137733389033, "learning_rate": 2.3852663920470585e-08, "loss": 0.0235, "step": 33277 }, { "epoch": 3.9461638799952565, "grad_norm": 0.41371014797544314, "learning_rate": 2.3747948846680746e-08, "loss": 0.024, "step": 33278 }, { "epoch": 3.9462824617573817, "grad_norm": 0.6824427298696607, "learning_rate": 2.3643464023304084e-08, "loss": 0.025, "step": 33279 }, { "epoch": 3.946401043519507, "grad_norm": 0.5075238854029375, "learning_rate": 2.35392094513065e-08, "loss": 0.0215, "step": 33280 }, { "epoch": 3.9465196252816317, "grad_norm": 0.4065955285251573, "learning_rate": 2.3435185131651105e-08, "loss": 0.0172, "step": 33281 }, { "epoch": 3.9466382070437565, "grad_norm": 0.432524007741599, "learning_rate": 2.3331391065292695e-08, "loss": 0.0198, "step": 33282 }, { "epoch": 3.9467567888058817, "grad_norm": 0.4178130462399298, "learning_rate": 2.3227827253191613e-08, "loss": 0.0137, "step": 33283 }, { "epoch": 3.946875370568007, "grad_norm": 0.5069259523306548, "learning_rate": 2.312449369630265e-08, "loss": 0.0179, "step": 33284 }, { "epoch": 3.9469939523301316, "grad_norm": 0.8699350571290507, "learning_rate": 2.3021390395577823e-08, "loss": 0.0443, "step": 33285 }, { "epoch": 3.9471125340922564, "grad_norm": 0.41344177232420504, "learning_rate": 2.2918517351963597e-08, "loss": 0.0247, "step": 33286 }, { "epoch": 3.9472311158543816, "grad_norm": 0.29006115358845885, "learning_rate": 2.2815874566414763e-08, "loss": 0.0112, "step": 33287 }, { "epoch": 3.947349697616507, "grad_norm": 0.6424879791521693, "learning_rate": 2.2713462039872234e-08, "loss": 0.0457, "step": 33288 }, { "epoch": 3.9474682793786315, "grad_norm": 0.5103777512446931, "learning_rate": 2.261127977328248e-08, "loss": 0.0158, "step": 33289 }, { "epoch": 3.9475868611407563, "grad_norm": 0.7432848616512039, "learning_rate": 2.2509327767589183e-08, "loss": 0.0299, "step": 33290 }, { "epoch": 3.9477054429028815, "grad_norm": 0.8017004912995923, "learning_rate": 2.2407606023730486e-08, "loss": 0.0361, "step": 33291 }, { "epoch": 3.9478240246650067, "grad_norm": 0.4372709227726136, "learning_rate": 2.2306114542641753e-08, "loss": 0.0189, "step": 33292 }, { "epoch": 3.9479426064271315, "grad_norm": 0.6276430561386005, "learning_rate": 2.2204853325263896e-08, "loss": 0.0415, "step": 33293 }, { "epoch": 3.948061188189256, "grad_norm": 0.727516067484085, "learning_rate": 2.2103822372523952e-08, "loss": 0.0364, "step": 33294 }, { "epoch": 3.9481797699513814, "grad_norm": 0.43798591668962916, "learning_rate": 2.200302168536006e-08, "loss": 0.0191, "step": 33295 }, { "epoch": 3.9482983517135066, "grad_norm": 0.8125084105221255, "learning_rate": 2.1902451264696477e-08, "loss": 0.0318, "step": 33296 }, { "epoch": 3.9484169334756314, "grad_norm": 0.4753295937839806, "learning_rate": 2.180211111146302e-08, "loss": 0.0196, "step": 33297 }, { "epoch": 3.9485355152377566, "grad_norm": 0.4620646722469581, "learning_rate": 2.1702001226583946e-08, "loss": 0.0208, "step": 33298 }, { "epoch": 3.9486540969998813, "grad_norm": 0.31352129138887713, "learning_rate": 2.1602121610980743e-08, "loss": 0.013, "step": 33299 }, { "epoch": 3.9487726787620065, "grad_norm": 0.3530703397406167, "learning_rate": 2.1502472265577666e-08, "loss": 0.02, "step": 33300 }, { "epoch": 3.9488912605241313, "grad_norm": 0.5246471004380944, "learning_rate": 2.1403053191290655e-08, "loss": 0.024, "step": 33301 }, { "epoch": 3.9490098422862565, "grad_norm": 0.5699306367049539, "learning_rate": 2.1303864389035645e-08, "loss": 0.03, "step": 33302 }, { "epoch": 3.9491284240483813, "grad_norm": 0.44784609706341655, "learning_rate": 2.1204905859728565e-08, "loss": 0.0228, "step": 33303 }, { "epoch": 3.9492470058105065, "grad_norm": 0.6628519838695274, "learning_rate": 2.1106177604279797e-08, "loss": 0.0255, "step": 33304 }, { "epoch": 3.9493655875726312, "grad_norm": 0.7598229157371881, "learning_rate": 2.1007679623602506e-08, "loss": 0.0279, "step": 33305 }, { "epoch": 3.9494841693347564, "grad_norm": 0.3488243689353204, "learning_rate": 2.0909411918601518e-08, "loss": 0.0136, "step": 33306 }, { "epoch": 3.949602751096881, "grad_norm": 0.5978145225080252, "learning_rate": 2.0811374490187218e-08, "loss": 0.0319, "step": 33307 }, { "epoch": 3.9497213328590064, "grad_norm": 0.81646662108966, "learning_rate": 2.0713567339258887e-08, "loss": 0.0278, "step": 33308 }, { "epoch": 3.949839914621131, "grad_norm": 0.2906399380038868, "learning_rate": 2.0615990466718583e-08, "loss": 0.0099, "step": 33309 }, { "epoch": 3.9499584963832564, "grad_norm": 0.7839427778139317, "learning_rate": 2.0518643873468358e-08, "loss": 0.0439, "step": 33310 }, { "epoch": 3.950077078145381, "grad_norm": 0.3802063271237251, "learning_rate": 2.0421527560404718e-08, "loss": 0.0174, "step": 33311 }, { "epoch": 3.9501956599075063, "grad_norm": 0.8334606939550585, "learning_rate": 2.0324641528421395e-08, "loss": 0.0258, "step": 33312 }, { "epoch": 3.950314241669631, "grad_norm": 0.48842002412218066, "learning_rate": 2.0227985778414894e-08, "loss": 0.024, "step": 33313 }, { "epoch": 3.9504328234317563, "grad_norm": 0.34482306787857625, "learning_rate": 2.0131560311273388e-08, "loss": 0.016, "step": 33314 }, { "epoch": 3.950551405193881, "grad_norm": 0.668074916915112, "learning_rate": 2.0035365127885065e-08, "loss": 0.0398, "step": 33315 }, { "epoch": 3.9506699869560062, "grad_norm": 0.7292158189015361, "learning_rate": 1.993940022914087e-08, "loss": 0.0234, "step": 33316 }, { "epoch": 3.950788568718131, "grad_norm": 0.5542903141174368, "learning_rate": 1.984366561592066e-08, "loss": 0.0218, "step": 33317 }, { "epoch": 3.950907150480256, "grad_norm": 0.6983907734907087, "learning_rate": 1.9748161289109836e-08, "loss": 0.0318, "step": 33318 }, { "epoch": 3.951025732242381, "grad_norm": 0.5026446665268934, "learning_rate": 1.9652887249588248e-08, "loss": 0.0229, "step": 33319 }, { "epoch": 3.951144314004506, "grad_norm": 0.45035518577119305, "learning_rate": 1.9557843498232974e-08, "loss": 0.0186, "step": 33320 }, { "epoch": 3.9512628957666314, "grad_norm": 0.8736449790911988, "learning_rate": 1.9463030035923868e-08, "loss": 0.0427, "step": 33321 }, { "epoch": 3.951381477528756, "grad_norm": 0.787652893351557, "learning_rate": 1.9368446863529677e-08, "loss": 0.0472, "step": 33322 }, { "epoch": 3.951500059290881, "grad_norm": 0.6938650175795306, "learning_rate": 1.9274093981927478e-08, "loss": 0.0308, "step": 33323 }, { "epoch": 3.951618641053006, "grad_norm": 0.3992487206463021, "learning_rate": 1.917997139198324e-08, "loss": 0.0177, "step": 33324 }, { "epoch": 3.9517372228151313, "grad_norm": 0.38997967967985364, "learning_rate": 1.9086079094565725e-08, "loss": 0.015, "step": 33325 }, { "epoch": 3.951855804577256, "grad_norm": 0.47620322569555584, "learning_rate": 1.8992417090540893e-08, "loss": 0.0184, "step": 33326 }, { "epoch": 3.951974386339381, "grad_norm": 0.7168950964251504, "learning_rate": 1.8898985380774726e-08, "loss": 0.0283, "step": 33327 }, { "epoch": 3.952092968101506, "grad_norm": 0.5900581676943425, "learning_rate": 1.8805783966124867e-08, "loss": 0.0374, "step": 33328 }, { "epoch": 3.952211549863631, "grad_norm": 0.6795039532718486, "learning_rate": 1.8712812847451745e-08, "loss": 0.0301, "step": 33329 }, { "epoch": 3.952330131625756, "grad_norm": 0.4208519848720337, "learning_rate": 1.8620072025610224e-08, "loss": 0.0199, "step": 33330 }, { "epoch": 3.9524487133878807, "grad_norm": 0.32945032353248216, "learning_rate": 1.8527561501460735e-08, "loss": 0.0154, "step": 33331 }, { "epoch": 3.952567295150006, "grad_norm": 0.523526055687681, "learning_rate": 1.8435281275849815e-08, "loss": 0.0219, "step": 33332 }, { "epoch": 3.952685876912131, "grad_norm": 0.645465703850089, "learning_rate": 1.834323134963234e-08, "loss": 0.0318, "step": 33333 }, { "epoch": 3.952804458674256, "grad_norm": 0.29859890297082187, "learning_rate": 1.825141172365763e-08, "loss": 0.0095, "step": 33334 }, { "epoch": 3.9529230404363807, "grad_norm": 0.3980779321233909, "learning_rate": 1.815982239876668e-08, "loss": 0.0171, "step": 33335 }, { "epoch": 3.953041622198506, "grad_norm": 0.5215748494899981, "learning_rate": 1.8068463375811583e-08, "loss": 0.025, "step": 33336 }, { "epoch": 3.953160203960631, "grad_norm": 0.44206510968084234, "learning_rate": 1.7977334655627787e-08, "loss": 0.0206, "step": 33337 }, { "epoch": 3.953278785722756, "grad_norm": 0.4157283237211688, "learning_rate": 1.788643623905628e-08, "loss": 0.0173, "step": 33338 }, { "epoch": 3.9533973674848806, "grad_norm": 0.34662156852177634, "learning_rate": 1.7795768126940836e-08, "loss": 0.0155, "step": 33339 }, { "epoch": 3.953515949247006, "grad_norm": 0.4503307308227166, "learning_rate": 1.7705330320111348e-08, "loss": 0.019, "step": 33340 }, { "epoch": 3.953634531009131, "grad_norm": 0.48493389639301543, "learning_rate": 1.761512281940325e-08, "loss": 0.0189, "step": 33341 }, { "epoch": 3.9537531127712557, "grad_norm": 0.5958974861723171, "learning_rate": 1.7525145625646444e-08, "loss": 0.0244, "step": 33342 }, { "epoch": 3.9538716945333805, "grad_norm": 0.5784253873447858, "learning_rate": 1.7435398739676367e-08, "loss": 0.0263, "step": 33343 }, { "epoch": 3.9539902762955057, "grad_norm": 0.5940825496065845, "learning_rate": 1.734588216231181e-08, "loss": 0.0304, "step": 33344 }, { "epoch": 3.954108858057631, "grad_norm": 0.5415594843788395, "learning_rate": 1.725659589438544e-08, "loss": 0.0293, "step": 33345 }, { "epoch": 3.9542274398197557, "grad_norm": 0.5961538249479175, "learning_rate": 1.716753993671605e-08, "loss": 0.0206, "step": 33346 }, { "epoch": 3.954346021581881, "grad_norm": 0.4568338727030447, "learning_rate": 1.7078714290127973e-08, "loss": 0.0315, "step": 33347 }, { "epoch": 3.9544646033440056, "grad_norm": 0.7711122450524647, "learning_rate": 1.699011895543723e-08, "loss": 0.04, "step": 33348 }, { "epoch": 3.954583185106131, "grad_norm": 0.6052112368059128, "learning_rate": 1.690175393345983e-08, "loss": 0.0349, "step": 33349 }, { "epoch": 3.9547017668682556, "grad_norm": 0.4233202924285294, "learning_rate": 1.681361922501179e-08, "loss": 0.0224, "step": 33350 }, { "epoch": 3.954820348630381, "grad_norm": 0.7050035963003074, "learning_rate": 1.6725714830909124e-08, "loss": 0.0345, "step": 33351 }, { "epoch": 3.9549389303925055, "grad_norm": 0.2899365796227922, "learning_rate": 1.663804075195674e-08, "loss": 0.0137, "step": 33352 }, { "epoch": 3.9550575121546307, "grad_norm": 0.32790344076510186, "learning_rate": 1.6550596988965107e-08, "loss": 0.0152, "step": 33353 }, { "epoch": 3.9551760939167555, "grad_norm": 0.3674005124781923, "learning_rate": 1.646338354273913e-08, "loss": 0.0156, "step": 33354 }, { "epoch": 3.9552946756788807, "grad_norm": 0.3582819154156955, "learning_rate": 1.6376400414083725e-08, "loss": 0.018, "step": 33355 }, { "epoch": 3.9554132574410055, "grad_norm": 0.4737574410197525, "learning_rate": 1.6289647603803802e-08, "loss": 0.0255, "step": 33356 }, { "epoch": 3.9555318392031307, "grad_norm": 0.586575450901515, "learning_rate": 1.6203125112693174e-08, "loss": 0.0204, "step": 33357 }, { "epoch": 3.9556504209652554, "grad_norm": 0.3313639955578634, "learning_rate": 1.6116832941553973e-08, "loss": 0.0159, "step": 33358 }, { "epoch": 3.9557690027273806, "grad_norm": 0.4763974137478306, "learning_rate": 1.603077109118001e-08, "loss": 0.0141, "step": 33359 }, { "epoch": 3.9558875844895054, "grad_norm": 0.4174554132761619, "learning_rate": 1.5944939562367867e-08, "loss": 0.0178, "step": 33360 }, { "epoch": 3.9560061662516306, "grad_norm": 0.7645576234217413, "learning_rate": 1.5859338355900254e-08, "loss": 0.0337, "step": 33361 }, { "epoch": 3.9561247480137554, "grad_norm": 0.4674668474331656, "learning_rate": 1.5773967472576535e-08, "loss": 0.0232, "step": 33362 }, { "epoch": 3.9562433297758806, "grad_norm": 0.47437316723981426, "learning_rate": 1.5688826913176635e-08, "loss": 0.0241, "step": 33363 }, { "epoch": 3.9563619115380053, "grad_norm": 0.7056316030056391, "learning_rate": 1.5603916678488818e-08, "loss": 0.0266, "step": 33364 }, { "epoch": 3.9564804933001305, "grad_norm": 0.9505253538107804, "learning_rate": 1.5519236769295788e-08, "loss": 0.0398, "step": 33365 }, { "epoch": 3.9565990750622553, "grad_norm": 0.6717216063973027, "learning_rate": 1.5434787186377474e-08, "loss": 0.0452, "step": 33366 }, { "epoch": 3.9567176568243805, "grad_norm": 0.4365995629369219, "learning_rate": 1.535056793051104e-08, "loss": 0.0224, "step": 33367 }, { "epoch": 3.9568362385865052, "grad_norm": 0.718716788459023, "learning_rate": 1.5266579002473636e-08, "loss": 0.0324, "step": 33368 }, { "epoch": 3.9569548203486304, "grad_norm": 0.42188557777499547, "learning_rate": 1.518282040304242e-08, "loss": 0.0169, "step": 33369 }, { "epoch": 3.957073402110755, "grad_norm": 0.40807401040173635, "learning_rate": 1.509929213298622e-08, "loss": 0.0176, "step": 33370 }, { "epoch": 3.9571919838728804, "grad_norm": 0.5190384249680117, "learning_rate": 1.5015994193076642e-08, "loss": 0.0214, "step": 33371 }, { "epoch": 3.957310565635005, "grad_norm": 0.4697425186885614, "learning_rate": 1.4932926584079743e-08, "loss": 0.0203, "step": 33372 }, { "epoch": 3.9574291473971304, "grad_norm": 0.4361057637699822, "learning_rate": 1.4850089306761572e-08, "loss": 0.0263, "step": 33373 }, { "epoch": 3.9575477291592556, "grad_norm": 0.27940200951258337, "learning_rate": 1.4767482361888185e-08, "loss": 0.0154, "step": 33374 }, { "epoch": 3.9576663109213803, "grad_norm": 0.4453220082080854, "learning_rate": 1.4685105750220085e-08, "loss": 0.023, "step": 33375 }, { "epoch": 3.957784892683505, "grad_norm": 0.5712400589838775, "learning_rate": 1.4602959472514999e-08, "loss": 0.0194, "step": 33376 }, { "epoch": 3.9579034744456303, "grad_norm": 0.4303004205702741, "learning_rate": 1.4521043529533429e-08, "loss": 0.0214, "step": 33377 }, { "epoch": 3.9580220562077555, "grad_norm": 0.4917654995034687, "learning_rate": 1.4439357922027553e-08, "loss": 0.0198, "step": 33378 }, { "epoch": 3.9581406379698802, "grad_norm": 0.4143132025571263, "learning_rate": 1.4357902650752319e-08, "loss": 0.0169, "step": 33379 }, { "epoch": 3.958259219732005, "grad_norm": 0.9097792205530535, "learning_rate": 1.4276677716457131e-08, "loss": 0.0301, "step": 33380 }, { "epoch": 3.95837780149413, "grad_norm": 0.4603654500396708, "learning_rate": 1.419568311989139e-08, "loss": 0.0253, "step": 33381 }, { "epoch": 3.9584963832562554, "grad_norm": 0.5931878672442378, "learning_rate": 1.4114918861801717e-08, "loss": 0.0234, "step": 33382 }, { "epoch": 3.95861496501838, "grad_norm": 0.514240767272106, "learning_rate": 1.4034384942931966e-08, "loss": 0.0207, "step": 33383 }, { "epoch": 3.958733546780505, "grad_norm": 0.3286816502405666, "learning_rate": 1.3954081364025985e-08, "loss": 0.015, "step": 33384 }, { "epoch": 3.95885212854263, "grad_norm": 0.437126141347465, "learning_rate": 1.3874008125822069e-08, "loss": 0.0179, "step": 33385 }, { "epoch": 3.9589707103047553, "grad_norm": 0.5696328829027724, "learning_rate": 1.3794165229061296e-08, "loss": 0.0225, "step": 33386 }, { "epoch": 3.95908929206688, "grad_norm": 0.6901394592929107, "learning_rate": 1.3714552674479186e-08, "loss": 0.0364, "step": 33387 }, { "epoch": 3.959207873829005, "grad_norm": 0.41681841502768063, "learning_rate": 1.3635170462808488e-08, "loss": 0.0177, "step": 33388 }, { "epoch": 3.95932645559113, "grad_norm": 0.5214951510369354, "learning_rate": 1.3556018594779174e-08, "loss": 0.0286, "step": 33389 }, { "epoch": 3.9594450373532553, "grad_norm": 0.7024763626693468, "learning_rate": 1.347709707112399e-08, "loss": 0.0343, "step": 33390 }, { "epoch": 3.95956361911538, "grad_norm": 0.6370604862385302, "learning_rate": 1.3398405892570132e-08, "loss": 0.0343, "step": 33391 }, { "epoch": 3.9596822008775048, "grad_norm": 0.6862260007516471, "learning_rate": 1.3319945059842021e-08, "loss": 0.0416, "step": 33392 }, { "epoch": 3.95980078263963, "grad_norm": 0.4361854924546859, "learning_rate": 1.3241714573664077e-08, "loss": 0.0183, "step": 33393 }, { "epoch": 3.959919364401755, "grad_norm": 0.5996192596353614, "learning_rate": 1.316371443475517e-08, "loss": 0.0271, "step": 33394 }, { "epoch": 3.96003794616388, "grad_norm": 0.6004792035393172, "learning_rate": 1.3085944643836946e-08, "loss": 0.0255, "step": 33395 }, { "epoch": 3.960156527926005, "grad_norm": 0.5444938220944684, "learning_rate": 1.3008405201625496e-08, "loss": 0.031, "step": 33396 }, { "epoch": 3.96027510968813, "grad_norm": 0.4203790600809413, "learning_rate": 1.2931096108836915e-08, "loss": 0.0199, "step": 33397 }, { "epoch": 3.960393691450255, "grad_norm": 0.3458964617073445, "learning_rate": 1.2854017366178971e-08, "loss": 0.0161, "step": 33398 }, { "epoch": 3.96051227321238, "grad_norm": 0.5679769464024761, "learning_rate": 1.2777168974367759e-08, "loss": 0.0231, "step": 33399 }, { "epoch": 3.960630854974505, "grad_norm": 0.3978239993186301, "learning_rate": 1.2700550934108268e-08, "loss": 0.0169, "step": 33400 }, { "epoch": 3.96074943673663, "grad_norm": 0.41066305813617915, "learning_rate": 1.2624163246111043e-08, "loss": 0.0157, "step": 33401 }, { "epoch": 3.960868018498755, "grad_norm": 0.3856678166152162, "learning_rate": 1.2548005911075523e-08, "loss": 0.0196, "step": 33402 }, { "epoch": 3.96098660026088, "grad_norm": 0.5160662764001938, "learning_rate": 1.2472078929706699e-08, "loss": 0.0229, "step": 33403 }, { "epoch": 3.961105182023005, "grad_norm": 0.3791959134212932, "learning_rate": 1.2396382302701238e-08, "loss": 0.0174, "step": 33404 }, { "epoch": 3.9612237637851297, "grad_norm": 0.45983013355420516, "learning_rate": 1.2320916030761354e-08, "loss": 0.021, "step": 33405 }, { "epoch": 3.961342345547255, "grad_norm": 0.6512972058913038, "learning_rate": 1.2245680114580938e-08, "loss": 0.0277, "step": 33406 }, { "epoch": 3.9614609273093797, "grad_norm": 0.6359189736217512, "learning_rate": 1.2170674554851103e-08, "loss": 0.0206, "step": 33407 }, { "epoch": 3.961579509071505, "grad_norm": 0.48899038012208884, "learning_rate": 1.2095899352268514e-08, "loss": 0.0221, "step": 33408 }, { "epoch": 3.9616980908336297, "grad_norm": 0.6876925752930285, "learning_rate": 1.2021354507518734e-08, "loss": 0.035, "step": 33409 }, { "epoch": 3.961816672595755, "grad_norm": 0.3535833849806326, "learning_rate": 1.1947040021287325e-08, "loss": 0.0217, "step": 33410 }, { "epoch": 3.9619352543578796, "grad_norm": 0.41881379884328623, "learning_rate": 1.1872955894265402e-08, "loss": 0.0161, "step": 33411 }, { "epoch": 3.962053836120005, "grad_norm": 0.7572290776367461, "learning_rate": 1.1799102127130202e-08, "loss": 0.0383, "step": 33412 }, { "epoch": 3.9621724178821296, "grad_norm": 0.48937851169817354, "learning_rate": 1.1725478720564509e-08, "loss": 0.0278, "step": 33413 }, { "epoch": 3.962290999644255, "grad_norm": 0.6023669444576925, "learning_rate": 1.1652085675248337e-08, "loss": 0.0239, "step": 33414 }, { "epoch": 3.9624095814063796, "grad_norm": 0.5230481508513524, "learning_rate": 1.1578922991856145e-08, "loss": 0.0215, "step": 33415 }, { "epoch": 3.9625281631685048, "grad_norm": 0.3531442888298612, "learning_rate": 1.150599067106517e-08, "loss": 0.0148, "step": 33416 }, { "epoch": 3.9626467449306295, "grad_norm": 0.6198212887241226, "learning_rate": 1.1433288713544321e-08, "loss": 0.0256, "step": 33417 }, { "epoch": 3.9627653266927547, "grad_norm": 0.5082527989387251, "learning_rate": 1.1360817119968058e-08, "loss": 0.0234, "step": 33418 }, { "epoch": 3.9628839084548795, "grad_norm": 0.5295352023797615, "learning_rate": 1.1288575890999742e-08, "loss": 0.0343, "step": 33419 }, { "epoch": 3.9630024902170047, "grad_norm": 0.5478557649355785, "learning_rate": 1.1216565027308278e-08, "loss": 0.0327, "step": 33420 }, { "epoch": 3.9631210719791294, "grad_norm": 0.353763510272433, "learning_rate": 1.1144784529554252e-08, "loss": 0.0151, "step": 33421 }, { "epoch": 3.9632396537412546, "grad_norm": 0.42416826347522707, "learning_rate": 1.1073234398406573e-08, "loss": 0.021, "step": 33422 }, { "epoch": 3.96335823550338, "grad_norm": 0.4495168615381652, "learning_rate": 1.1001914634517497e-08, "loss": 0.0218, "step": 33423 }, { "epoch": 3.9634768172655046, "grad_norm": 0.5203847238053332, "learning_rate": 1.0930825238547603e-08, "loss": 0.0319, "step": 33424 }, { "epoch": 3.9635953990276294, "grad_norm": 0.5786984119457723, "learning_rate": 1.0859966211151928e-08, "loss": 0.0282, "step": 33425 }, { "epoch": 3.9637139807897546, "grad_norm": 0.46641932743313264, "learning_rate": 1.0789337552982726e-08, "loss": 0.0287, "step": 33426 }, { "epoch": 3.9638325625518798, "grad_norm": 0.6980987419607332, "learning_rate": 1.0718939264692251e-08, "loss": 0.0427, "step": 33427 }, { "epoch": 3.9639511443140045, "grad_norm": 0.40453593400742477, "learning_rate": 1.0648771346929986e-08, "loss": 0.0147, "step": 33428 }, { "epoch": 3.9640697260761293, "grad_norm": 0.7043887838562016, "learning_rate": 1.0578833800342636e-08, "loss": 0.037, "step": 33429 }, { "epoch": 3.9641883078382545, "grad_norm": 0.37685271666239895, "learning_rate": 1.0509126625574129e-08, "loss": 0.0173, "step": 33430 }, { "epoch": 3.9643068896003797, "grad_norm": 0.4218973852290514, "learning_rate": 1.0439649823268394e-08, "loss": 0.021, "step": 33431 }, { "epoch": 3.9644254713625044, "grad_norm": 0.5153876219981371, "learning_rate": 1.0370403394061034e-08, "loss": 0.02, "step": 33432 }, { "epoch": 3.964544053124629, "grad_norm": 0.4882484352307018, "learning_rate": 1.0301387338598755e-08, "loss": 0.0261, "step": 33433 }, { "epoch": 3.9646626348867544, "grad_norm": 0.5643977769486357, "learning_rate": 1.0232601657508834e-08, "loss": 0.0216, "step": 33434 }, { "epoch": 3.9647812166488796, "grad_norm": 0.48342338574336424, "learning_rate": 1.0164046351432422e-08, "loss": 0.0216, "step": 33435 }, { "epoch": 3.9648997984110044, "grad_norm": 0.549292873580421, "learning_rate": 1.0095721420999572e-08, "loss": 0.0249, "step": 33436 }, { "epoch": 3.965018380173129, "grad_norm": 0.306173806399259, "learning_rate": 1.0027626866837559e-08, "loss": 0.0124, "step": 33437 }, { "epoch": 3.9651369619352543, "grad_norm": 0.704949179219455, "learning_rate": 9.95976268957921e-09, "loss": 0.0235, "step": 33438 }, { "epoch": 3.9652555436973795, "grad_norm": 0.40024629719833515, "learning_rate": 9.892128889843478e-09, "loss": 0.0143, "step": 33439 }, { "epoch": 3.9653741254595043, "grad_norm": 0.6604830832718767, "learning_rate": 9.824725468260409e-09, "loss": 0.0335, "step": 33440 }, { "epoch": 3.965492707221629, "grad_norm": 0.3990751748615953, "learning_rate": 9.757552425446182e-09, "loss": 0.0248, "step": 33441 }, { "epoch": 3.9656112889837543, "grad_norm": 0.636553261562833, "learning_rate": 9.690609762022517e-09, "loss": 0.0328, "step": 33442 }, { "epoch": 3.9657298707458795, "grad_norm": 0.4409620024841515, "learning_rate": 9.623897478608368e-09, "loss": 0.0163, "step": 33443 }, { "epoch": 3.965848452508004, "grad_norm": 0.44156640097557337, "learning_rate": 9.557415575814355e-09, "loss": 0.0238, "step": 33444 }, { "epoch": 3.965967034270129, "grad_norm": 0.3493351787240521, "learning_rate": 9.491164054256652e-09, "loss": 0.0152, "step": 33445 }, { "epoch": 3.966085616032254, "grad_norm": 0.5181913835909334, "learning_rate": 9.425142914545881e-09, "loss": 0.0144, "step": 33446 }, { "epoch": 3.9662041977943794, "grad_norm": 0.41962597688742553, "learning_rate": 9.359352157287116e-09, "loss": 0.0173, "step": 33447 }, { "epoch": 3.966322779556504, "grad_norm": 0.8226836187917135, "learning_rate": 9.293791783090978e-09, "loss": 0.0382, "step": 33448 }, { "epoch": 3.9664413613186293, "grad_norm": 0.43117003801646736, "learning_rate": 9.228461792562538e-09, "loss": 0.0214, "step": 33449 }, { "epoch": 3.966559943080754, "grad_norm": 0.6466098878437938, "learning_rate": 9.163362186298541e-09, "loss": 0.0254, "step": 33450 }, { "epoch": 3.9666785248428793, "grad_norm": 0.5810504328998831, "learning_rate": 9.09849296490406e-09, "loss": 0.0148, "step": 33451 }, { "epoch": 3.966797106605004, "grad_norm": 0.4738296323707724, "learning_rate": 9.033854128975839e-09, "loss": 0.0274, "step": 33452 }, { "epoch": 3.9669156883671293, "grad_norm": 0.4813018982296376, "learning_rate": 8.969445679110621e-09, "loss": 0.0223, "step": 33453 }, { "epoch": 3.967034270129254, "grad_norm": 0.35439683495596225, "learning_rate": 8.905267615899604e-09, "loss": 0.0175, "step": 33454 }, { "epoch": 3.9671528518913792, "grad_norm": 0.5070083797320838, "learning_rate": 8.841319939936754e-09, "loss": 0.0242, "step": 33455 }, { "epoch": 3.967271433653504, "grad_norm": 0.6744508737587046, "learning_rate": 8.77760265181049e-09, "loss": 0.0222, "step": 33456 }, { "epoch": 3.967390015415629, "grad_norm": 0.39031230335063605, "learning_rate": 8.714115752106456e-09, "loss": 0.021, "step": 33457 }, { "epoch": 3.967508597177754, "grad_norm": 0.8119395527306987, "learning_rate": 8.650859241413068e-09, "loss": 0.0401, "step": 33458 }, { "epoch": 3.967627178939879, "grad_norm": 0.438014813769493, "learning_rate": 8.587833120313194e-09, "loss": 0.0206, "step": 33459 }, { "epoch": 3.967745760702004, "grad_norm": 0.480641670293489, "learning_rate": 8.525037389386926e-09, "loss": 0.0195, "step": 33460 }, { "epoch": 3.967864342464129, "grad_norm": 0.5067409468209351, "learning_rate": 8.462472049214354e-09, "loss": 0.0226, "step": 33461 }, { "epoch": 3.967982924226254, "grad_norm": 0.3122453300822548, "learning_rate": 8.400137100370021e-09, "loss": 0.0114, "step": 33462 }, { "epoch": 3.968101505988379, "grad_norm": 0.451518560398409, "learning_rate": 8.338032543428465e-09, "loss": 0.0223, "step": 33463 }, { "epoch": 3.968220087750504, "grad_norm": 0.3303959199202032, "learning_rate": 8.276158378964228e-09, "loss": 0.0181, "step": 33464 }, { "epoch": 3.968338669512629, "grad_norm": 0.6290543314369916, "learning_rate": 8.214514607546298e-09, "loss": 0.0257, "step": 33465 }, { "epoch": 3.968457251274754, "grad_norm": 0.6299644880090217, "learning_rate": 8.15310122974644e-09, "loss": 0.0398, "step": 33466 }, { "epoch": 3.968575833036879, "grad_norm": 0.8500162841630194, "learning_rate": 8.091918246125319e-09, "loss": 0.0449, "step": 33467 }, { "epoch": 3.9686944147990038, "grad_norm": 0.36006024140183307, "learning_rate": 8.030965657249145e-09, "loss": 0.0178, "step": 33468 }, { "epoch": 3.968812996561129, "grad_norm": 0.3977023888771763, "learning_rate": 7.970243463681359e-09, "loss": 0.021, "step": 33469 }, { "epoch": 3.9689315783232537, "grad_norm": 0.477085403659299, "learning_rate": 7.909751665982624e-09, "loss": 0.021, "step": 33470 }, { "epoch": 3.969050160085379, "grad_norm": 0.5173689934712813, "learning_rate": 7.849490264705272e-09, "loss": 0.0188, "step": 33471 }, { "epoch": 3.969168741847504, "grad_norm": 0.7489436484237628, "learning_rate": 7.78945926040997e-09, "loss": 0.0348, "step": 33472 }, { "epoch": 3.969287323609629, "grad_norm": 0.6738675046365007, "learning_rate": 7.729658653649053e-09, "loss": 0.0272, "step": 33473 }, { "epoch": 3.9694059053717536, "grad_norm": 0.6401821038170387, "learning_rate": 7.67008844497208e-09, "loss": 0.0368, "step": 33474 }, { "epoch": 3.969524487133879, "grad_norm": 0.4333466649139615, "learning_rate": 7.61074863492861e-09, "loss": 0.0213, "step": 33475 }, { "epoch": 3.969643068896004, "grad_norm": 0.42664843713408995, "learning_rate": 7.551639224068207e-09, "loss": 0.025, "step": 33476 }, { "epoch": 3.969761650658129, "grad_norm": 0.48769141959727486, "learning_rate": 7.492760212932105e-09, "loss": 0.0194, "step": 33477 }, { "epoch": 3.9698802324202536, "grad_norm": 0.47012410498377144, "learning_rate": 7.4341116020670844e-09, "loss": 0.0255, "step": 33478 }, { "epoch": 3.9699988141823788, "grad_norm": 0.8203046059069566, "learning_rate": 7.3756933920088315e-09, "loss": 0.0303, "step": 33479 }, { "epoch": 3.970117395944504, "grad_norm": 0.550640641505537, "learning_rate": 7.3175055833013536e-09, "loss": 0.023, "step": 33480 }, { "epoch": 3.9702359777066287, "grad_norm": 0.7135904698663438, "learning_rate": 7.259548176474784e-09, "loss": 0.0351, "step": 33481 }, { "epoch": 3.9703545594687535, "grad_norm": 0.4232754774447789, "learning_rate": 7.201821172070356e-09, "loss": 0.0129, "step": 33482 }, { "epoch": 3.9704731412308787, "grad_norm": 0.8924702996251427, "learning_rate": 7.144324570615424e-09, "loss": 0.0401, "step": 33483 }, { "epoch": 3.970591722993004, "grad_norm": 0.38432137252088955, "learning_rate": 7.087058372642896e-09, "loss": 0.0159, "step": 33484 }, { "epoch": 3.9707103047551287, "grad_norm": 0.7670766167633478, "learning_rate": 7.030022578680129e-09, "loss": 0.0421, "step": 33485 }, { "epoch": 3.9708288865172534, "grad_norm": 0.61270673826891, "learning_rate": 6.973217189248926e-09, "loss": 0.0296, "step": 33486 }, { "epoch": 3.9709474682793786, "grad_norm": 0.6288319718577854, "learning_rate": 6.91664220487942e-09, "loss": 0.025, "step": 33487 }, { "epoch": 3.971066050041504, "grad_norm": 0.7424078144057263, "learning_rate": 6.860297626087864e-09, "loss": 0.0286, "step": 33488 }, { "epoch": 3.9711846318036286, "grad_norm": 0.4108934654103966, "learning_rate": 6.804183453396063e-09, "loss": 0.0142, "step": 33489 }, { "epoch": 3.9713032135657533, "grad_norm": 0.7943667004846999, "learning_rate": 6.74829968732027e-09, "loss": 0.0444, "step": 33490 }, { "epoch": 3.9714217953278785, "grad_norm": 0.4321595980954415, "learning_rate": 6.692646328376739e-09, "loss": 0.0195, "step": 33491 }, { "epoch": 3.9715403770900037, "grad_norm": 0.3777444108262476, "learning_rate": 6.6372233770789495e-09, "loss": 0.016, "step": 33492 }, { "epoch": 3.9716589588521285, "grad_norm": 0.6141610936994182, "learning_rate": 6.582030833937602e-09, "loss": 0.0217, "step": 33493 }, { "epoch": 3.9717775406142533, "grad_norm": 0.862652650788516, "learning_rate": 6.527068699460626e-09, "loss": 0.0473, "step": 33494 }, { "epoch": 3.9718961223763785, "grad_norm": 0.38796290388118915, "learning_rate": 6.472336974155946e-09, "loss": 0.0213, "step": 33495 }, { "epoch": 3.9720147041385037, "grad_norm": 0.37864212188776447, "learning_rate": 6.417835658525939e-09, "loss": 0.0166, "step": 33496 }, { "epoch": 3.9721332859006284, "grad_norm": 0.5706579212970199, "learning_rate": 6.363564753072981e-09, "loss": 0.0321, "step": 33497 }, { "epoch": 3.9722518676627536, "grad_norm": 0.6325652909201075, "learning_rate": 6.309524258302224e-09, "loss": 0.0358, "step": 33498 }, { "epoch": 3.9723704494248784, "grad_norm": 0.43497038450153447, "learning_rate": 6.255714174704941e-09, "loss": 0.0207, "step": 33499 }, { "epoch": 3.9724890311870036, "grad_norm": 0.6315274597984656, "learning_rate": 6.202134502780732e-09, "loss": 0.0301, "step": 33500 }, { "epoch": 3.9726076129491283, "grad_norm": 0.38665273122915833, "learning_rate": 6.148785243026423e-09, "loss": 0.0212, "step": 33501 }, { "epoch": 3.9727261947112535, "grad_norm": 0.6453158691205148, "learning_rate": 6.095666395927735e-09, "loss": 0.0187, "step": 33502 }, { "epoch": 3.9728447764733783, "grad_norm": 0.4776977957228523, "learning_rate": 6.042777961978719e-09, "loss": 0.0195, "step": 33503 }, { "epoch": 3.9729633582355035, "grad_norm": 0.45024784708470167, "learning_rate": 5.990119941667871e-09, "loss": 0.0186, "step": 33504 }, { "epoch": 3.9730819399976283, "grad_norm": 0.5816145355326791, "learning_rate": 5.937692335475365e-09, "loss": 0.0259, "step": 33505 }, { "epoch": 3.9732005217597535, "grad_norm": 0.6041035367797645, "learning_rate": 5.885495143889696e-09, "loss": 0.0298, "step": 33506 }, { "epoch": 3.9733191035218782, "grad_norm": 0.6009960308419036, "learning_rate": 5.8335283673882634e-09, "loss": 0.0215, "step": 33507 }, { "epoch": 3.9734376852840034, "grad_norm": 0.3960207046562683, "learning_rate": 5.781792006451236e-09, "loss": 0.0173, "step": 33508 }, { "epoch": 3.973556267046128, "grad_norm": 0.46430158891759216, "learning_rate": 5.730286061558787e-09, "loss": 0.024, "step": 33509 }, { "epoch": 3.9736748488082534, "grad_norm": 0.7191236628255693, "learning_rate": 5.6790105331799845e-09, "loss": 0.032, "step": 33510 }, { "epoch": 3.973793430570378, "grad_norm": 0.6422602867247167, "learning_rate": 5.627965421792225e-09, "loss": 0.0276, "step": 33511 }, { "epoch": 3.9739120123325034, "grad_norm": 0.6455536644402708, "learning_rate": 5.5771507278645775e-09, "loss": 0.0152, "step": 33512 }, { "epoch": 3.974030594094628, "grad_norm": 0.47731731028171004, "learning_rate": 5.526566451866111e-09, "loss": 0.019, "step": 33513 }, { "epoch": 3.9741491758567533, "grad_norm": 0.4308115089134773, "learning_rate": 5.476212594263119e-09, "loss": 0.0162, "step": 33514 }, { "epoch": 3.974267757618878, "grad_norm": 0.6345167814546007, "learning_rate": 5.4260891555163455e-09, "loss": 0.0321, "step": 33515 }, { "epoch": 3.9743863393810033, "grad_norm": 0.4885062637700662, "learning_rate": 5.376196136092082e-09, "loss": 0.0168, "step": 33516 }, { "epoch": 3.974504921143128, "grad_norm": 0.5324106407532789, "learning_rate": 5.326533536448297e-09, "loss": 0.0231, "step": 33517 }, { "epoch": 3.9746235029052532, "grad_norm": 0.6167880811648864, "learning_rate": 5.2771013570457325e-09, "loss": 0.0325, "step": 33518 }, { "epoch": 3.974742084667378, "grad_norm": 0.503905220363612, "learning_rate": 5.227899598334029e-09, "loss": 0.0178, "step": 33519 }, { "epoch": 3.974860666429503, "grad_norm": 0.3824082195610679, "learning_rate": 5.17892826077393e-09, "loss": 0.0165, "step": 33520 }, { "epoch": 3.9749792481916284, "grad_norm": 0.6290994813798025, "learning_rate": 5.130187344812298e-09, "loss": 0.0304, "step": 33521 }, { "epoch": 3.975097829953753, "grad_norm": 0.5484815855733038, "learning_rate": 5.0816768508987756e-09, "loss": 0.0205, "step": 33522 }, { "epoch": 3.975216411715878, "grad_norm": 0.4899614693538434, "learning_rate": 5.033396779480226e-09, "loss": 0.0214, "step": 33523 }, { "epoch": 3.975334993478003, "grad_norm": 0.698078372517803, "learning_rate": 4.985347131006291e-09, "loss": 0.0372, "step": 33524 }, { "epoch": 3.9754535752401283, "grad_norm": 0.2919954622795099, "learning_rate": 4.937527905912731e-09, "loss": 0.0168, "step": 33525 }, { "epoch": 3.975572157002253, "grad_norm": 0.5608720131182903, "learning_rate": 4.889939104646412e-09, "loss": 0.0226, "step": 33526 }, { "epoch": 3.975690738764378, "grad_norm": 0.6189326068737782, "learning_rate": 4.8425807276458735e-09, "loss": 0.0239, "step": 33527 }, { "epoch": 3.975809320526503, "grad_norm": 0.49089097001703935, "learning_rate": 4.7954527753413246e-09, "loss": 0.023, "step": 33528 }, { "epoch": 3.9759279022886282, "grad_norm": 0.5607220910697843, "learning_rate": 4.74855524817408e-09, "loss": 0.0239, "step": 33529 }, { "epoch": 3.976046484050753, "grad_norm": 0.6250316029015703, "learning_rate": 4.701888146574351e-09, "loss": 0.0264, "step": 33530 }, { "epoch": 3.9761650658128778, "grad_norm": 0.5780898785553461, "learning_rate": 4.655451470969574e-09, "loss": 0.0268, "step": 33531 }, { "epoch": 3.976283647575003, "grad_norm": 0.297413857866776, "learning_rate": 4.6092452217927355e-09, "loss": 0.0122, "step": 33532 }, { "epoch": 3.976402229337128, "grad_norm": 0.8776370676449369, "learning_rate": 4.56326939946572e-09, "loss": 0.0319, "step": 33533 }, { "epoch": 3.976520811099253, "grad_norm": 0.3541486486575698, "learning_rate": 4.517524004415962e-09, "loss": 0.0143, "step": 33534 }, { "epoch": 3.9766393928613777, "grad_norm": 0.47232102026890993, "learning_rate": 4.472009037059799e-09, "loss": 0.0288, "step": 33535 }, { "epoch": 3.976757974623503, "grad_norm": 0.7223284535705691, "learning_rate": 4.426724497821888e-09, "loss": 0.0332, "step": 33536 }, { "epoch": 3.976876556385628, "grad_norm": 0.6398927238359544, "learning_rate": 4.381670387118564e-09, "loss": 0.0327, "step": 33537 }, { "epoch": 3.976995138147753, "grad_norm": 0.5492235088649329, "learning_rate": 4.33684670536616e-09, "loss": 0.0334, "step": 33538 }, { "epoch": 3.9771137199098776, "grad_norm": 0.5546684063012337, "learning_rate": 4.292253452972683e-09, "loss": 0.029, "step": 33539 }, { "epoch": 3.977232301672003, "grad_norm": 0.5598314597922052, "learning_rate": 4.247890630354467e-09, "loss": 0.016, "step": 33540 }, { "epoch": 3.977350883434128, "grad_norm": 0.5226542897935899, "learning_rate": 4.203758237919519e-09, "loss": 0.0222, "step": 33541 }, { "epoch": 3.9774694651962528, "grad_norm": 0.4521422840402, "learning_rate": 4.159856276073071e-09, "loss": 0.019, "step": 33542 }, { "epoch": 3.9775880469583775, "grad_norm": 0.4272944331619357, "learning_rate": 4.116184745223128e-09, "loss": 0.0148, "step": 33543 }, { "epoch": 3.9777066287205027, "grad_norm": 0.8953037194128486, "learning_rate": 4.072743645766597e-09, "loss": 0.0357, "step": 33544 }, { "epoch": 3.977825210482628, "grad_norm": 0.3219283539745039, "learning_rate": 4.029532978108708e-09, "loss": 0.0131, "step": 33545 }, { "epoch": 3.9779437922447527, "grad_norm": 0.4052013947506946, "learning_rate": 3.986552742646365e-09, "loss": 0.0158, "step": 33546 }, { "epoch": 3.978062374006878, "grad_norm": 0.48913984674193717, "learning_rate": 3.943802939776475e-09, "loss": 0.0157, "step": 33547 }, { "epoch": 3.9781809557690027, "grad_norm": 1.0835384269692885, "learning_rate": 3.901283569893166e-09, "loss": 0.0469, "step": 33548 }, { "epoch": 3.978299537531128, "grad_norm": 0.6256559962928251, "learning_rate": 3.858994633385016e-09, "loss": 0.0275, "step": 33549 }, { "epoch": 3.9784181192932526, "grad_norm": 0.21401220554262768, "learning_rate": 3.816936130646154e-09, "loss": 0.0072, "step": 33550 }, { "epoch": 3.978536701055378, "grad_norm": 0.4437383640647404, "learning_rate": 3.775108062065158e-09, "loss": 0.0175, "step": 33551 }, { "epoch": 3.9786552828175026, "grad_norm": 0.4889882938553762, "learning_rate": 3.73351042802228e-09, "loss": 0.0177, "step": 33552 }, { "epoch": 3.978773864579628, "grad_norm": 0.3288044462081121, "learning_rate": 3.692143228903322e-09, "loss": 0.0133, "step": 33553 }, { "epoch": 3.9788924463417525, "grad_norm": 0.39509663573677334, "learning_rate": 3.651006465094087e-09, "loss": 0.0172, "step": 33554 }, { "epoch": 3.9790110281038777, "grad_norm": 0.6891727835545239, "learning_rate": 3.6101001369664987e-09, "loss": 0.0323, "step": 33555 }, { "epoch": 3.9791296098660025, "grad_norm": 0.5270520846672859, "learning_rate": 3.569424244903585e-09, "loss": 0.0165, "step": 33556 }, { "epoch": 3.9792481916281277, "grad_norm": 0.4352838776821903, "learning_rate": 3.528978789277271e-09, "loss": 0.019, "step": 33557 }, { "epoch": 3.9793667733902525, "grad_norm": 0.44895081433370093, "learning_rate": 3.48876377045948e-09, "loss": 0.0207, "step": 33558 }, { "epoch": 3.9794853551523777, "grad_norm": 0.6622197918860289, "learning_rate": 3.448779188822138e-09, "loss": 0.0282, "step": 33559 }, { "epoch": 3.9796039369145024, "grad_norm": 0.5170604407490137, "learning_rate": 3.4090250447371687e-09, "loss": 0.0307, "step": 33560 }, { "epoch": 3.9797225186766276, "grad_norm": 0.3476849398124784, "learning_rate": 3.369501338568171e-09, "loss": 0.0114, "step": 33561 }, { "epoch": 3.9798411004387524, "grad_norm": 0.4950655189184398, "learning_rate": 3.3302080706759663e-09, "loss": 0.029, "step": 33562 }, { "epoch": 3.9799596822008776, "grad_norm": 0.5060411297041008, "learning_rate": 3.2911452414297052e-09, "loss": 0.0268, "step": 33563 }, { "epoch": 3.9800782639630023, "grad_norm": 0.4924114267459421, "learning_rate": 3.252312851184658e-09, "loss": 0.0196, "step": 33564 }, { "epoch": 3.9801968457251276, "grad_norm": 0.3647619323292598, "learning_rate": 3.213710900298872e-09, "loss": 0.018, "step": 33565 }, { "epoch": 3.9803154274872523, "grad_norm": 0.5005973391968552, "learning_rate": 3.1753393891331696e-09, "loss": 0.0156, "step": 33566 }, { "epoch": 3.9804340092493775, "grad_norm": 0.6908194805821141, "learning_rate": 3.1371983180344955e-09, "loss": 0.044, "step": 33567 }, { "epoch": 3.9805525910115023, "grad_norm": 0.42267651831988623, "learning_rate": 3.099287687358121e-09, "loss": 0.0185, "step": 33568 }, { "epoch": 3.9806711727736275, "grad_norm": 0.6917653617703248, "learning_rate": 3.0616074974537666e-09, "loss": 0.0425, "step": 33569 }, { "epoch": 3.9807897545357527, "grad_norm": 0.47287206111628477, "learning_rate": 3.024157748668377e-09, "loss": 0.0249, "step": 33570 }, { "epoch": 3.9809083362978774, "grad_norm": 0.6359028996605481, "learning_rate": 2.986938441346121e-09, "loss": 0.0259, "step": 33571 }, { "epoch": 3.981026918060002, "grad_norm": 0.5874908975049339, "learning_rate": 2.9499495758339436e-09, "loss": 0.0275, "step": 33572 }, { "epoch": 3.9811454998221274, "grad_norm": 0.5312154629453262, "learning_rate": 2.9131911524649113e-09, "loss": 0.0199, "step": 33573 }, { "epoch": 3.9812640815842526, "grad_norm": 0.7195741645663762, "learning_rate": 2.8766631715859692e-09, "loss": 0.0307, "step": 33574 }, { "epoch": 3.9813826633463774, "grad_norm": 0.6903524270981802, "learning_rate": 2.8403656335301843e-09, "loss": 0.0331, "step": 33575 }, { "epoch": 3.981501245108502, "grad_norm": 0.6260028250010877, "learning_rate": 2.804298538633399e-09, "loss": 0.0441, "step": 33576 }, { "epoch": 3.9816198268706273, "grad_norm": 0.5577637996397548, "learning_rate": 2.768461887225904e-09, "loss": 0.0298, "step": 33577 }, { "epoch": 3.9817384086327525, "grad_norm": 0.5934141113236776, "learning_rate": 2.732855679640767e-09, "loss": 0.0147, "step": 33578 }, { "epoch": 3.9818569903948773, "grad_norm": 0.3295794638607694, "learning_rate": 2.6974799162027274e-09, "loss": 0.0166, "step": 33579 }, { "epoch": 3.981975572157002, "grad_norm": 0.6702120226997245, "learning_rate": 2.6623345972420777e-09, "loss": 0.0205, "step": 33580 }, { "epoch": 3.9820941539191272, "grad_norm": 0.6181039262004118, "learning_rate": 2.6274197230807817e-09, "loss": 0.024, "step": 33581 }, { "epoch": 3.9822127356812524, "grad_norm": 0.33609809030941484, "learning_rate": 2.5927352940408044e-09, "loss": 0.0188, "step": 33582 }, { "epoch": 3.982331317443377, "grad_norm": 0.35004675308028804, "learning_rate": 2.5582813104441106e-09, "loss": 0.0158, "step": 33583 }, { "epoch": 3.982449899205502, "grad_norm": 0.5200446293059806, "learning_rate": 2.5240577726015623e-09, "loss": 0.0226, "step": 33584 }, { "epoch": 3.982568480967627, "grad_norm": 0.6375770020836394, "learning_rate": 2.4900646808379e-09, "loss": 0.0281, "step": 33585 }, { "epoch": 3.9826870627297524, "grad_norm": 0.6545281818846889, "learning_rate": 2.4563020354584354e-09, "loss": 0.0273, "step": 33586 }, { "epoch": 3.982805644491877, "grad_norm": 0.6045810231284627, "learning_rate": 2.4227698367795816e-09, "loss": 0.0167, "step": 33587 }, { "epoch": 3.982924226254002, "grad_norm": 0.33337380086352114, "learning_rate": 2.3894680851094253e-09, "loss": 0.0129, "step": 33588 }, { "epoch": 3.983042808016127, "grad_norm": 0.4839483059221931, "learning_rate": 2.356396780753278e-09, "loss": 0.0207, "step": 33589 }, { "epoch": 3.9831613897782523, "grad_norm": 0.3518366377939293, "learning_rate": 2.3235559240164518e-09, "loss": 0.0187, "step": 33590 }, { "epoch": 3.983279971540377, "grad_norm": 0.6627186557306701, "learning_rate": 2.2909455152014816e-09, "loss": 0.0348, "step": 33591 }, { "epoch": 3.983398553302502, "grad_norm": 0.43597019598432496, "learning_rate": 2.258565554610903e-09, "loss": 0.0157, "step": 33592 }, { "epoch": 3.983517135064627, "grad_norm": 0.46387520875273175, "learning_rate": 2.2264160425417014e-09, "loss": 0.0215, "step": 33593 }, { "epoch": 3.983635716826752, "grad_norm": 0.32374978376330643, "learning_rate": 2.194496979290861e-09, "loss": 0.0147, "step": 33594 }, { "epoch": 3.983754298588877, "grad_norm": 0.4580560620995817, "learning_rate": 2.162808365152591e-09, "loss": 0.0178, "step": 33595 }, { "epoch": 3.983872880351002, "grad_norm": 0.5728562407092934, "learning_rate": 2.1313502004211004e-09, "loss": 0.0344, "step": 33596 }, { "epoch": 3.983991462113127, "grad_norm": 0.44290450154769523, "learning_rate": 2.100122485382272e-09, "loss": 0.0098, "step": 33597 }, { "epoch": 3.984110043875252, "grad_norm": 0.4756775893387092, "learning_rate": 2.0691252203247636e-09, "loss": 0.0241, "step": 33598 }, { "epoch": 3.984228625637377, "grad_norm": 0.31084128447007014, "learning_rate": 2.038358405537233e-09, "loss": 0.013, "step": 33599 }, { "epoch": 3.984347207399502, "grad_norm": 0.5233105380025254, "learning_rate": 2.0078220413000113e-09, "loss": 0.0242, "step": 33600 }, { "epoch": 3.984465789161627, "grad_norm": 0.6598965392024491, "learning_rate": 1.9775161278962063e-09, "loss": 0.0239, "step": 33601 }, { "epoch": 3.984584370923752, "grad_norm": 0.4526226754095667, "learning_rate": 1.9474406656061485e-09, "loss": 0.0207, "step": 33602 }, { "epoch": 3.984702952685877, "grad_norm": 0.565072330449816, "learning_rate": 1.917595654704618e-09, "loss": 0.0268, "step": 33603 }, { "epoch": 3.984821534448002, "grad_norm": 0.3016815861020026, "learning_rate": 1.8879810954691714e-09, "loss": 0.0129, "step": 33604 }, { "epoch": 3.984940116210127, "grad_norm": 0.5452603531605277, "learning_rate": 1.8585969881718124e-09, "loss": 0.0257, "step": 33605 }, { "epoch": 3.985058697972252, "grad_norm": 0.7591945727810463, "learning_rate": 1.8294433330845462e-09, "loss": 0.0309, "step": 33606 }, { "epoch": 3.9851772797343767, "grad_norm": 0.4452983913755919, "learning_rate": 1.800520130473826e-09, "loss": 0.0147, "step": 33607 }, { "epoch": 3.985295861496502, "grad_norm": 0.3287115383582657, "learning_rate": 1.7718273806061058e-09, "loss": 0.0153, "step": 33608 }, { "epoch": 3.9854144432586267, "grad_norm": 0.46751824992008906, "learning_rate": 1.7433650837478389e-09, "loss": 0.0241, "step": 33609 }, { "epoch": 3.985533025020752, "grad_norm": 0.40866564282025025, "learning_rate": 1.715133240162703e-09, "loss": 0.0151, "step": 33610 }, { "epoch": 3.9856516067828767, "grad_norm": 0.5052760960742709, "learning_rate": 1.687131850108825e-09, "loss": 0.0171, "step": 33611 }, { "epoch": 3.985770188545002, "grad_norm": 0.6417656309723662, "learning_rate": 1.6593609138415567e-09, "loss": 0.0292, "step": 33612 }, { "epoch": 3.9858887703071266, "grad_norm": 0.6367968319451515, "learning_rate": 1.6318204316245756e-09, "loss": 0.0268, "step": 33613 }, { "epoch": 3.986007352069252, "grad_norm": 0.4157680008014895, "learning_rate": 1.6045104037049064e-09, "loss": 0.0144, "step": 33614 }, { "epoch": 3.9861259338313766, "grad_norm": 0.4579044353303659, "learning_rate": 1.5774308303351248e-09, "loss": 0.0223, "step": 33615 }, { "epoch": 3.986244515593502, "grad_norm": 0.5137368517214654, "learning_rate": 1.5505817117678068e-09, "loss": 0.0255, "step": 33616 }, { "epoch": 3.9863630973556266, "grad_norm": 0.8412525988533851, "learning_rate": 1.5239630482499767e-09, "loss": 0.0331, "step": 33617 }, { "epoch": 3.9864816791177518, "grad_norm": 0.9947469518663493, "learning_rate": 1.4975748400258838e-09, "loss": 0.0361, "step": 33618 }, { "epoch": 3.9866002608798765, "grad_norm": 0.5169840031998991, "learning_rate": 1.4714170873370016e-09, "loss": 0.0223, "step": 33619 }, { "epoch": 3.9867188426420017, "grad_norm": 0.8460715577408767, "learning_rate": 1.4454897904275788e-09, "loss": 0.05, "step": 33620 }, { "epoch": 3.9868374244041265, "grad_norm": 0.7896949309309669, "learning_rate": 1.4197929495363138e-09, "loss": 0.0268, "step": 33621 }, { "epoch": 3.9869560061662517, "grad_norm": 0.4186136021737811, "learning_rate": 1.3943265648991288e-09, "loss": 0.0234, "step": 33622 }, { "epoch": 3.987074587928377, "grad_norm": 0.5775343123184404, "learning_rate": 1.3690906367491707e-09, "loss": 0.0242, "step": 33623 }, { "epoch": 3.9871931696905016, "grad_norm": 0.5487106579676693, "learning_rate": 1.344085165322362e-09, "loss": 0.0288, "step": 33624 }, { "epoch": 3.9873117514526264, "grad_norm": 0.5498563462698809, "learning_rate": 1.319310150846298e-09, "loss": 0.0226, "step": 33625 }, { "epoch": 3.9874303332147516, "grad_norm": 0.5229337959761348, "learning_rate": 1.2947655935513503e-09, "loss": 0.0294, "step": 33626 }, { "epoch": 3.987548914976877, "grad_norm": 0.531239254144231, "learning_rate": 1.2704514936651146e-09, "loss": 0.0248, "step": 33627 }, { "epoch": 3.9876674967390016, "grad_norm": 0.527544705445687, "learning_rate": 1.24636785140686e-09, "loss": 0.0184, "step": 33628 }, { "epoch": 3.9877860785011263, "grad_norm": 0.5113854545253814, "learning_rate": 1.222514667004182e-09, "loss": 0.0205, "step": 33629 }, { "epoch": 3.9879046602632515, "grad_norm": 0.3792287381961478, "learning_rate": 1.1988919406735744e-09, "loss": 0.0173, "step": 33630 }, { "epoch": 3.9880232420253767, "grad_norm": 0.6965342148912004, "learning_rate": 1.1754996726343059e-09, "loss": 0.0319, "step": 33631 }, { "epoch": 3.9881418237875015, "grad_norm": 0.47333366194580706, "learning_rate": 1.1523378631000946e-09, "loss": 0.0256, "step": 33632 }, { "epoch": 3.9882604055496262, "grad_norm": 0.5286099910429098, "learning_rate": 1.1294065122846587e-09, "loss": 0.0263, "step": 33633 }, { "epoch": 3.9883789873117514, "grad_norm": 0.5331980659173664, "learning_rate": 1.1067056204017156e-09, "loss": 0.0212, "step": 33634 }, { "epoch": 3.9884975690738766, "grad_norm": 0.7258285959128449, "learning_rate": 1.0842351876594325e-09, "loss": 0.0391, "step": 33635 }, { "epoch": 3.9886161508360014, "grad_norm": 0.5409661246808224, "learning_rate": 1.0619952142659762e-09, "loss": 0.0208, "step": 33636 }, { "epoch": 3.988734732598126, "grad_norm": 0.3002689352670013, "learning_rate": 1.0399857004239622e-09, "loss": 0.0181, "step": 33637 }, { "epoch": 3.9888533143602514, "grad_norm": 0.3966165143311295, "learning_rate": 1.0182066463360063e-09, "loss": 0.0224, "step": 33638 }, { "epoch": 3.9889718961223766, "grad_norm": 0.3316309890639357, "learning_rate": 9.966580522074997e-10, "loss": 0.0171, "step": 33639 }, { "epoch": 3.9890904778845013, "grad_norm": 0.5212634991875398, "learning_rate": 9.753399182327316e-10, "loss": 0.0264, "step": 33640 }, { "epoch": 3.989209059646626, "grad_norm": 0.6159713288546432, "learning_rate": 9.542522446087666e-10, "loss": 0.0306, "step": 33641 }, { "epoch": 3.9893276414087513, "grad_norm": 0.4763141011965461, "learning_rate": 9.33395031532669e-10, "loss": 0.0212, "step": 33642 }, { "epoch": 3.9894462231708765, "grad_norm": 0.42080754682203575, "learning_rate": 9.127682791931769e-10, "loss": 0.0202, "step": 33643 }, { "epoch": 3.9895648049330013, "grad_norm": 0.5330000857971132, "learning_rate": 8.923719877818038e-10, "loss": 0.0297, "step": 33644 }, { "epoch": 3.9896833866951265, "grad_norm": 0.7016440956815122, "learning_rate": 8.722061574872875e-10, "loss": 0.0268, "step": 33645 }, { "epoch": 3.989801968457251, "grad_norm": 0.4319781576291787, "learning_rate": 8.522707884955905e-10, "loss": 0.0167, "step": 33646 }, { "epoch": 3.9899205502193764, "grad_norm": 0.45886129118860536, "learning_rate": 8.325658809871239e-10, "loss": 0.022, "step": 33647 }, { "epoch": 3.990039131981501, "grad_norm": 0.6130258972948387, "learning_rate": 8.130914351506258e-10, "loss": 0.0327, "step": 33648 }, { "epoch": 3.9901577137436264, "grad_norm": 0.6642057813490203, "learning_rate": 7.938474511581806e-10, "loss": 0.0301, "step": 33649 }, { "epoch": 3.990276295505751, "grad_norm": 0.5632778825455299, "learning_rate": 7.748339291929751e-10, "loss": 0.0176, "step": 33650 }, { "epoch": 3.9903948772678763, "grad_norm": 0.3173727431114688, "learning_rate": 7.560508694243184e-10, "loss": 0.0145, "step": 33651 }, { "epoch": 3.990513459030001, "grad_norm": 0.4741272719130685, "learning_rate": 7.374982720326218e-10, "loss": 0.0263, "step": 33652 }, { "epoch": 3.9906320407921263, "grad_norm": 0.5898058295732699, "learning_rate": 7.19176137181643e-10, "loss": 0.0241, "step": 33653 }, { "epoch": 3.990750622554251, "grad_norm": 0.5921164658781082, "learning_rate": 7.010844650462422e-10, "loss": 0.024, "step": 33654 }, { "epoch": 3.9908692043163763, "grad_norm": 0.47323120946602737, "learning_rate": 6.832232557901775e-10, "loss": 0.0229, "step": 33655 }, { "epoch": 3.990987786078501, "grad_norm": 0.5588687033202039, "learning_rate": 6.655925095772065e-10, "loss": 0.0218, "step": 33656 }, { "epoch": 3.9911063678406262, "grad_norm": 0.5292311796999236, "learning_rate": 6.481922265738627e-10, "loss": 0.0229, "step": 33657 }, { "epoch": 3.991224949602751, "grad_norm": 0.3239922876370491, "learning_rate": 6.310224069355774e-10, "loss": 0.0128, "step": 33658 }, { "epoch": 3.991343531364876, "grad_norm": 0.26040638973292135, "learning_rate": 6.14083050823333e-10, "loss": 0.0087, "step": 33659 }, { "epoch": 3.991462113127001, "grad_norm": 0.4032213186930263, "learning_rate": 5.973741583953363e-10, "loss": 0.0204, "step": 33660 }, { "epoch": 3.991580694889126, "grad_norm": 0.43479049403119147, "learning_rate": 5.808957298014672e-10, "loss": 0.0212, "step": 33661 }, { "epoch": 3.991699276651251, "grad_norm": 0.5175378863483658, "learning_rate": 5.646477651943815e-10, "loss": 0.0208, "step": 33662 }, { "epoch": 3.991817858413376, "grad_norm": 0.591747172910268, "learning_rate": 5.486302647267349e-10, "loss": 0.0253, "step": 33663 }, { "epoch": 3.991936440175501, "grad_norm": 0.6020362868040511, "learning_rate": 5.328432285428564e-10, "loss": 0.0309, "step": 33664 }, { "epoch": 3.992055021937626, "grad_norm": 0.6471834547628356, "learning_rate": 5.172866567898505e-10, "loss": 0.0298, "step": 33665 }, { "epoch": 3.992173603699751, "grad_norm": 0.3685518768971127, "learning_rate": 5.01960549612046e-10, "loss": 0.0112, "step": 33666 }, { "epoch": 3.992292185461876, "grad_norm": 0.6830127206172448, "learning_rate": 4.868649071509968e-10, "loss": 0.028, "step": 33667 }, { "epoch": 3.992410767224001, "grad_norm": 0.3838299055873761, "learning_rate": 4.719997295427047e-10, "loss": 0.0167, "step": 33668 }, { "epoch": 3.992529348986126, "grad_norm": 0.4309503917131873, "learning_rate": 4.57365016925948e-10, "loss": 0.0141, "step": 33669 }, { "epoch": 3.9926479307482508, "grad_norm": 0.35569858578517843, "learning_rate": 4.4296076943672883e-10, "loss": 0.0161, "step": 33670 }, { "epoch": 3.992766512510376, "grad_norm": 0.975119559184954, "learning_rate": 4.2878698720549837e-10, "loss": 0.0445, "step": 33671 }, { "epoch": 3.992885094272501, "grad_norm": 0.5200923440159667, "learning_rate": 4.1484367036548344e-10, "loss": 0.0232, "step": 33672 }, { "epoch": 3.993003676034626, "grad_norm": 0.7408761880840793, "learning_rate": 4.0113081904435966e-10, "loss": 0.0331, "step": 33673 }, { "epoch": 3.9931222577967507, "grad_norm": 0.6745318201928373, "learning_rate": 3.8764843336702714e-10, "loss": 0.023, "step": 33674 }, { "epoch": 3.993240839558876, "grad_norm": 0.6482523857271871, "learning_rate": 3.7439651346116153e-10, "loss": 0.021, "step": 33675 }, { "epoch": 3.993359421321001, "grad_norm": 0.6945647982862011, "learning_rate": 3.613750594461118e-10, "loss": 0.0241, "step": 33676 }, { "epoch": 3.993478003083126, "grad_norm": 0.3678891512442868, "learning_rate": 3.4858407144122696e-10, "loss": 0.0141, "step": 33677 }, { "epoch": 3.9935965848452506, "grad_norm": 0.4162566622982007, "learning_rate": 3.360235495658559e-10, "loss": 0.0183, "step": 33678 }, { "epoch": 3.993715166607376, "grad_norm": 0.3048330144341975, "learning_rate": 3.236934939365721e-10, "loss": 0.0132, "step": 33679 }, { "epoch": 3.993833748369501, "grad_norm": 0.5878183041208995, "learning_rate": 3.115939046671734e-10, "loss": 0.027, "step": 33680 }, { "epoch": 3.9939523301316258, "grad_norm": 0.6453979075761415, "learning_rate": 2.997247818659066e-10, "loss": 0.0285, "step": 33681 }, { "epoch": 3.9940709118937505, "grad_norm": 0.3859614514564942, "learning_rate": 2.8808612564656944e-10, "loss": 0.0178, "step": 33682 }, { "epoch": 3.9941894936558757, "grad_norm": 0.41000320405515134, "learning_rate": 2.7667793611463324e-10, "loss": 0.0172, "step": 33683 }, { "epoch": 3.994308075418001, "grad_norm": 0.38458064668626896, "learning_rate": 2.655002133727935e-10, "loss": 0.0174, "step": 33684 }, { "epoch": 3.9944266571801257, "grad_norm": 0.27637193927271314, "learning_rate": 2.545529575292971e-10, "loss": 0.0121, "step": 33685 }, { "epoch": 3.9945452389422504, "grad_norm": 0.6171368202108135, "learning_rate": 2.438361686785129e-10, "loss": 0.0276, "step": 33686 }, { "epoch": 3.9946638207043756, "grad_norm": 0.38349654586616105, "learning_rate": 2.333498469259121e-10, "loss": 0.0185, "step": 33687 }, { "epoch": 3.994782402466501, "grad_norm": 0.41099419894213385, "learning_rate": 2.2309399236308814e-10, "loss": 0.0215, "step": 33688 }, { "epoch": 3.9949009842286256, "grad_norm": 0.6479957672159053, "learning_rate": 2.1306860508718552e-10, "loss": 0.0295, "step": 33689 }, { "epoch": 3.9950195659907504, "grad_norm": 0.717555715372708, "learning_rate": 2.0327368519257317e-10, "loss": 0.0254, "step": 33690 }, { "epoch": 3.9951381477528756, "grad_norm": 0.4121756213290982, "learning_rate": 1.9370923276251785e-10, "loss": 0.0181, "step": 33691 }, { "epoch": 3.9952567295150008, "grad_norm": 0.8941225572277204, "learning_rate": 1.8437524789138848e-10, "loss": 0.0375, "step": 33692 }, { "epoch": 3.9953753112771255, "grad_norm": 0.8571961534966666, "learning_rate": 1.752717306652274e-10, "loss": 0.038, "step": 33693 }, { "epoch": 3.9954938930392503, "grad_norm": 0.6243121199047095, "learning_rate": 1.6639868116452573e-10, "loss": 0.0221, "step": 33694 }, { "epoch": 3.9956124748013755, "grad_norm": 0.5868070800543955, "learning_rate": 1.577560994725502e-10, "loss": 0.0233, "step": 33695 }, { "epoch": 3.9957310565635007, "grad_norm": 0.38239393635625263, "learning_rate": 1.4934398567256757e-10, "loss": 0.0122, "step": 33696 }, { "epoch": 3.9958496383256255, "grad_norm": 0.45868983181876694, "learning_rate": 1.4116233983396677e-10, "loss": 0.0188, "step": 33697 }, { "epoch": 3.9959682200877507, "grad_norm": 0.529481903235593, "learning_rate": 1.3321116204001448e-10, "loss": 0.0195, "step": 33698 }, { "epoch": 3.9960868018498754, "grad_norm": 0.4107249088425968, "learning_rate": 1.2549045236009972e-10, "loss": 0.0234, "step": 33699 }, { "epoch": 3.9962053836120006, "grad_norm": 0.3868706580755921, "learning_rate": 1.1800021086638692e-10, "loss": 0.0242, "step": 33700 }, { "epoch": 3.9963239653741254, "grad_norm": 0.4070829714326567, "learning_rate": 1.1074043762826502e-10, "loss": 0.0169, "step": 33701 }, { "epoch": 3.9964425471362506, "grad_norm": 0.7392751042575342, "learning_rate": 1.0371113271234745e-10, "loss": 0.0307, "step": 33702 }, { "epoch": 3.9965611288983753, "grad_norm": 0.9481558660513774, "learning_rate": 9.6912296182472e-11, "loss": 0.0404, "step": 33703 }, { "epoch": 3.9966797106605005, "grad_norm": 0.4968466548184237, "learning_rate": 9.03439281024765e-11, "loss": 0.0207, "step": 33704 }, { "epoch": 3.9967982924226253, "grad_norm": 0.5012704826708081, "learning_rate": 8.400602853064766e-11, "loss": 0.0237, "step": 33705 }, { "epoch": 3.9969168741847505, "grad_norm": 0.42001221180105486, "learning_rate": 7.789859753082329e-11, "loss": 0.0209, "step": 33706 }, { "epoch": 3.9970354559468753, "grad_norm": 0.3963288993433118, "learning_rate": 7.202163515296345e-11, "loss": 0.0257, "step": 33707 }, { "epoch": 3.9971540377090005, "grad_norm": 0.5723230209586793, "learning_rate": 6.637514145535484e-11, "loss": 0.0235, "step": 33708 }, { "epoch": 3.997272619471125, "grad_norm": 0.4173565135640968, "learning_rate": 6.095911649073305e-11, "loss": 0.0172, "step": 33709 }, { "epoch": 3.9973912012332504, "grad_norm": 0.5230037641081093, "learning_rate": 5.5773560303507e-11, "loss": 0.0245, "step": 33710 }, { "epoch": 3.997509782995375, "grad_norm": 0.7697285386886425, "learning_rate": 5.081847294918785e-11, "loss": 0.0333, "step": 33711 }, { "epoch": 3.9976283647575004, "grad_norm": 0.5488396845705643, "learning_rate": 4.6093854466633393e-11, "loss": 0.0277, "step": 33712 }, { "epoch": 3.997746946519625, "grad_norm": 0.7904149592883477, "learning_rate": 4.159970490302811e-11, "loss": 0.0286, "step": 33713 }, { "epoch": 3.9978655282817503, "grad_norm": 0.693615208210422, "learning_rate": 3.7336024297229823e-11, "loss": 0.0346, "step": 33714 }, { "epoch": 3.997984110043875, "grad_norm": 0.5985554558319839, "learning_rate": 3.330281269087188e-11, "loss": 0.0283, "step": 33715 }, { "epoch": 3.9981026918060003, "grad_norm": 0.391757914639111, "learning_rate": 2.9500070122812084e-11, "loss": 0.0166, "step": 33716 }, { "epoch": 3.998221273568125, "grad_norm": 0.6648502242299664, "learning_rate": 2.592779662358158e-11, "loss": 0.0282, "step": 33717 }, { "epoch": 3.9983398553302503, "grad_norm": 0.6715748802245397, "learning_rate": 2.258599222926261e-11, "loss": 0.0262, "step": 33718 }, { "epoch": 3.998458437092375, "grad_norm": 0.5747163916174693, "learning_rate": 1.947465696761075e-11, "loss": 0.0257, "step": 33719 }, { "epoch": 3.9985770188545002, "grad_norm": 0.8131396677848285, "learning_rate": 1.659379087193269e-11, "loss": 0.0495, "step": 33720 }, { "epoch": 3.9986956006166254, "grad_norm": 0.5331465683482607, "learning_rate": 1.3943393964432893e-11, "loss": 0.0226, "step": 33721 }, { "epoch": 3.99881418237875, "grad_norm": 0.720182761970104, "learning_rate": 1.1523466272866933e-11, "loss": 0.0294, "step": 33722 }, { "epoch": 3.998932764140875, "grad_norm": 0.4756599105814044, "learning_rate": 9.334007816663715e-12, "loss": 0.0252, "step": 33723 }, { "epoch": 3.999051345903, "grad_norm": 0.41386766727788715, "learning_rate": 7.375018618027695e-12, "loss": 0.0227, "step": 33724 }, { "epoch": 3.9991699276651254, "grad_norm": 0.4749884881976774, "learning_rate": 5.646498693612223e-12, "loss": 0.0198, "step": 33725 }, { "epoch": 3.99928850942725, "grad_norm": 0.4836314140155837, "learning_rate": 4.1484480628461996e-12, "loss": 0.0278, "step": 33726 }, { "epoch": 3.999407091189375, "grad_norm": 0.8755373724610497, "learning_rate": 2.880866734056298e-12, "loss": 0.0362, "step": 33727 }, { "epoch": 3.9995256729515, "grad_norm": 0.6573198795969022, "learning_rate": 1.843754723895863e-12, "loss": 0.0369, "step": 33728 }, { "epoch": 3.9996442547136253, "grad_norm": 0.3458545455223809, "learning_rate": 1.0371120379160104e-12, "loss": 0.0123, "step": 33729 }, { "epoch": 3.99976283647575, "grad_norm": 0.2844134928352069, "learning_rate": 4.609386844434127e-13, "loss": 0.0112, "step": 33730 }, { "epoch": 3.999881418237875, "grad_norm": 0.4253005662856694, "learning_rate": 1.1523467180474256e-13, "loss": 0.0191, "step": 33731 }, { "epoch": 4.0, "grad_norm": 1.141259252134078, "learning_rate": 0.0, "loss": 0.0277, "step": 33732 }, { "epoch": 4.0, "step": 33732, "total_flos": 4540393750798336.0, "train_loss": 0.26791081111871257, "train_runtime": 255040.4618, "train_samples_per_second": 16.929, "train_steps_per_second": 0.132 } ], "logging_steps": 1.0, "max_steps": 33732, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4540393750798336.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }